From 1b778edee853f0367ebd26da03da4a3fe0ac8a4c Mon Sep 17 00:00:00 2001 From: Alex Volkov Date: Tue, 10 Mar 2026 17:46:40 -0400 Subject: [PATCH 01/11] Annotations ahead of refactoring: bugs., looming bugs, and work notes. --- src/cc65/coptstop.c | 81 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 81 insertions(+) diff --git a/src/cc65/coptstop.c b/src/cc65/coptstop.c index 7a9416b51..b824dc5da 100644 --- a/src/cc65/coptstop.c +++ b/src/cc65/coptstop.c @@ -100,6 +100,15 @@ static int SameRegAValue (StackOpData* D) RegInfo* LRI = GetLastChangedRegInfo (D, &D->Lhs.A); RegInfo* RRI = GetLastChangedRegInfo (D, &D->Rhs.A); + /* ### Bug: The Rhs Reg A value used in the logic here can be from a return + ** value of the runtime call being optimized away (e.g. "jsr tosgtax"), + ** and *not* its operand. That happens when the call insn (JSR) has a label. + ** Then the RRI is that of the runtime call JSR, and the RRI->Out.RegA + ** compares here are not valid. + ** See OptStackOps(), case FoundPush: if (CE_HasLabel (E)) { ... + ** and usage in Opt_a_tosicmp(). + */ + /* RHS can have a -1 ChgIndex only if it is carried over from LHS */ if (RRI == 0 || (D->Rhs.A.ChgIndex >= 0 && @@ -124,6 +133,16 @@ static int SameRegXValue (StackOpData* D) RegInfo* LRI = GetLastChangedRegInfo (D, &D->Lhs.X); RegInfo* RRI = GetLastChangedRegInfo (D, &D->Rhs.X); + /* ### Bug: The Rhs Reg X value used in the logic here can be from a return + ** value of the runtime call being optimized away (e.g. "jsr tosgtax"), + ** and *not* its operand. That happens when the call insn (JSR) has a label. + ** Then the RRI is that of the runtime call JSR, and the RRI->Out.RegX + ** compares here are not valid. + ** See OptStackOps(), case FoundPush: if (CE_HasLabel (E)) { ... + ** RegAPreCondOk() has special armour code for this issue: + ** if (LhsHiVal != RhsHiVal) { /Cannot optimize/ break; } + */ + if (RRI == 0 || (D->Rhs.X.ChgIndex >= 0 && D->Rhs.X.ChgIndex == D->Lhs.X.ChgIndex) || @@ -338,6 +357,9 @@ static unsigned Opt___bzero (StackOpData* D) CodeLabel* L; /* Check if we're using a register variable */ + /* ### Caution: IsRegVar() has deterministic side effects over StackOpData, + ** overriding the ZP location set by PreCondOk(). + */ if (!IsRegVar (D)) { /* Store the value into the zeropage instead of pushing it */ AddStoreLhsX (D); @@ -430,6 +452,9 @@ static unsigned Opt_staspidx (StackOpData* D) CodeEntry* X; /* Check if we're using a register variable */ + /* ### Caution: IsRegVar() has deterministic side effects over StackOpData, + ** overriding the ZP location set by PreCondOk(). + */ if (!IsRegVar (D)) { /* Store the value into the zeropage instead of pushing it */ AddStoreLhsX (D); @@ -456,12 +481,20 @@ static unsigned Opt_staxspidx (StackOpData* D) const char* Arg = 0; /* Check if we're using a register variable */ + /* ### Caution: IsRegVar() has deterministic side effects over StackOpData, + ** overriding the ZP location set by PreCondOk(). + */ if (!IsRegVar (D)) { /* Store the value into the zeropage instead of pushing it */ AddStoreLhsX (D); AddStoreLhsA (D); } + /* Note: This subopt demands for REG_AX to not be used later. + ** X is unchanged by the code here, so only REG_A must not be used. + ** Note 2: Should be updated to common D->IP++ syntax. + */ + /* Inline the store */ /* sta (zp),y */ @@ -477,6 +510,10 @@ static unsigned Opt_staxspidx (StackOpData* D) } InsertEntry (D, X, D->OpIndex+2); + /* ### TODO: We could do a PHA here to save A if it is used, and remove + ** the "A not used" precondition entirely. + */ + if (RegValIsKnown (D->OpEntry->RI->In.RegX)) { /* Value of X is known */ Arg = MakeHexArg (D->OpEntry->RI->In.RegX); @@ -487,6 +524,10 @@ static unsigned Opt_staxspidx (StackOpData* D) } InsertEntry (D, X, D->OpIndex+3); + /* ### TODO: We could do a PLA here to restore A if it is used, and remove + ** the "A not used" precondition entirely. + */ + /* sta (zp),y */ X = NewCodeEntry (OP65_STA, AM65_ZP_INDY, D->ZPLo, 0, D->OpEntry->LI); InsertEntry (D, X, D->OpIndex+4); @@ -502,6 +543,9 @@ static unsigned Opt_staxspidx (StackOpData* D) } InsertEntry (D, X, D->OpIndex+5); + /* Note: We have not flagged any loads for removal. If any are unnecessary, + ** other optimizers will remove them. + */ /* Remove the push and the call to the staxspidx function */ RemoveRemainders (D); @@ -1115,6 +1159,9 @@ static unsigned Opt_a_tosbitwise (StackOpData* D, opc_t OPC) D->IP = D->OpIndex+1; /* Backup lhs if necessary */ + /* ### Note: This should be rearranged into a common form used in others: + ** if (Rhs) { } else if (Lhs) { } else { ZP }. + */ if ((D->Rhs.A.Flags & LI_DIRECT) == 0) { if ((D->Lhs.A.Flags & (LI_DIRECT | LI_RELOAD_Y)) == LI_DIRECT) { /* Just reload lhs */ @@ -1137,6 +1184,13 @@ static unsigned Opt_a_tosbitwise (StackOpData* D, opc_t OPC) D->Rhs.A.Flags |= LI_REMOVE; } + /* ### Bug to come: there are dangerous Rhs X removal attempts here. + ** Runtime function calls can be "loads", and must be treated as a + ** single A/X unit, so removal of X load alone is not valid. + ** This can be solved with an additional "removable Rhs" test, or + ** by simply not forcing the LI_REMOVE flag and letting other optimizers + ** remove any unnecessary loads. + */ /* Do high-byte operation only when its result is used */ if ((GetRegInfo (D->Code, D->IP, REG_X) & REG_X) != 0) { /* Replace the high-byte load with 0 for EOR, or just leave it alone */ @@ -1256,6 +1310,11 @@ static unsigned Opt_a_tosicmp (StackOpData* D) RegInfo* RI; const char* Arg; + /* ### Bug to come: The Reg A value used in SameRegAValue() call here + ** can be from the wrong instruction in some cases, producing invalid + ** result. Any effects of this issue are currently suppressed because + ** other subopts eliminate this subopt's use cases. + */ if (!SameRegAValue (D)) { /* Because of SameRegAValue */ CHECK (D->Rhs.A.ChgIndex >= 0); @@ -1280,6 +1339,12 @@ static unsigned Opt_a_tosicmp (StackOpData* D) InsertEntry (D, X, D->IP++); } else { if ((D->Rhs.A.Flags & LI_RELOAD_Y) == 0) { + /* ### Bug to come: This CMP uses AM65_ZP addressing mode instead + ** of D->Rhs.A.LoadEntry->AM. It is subtle. Assembly formatter + ** will not care, but other optimizers can bug because the stated + ** mode is incorrect. Most likely, the line was copied from + ** above and addressing mode never fixed. + */ /* Cmp directly with RHS src */ X = NewCodeEntry (OP65_CMP, AM65_ZP, D->Rhs.A.LoadEntry->Arg, 0, D->OpEntry->LI); InsertEntry (D, X, D->IP++); @@ -1462,6 +1527,10 @@ static unsigned Opt_a_tosxor (StackOpData* D) +/* Note: A subopt MUST commit to every use case (precondition) that it +** advertises here. Unhandled advertised cases will result in stack corruption, +** because OptStackOps() is committed at the point of call and cannot back out. +*/ /* The first column of these two tables must be sorted in lexical order */ /* CAUTION: table must be sorted for bsearch */ @@ -1941,6 +2010,9 @@ unsigned OptStackOps (CodeSeg* S) ** Treat this as a change to all regs. */ ClearLoadInfo (&Data.Rhs); + /* ### Note: this causes the below SameRegXValue() to not + ** work correctly in some cases. + */ Data.Rhs.A.ChgIndex = I; Data.Rhs.X.ChgIndex = I; Data.Rhs.Y.ChgIndex = I; @@ -1949,6 +2021,12 @@ unsigned OptStackOps (CodeSeg* S) /* Subroutine call: Check if this is one of the functions, ** we're going to replace. */ + /* ### Note: A-only subopts are greedy here. When an A-only + ** subopt exists (by name), only its preconditions are ever + ** checked. There is no fallback to the full A/X subopts. + ** When the A-only preconditions fail, good A/X cases are + ** left unoptimized. + */ if (SameRegXValue (&Data)) { Data.OptFunc = FindFunc (FuncRegATable, FUNC_COUNT (FuncRegATable), E->Arg); IsRegAOptFunc = 1; @@ -2071,6 +2149,9 @@ unsigned OptStackOps (CodeSeg* S) ** load tracking but at least a/x has probably lost between ** pushax and here and will be tracked again when restarting. */ + /* ### Note: PreCondOk() and RegAPreCondOk() should be merged + ** into a single precondition system. + */ if (IsRegAOptFunc ? !RegAPreCondOk (&Data) : !PreCondOk (&Data)) { /* Unflag entries that can't be removed */ ResetDontRemoveEntryFlags (&Data); From 505698c45047769ac499f989948ab3d5cc795a0c Mon Sep 17 00:00:00 2001 From: Alex Volkov Date: Tue, 10 Mar 2026 19:23:40 -0400 Subject: [PATCH 02/11] Refactor: remove StackOpData side effects from IsRegVar() and PreCondOk()/RegAPreCondOk(); setting StackOpData.ZPLo/ZPHi is explicit. --- src/cc65/codeoptutil.c | 22 +++++-- src/cc65/codeoptutil.h | 5 +- src/cc65/coptstop.c | 145 ++++++++++++++++++++++++++++++----------- 3 files changed, 125 insertions(+), 47 deletions(-) diff --git a/src/cc65/codeoptutil.c b/src/cc65/codeoptutil.c index 0d3dba95c..44174c5fc 100644 --- a/src/cc65/codeoptutil.c +++ b/src/cc65/codeoptutil.c @@ -756,6 +756,10 @@ void ResetStackOpData (StackOpData* Data) Data->PushIndex = -1; Data->OpIndex = -1; + + /* Clear to prevent silent optimizer bugs */ + Data->ZPLo = 0; + Data->ZPHi = 0; } @@ -946,10 +950,9 @@ void AdjustStackOffset (StackOpData* D, unsigned Offs) -int IsRegVar (StackOpData* D) +int IsRegVar (const StackOpData* D) /* If the value pushed is that of a zeropage variable that is unchanged until Op, -** replace ZPLo and ZPHi in the given StackOpData struct by the variable and return true. -** Otherwise leave D untouched and return false. +** return true. */ { CodeEntry* LoadA = D->Lhs.A.LoadEntry; @@ -979,9 +982,7 @@ int IsRegVar (StackOpData* D) return 0; } - /* Use the zero page location directly */ - D->ZPLo = LoadA->Arg; - D->ZPHi = LoadX->Arg; + /* ZP location can be used directly */ return 1; } @@ -990,6 +991,8 @@ int IsRegVar (StackOpData* D) void AddStoreLhsA (StackOpData* D) /* Add a store to zero page after the push insn */ { + CHECK (D->ZPLo != 0); + CodeEntry* X = NewCodeEntry (OP65_STA, AM65_ZP, D->ZPLo, 0, D->PushEntry->LI); InsertEntry (D, X, D->PushIndex+1); } @@ -999,6 +1002,8 @@ void AddStoreLhsA (StackOpData* D) void AddStoreLhsX (StackOpData* D) /* Add a store to zero page after the push insn */ { + CHECK (D->ZPHi != 0); + CodeEntry* X = NewCodeEntry (OP65_STX, AM65_ZP, D->ZPHi, 0, D->PushEntry->LI); InsertEntry (D, X, D->PushIndex+1); } @@ -1071,6 +1076,8 @@ void AddOpLow (StackOpData* D, opc_t OPC, LoadInfo* LI) } else { + CHECK (D->ZPLo != 0); + /* Op with temp storage */ X = NewCodeEntry (OPC, AM65_ZP, D->ZPLo, 0, D->OpEntry->LI); InsertEntry (D, X, D->IP++); @@ -1134,6 +1141,9 @@ void AddOpHigh (StackOpData* D, opc_t OPC, LoadInfo* LI, int KeepResult) } } else { + + CHECK (D->ZPHi != 0); + /* opc zphi */ X = NewCodeEntry (OPC, AM65_ZP, D->ZPHi, 0, D->OpEntry->LI); InsertEntry (D, X, D->IP++); diff --git a/src/cc65/codeoptutil.h b/src/cc65/codeoptutil.h index 140b11236..cf7743605 100644 --- a/src/cc65/codeoptutil.h +++ b/src/cc65/codeoptutil.h @@ -223,10 +223,9 @@ void AdjustStackOffset (StackOpData* D, unsigned Offs); ** OpIndex is adjusted according to the insertions. */ -int IsRegVar (StackOpData* D); +int IsRegVar (const StackOpData* D); /* If the value pushed is that of a zeropage variable that is unchanged until Op, -** replace ZPLo and ZPHi in the given StackOpData struct by the variable and return true. -** Otherwise leave D untouched and return false. +** return true. */ void AddStoreLhsA (StackOpData* D); diff --git a/src/cc65/coptstop.c b/src/cc65/coptstop.c index b824dc5da..f2dba2caf 100644 --- a/src/cc65/coptstop.c +++ b/src/cc65/coptstop.c @@ -160,6 +160,58 @@ static int SameRegXValue (StackOpData* D) +static void UseLhsRegVarLoc (StackOpData* D) +/* Prepare StackOpData to use the Lhs ZP location. */ +{ + /* Trying to use a non-ZP var is fatal here. */ + CHECK (IsRegVar (D)); + + /* Use the ZP location directly from Lhs */ + D->ZPLo = D->Lhs.A.LoadEntry->Arg; + D->ZPHi = D->Lhs.X.LoadEntry->Arg; +} + + + +static int HaveUnusedTempZPLoc (const StackOpData* D) +/* Determine if an unused temp ZP location is available. */ +{ + /* We've tracked the used ZP locations; see if any are unused. + ** Note: they must be checked as LO+HI pairs individually. + ** This must match the conditions used in ChooseTempZPLoc() exactly, + ** but it is a lesser evil than predicate side effects. + */ + return (D->ZPUsage & REG_PTR1) == 0 || (D->ZPUsage & REG_SREG) == 0 || + (D->ZPUsage & REG_PTR2) == 0; +} + + + +static void ChooseTempZPLoc (StackOpData* D) +/* Determine the temp ZP location to use from ZP usage flags. +** Note: one not being available is a fatal condition here. +*/ +{ + /* Determine the zero page locations to use. We've tracked the used + ** ZP locations, so try to find some for us that are unused. + */ + if ((D->ZPUsage & REG_PTR1) == 0) { + D->ZPLo = "ptr1"; + D->ZPHi = "ptr1+1"; + } else if ((D->ZPUsage & REG_SREG) == 0) { + D->ZPLo = "sreg"; + D->ZPHi = "sreg+1"; + } else if ((D->ZPUsage & REG_PTR2) == 0) { + D->ZPLo = "ptr2"; + D->ZPHi = "ptr2+1"; + } else { + /* No registers available */ + Internal ("No temp ZP locations available. Was pre-check peformed?"); + } +} + + + /*****************************************************************************/ /* Actual optimization functions */ /*****************************************************************************/ @@ -249,6 +301,10 @@ static unsigned Opt_toseqax_tosneax (StackOpData* D, const char* BoolTransformer } else { + /* Implement the op via a temp ZP location */ + /* HaveUnusedTempZPLoc precondition must have been met */ + ChooseTempZPLoc (D); + /* Save lhs into zeropage, then compare */ AddStoreLhsX (D); AddStoreLhsA (D); @@ -313,6 +369,10 @@ static unsigned Opt_tosshift (StackOpData* D, const char* Name) } else { + /* Implement the op via a temp ZP location */ + /* HaveUnusedTempZPLoc precondition must have been met */ + ChooseTempZPLoc (D); + /* Save lhs into zeropage and reload later */ AddStoreLhsX (D); AddStoreLhsA (D); @@ -357,10 +417,13 @@ static unsigned Opt___bzero (StackOpData* D) CodeLabel* L; /* Check if we're using a register variable */ - /* ### Caution: IsRegVar() has deterministic side effects over StackOpData, - ** overriding the ZP location set by PreCondOk(). - */ - if (!IsRegVar (D)) { + if (IsRegVar (D)) { + /* Can use the Lhs ZP var directly here */ + UseLhsRegVarLoc (D); + } else { + /* HaveUnusedTempZPLoc precondition must have been met */ + ChooseTempZPLoc (D); + /* Store the value into the zeropage instead of pushing it */ AddStoreLhsX (D); AddStoreLhsA (D); @@ -452,10 +515,13 @@ static unsigned Opt_staspidx (StackOpData* D) CodeEntry* X; /* Check if we're using a register variable */ - /* ### Caution: IsRegVar() has deterministic side effects over StackOpData, - ** overriding the ZP location set by PreCondOk(). - */ - if (!IsRegVar (D)) { + if (IsRegVar (D)) { + /* Can use the Lhs ZP var directly here */ + UseLhsRegVarLoc (D); + } else { + /* HaveUnusedTempZPLoc precondition must have been met */ + ChooseTempZPLoc (D); + /* Store the value into the zeropage instead of pushing it */ AddStoreLhsX (D); AddStoreLhsA (D); @@ -481,10 +547,13 @@ static unsigned Opt_staxspidx (StackOpData* D) const char* Arg = 0; /* Check if we're using a register variable */ - /* ### Caution: IsRegVar() has deterministic side effects over StackOpData, - ** overriding the ZP location set by PreCondOk(). - */ - if (!IsRegVar (D)) { + if (IsRegVar (D)) { + /* Can use the Lhs ZP var directly here */ + UseLhsRegVarLoc (D); + } else { + /* HaveUnusedTempZPLoc precondition must have been met */ + ChooseTempZPLoc (D); + /* Store the value into the zeropage instead of pushing it */ AddStoreLhsX (D); AddStoreLhsA (D); @@ -564,6 +633,9 @@ static unsigned Opt_tosaddax (StackOpData* D) /* We need the entry behind the add */ CHECK (D->NextEntry != 0); + /* Because of HaveUnusedTempZPLoc precondition in all cases */ + ChooseTempZPLoc (D); + /* Check if the X register is known and zero when the add is done, and ** if the add is followed by ** @@ -698,6 +770,9 @@ static unsigned Opt_tosaddax (StackOpData* D) static unsigned Opt_tosandax (StackOpData* D) /* Optimize the tosandax sequence */ { + /* HaveUnusedTempZPLoc precondition must have been met */ + ChooseTempZPLoc (D); + /* Store the value into the zeropage instead of pushing it */ ReplacePushByStore (D); @@ -867,6 +942,9 @@ static unsigned Opt_tosneax (StackOpData* D) static unsigned Opt_tosorax (StackOpData* D) /* Optimize the tosorax sequence */ { + /* HaveUnusedTempZPLoc precondition must have been met */ + ChooseTempZPLoc (D); + /* Store the value into the zeropage instead of pushing it */ ReplacePushByStore (D); @@ -1114,6 +1192,8 @@ static unsigned Opt_tosxorax (StackOpData* D) { CodeEntry* X; + /* HaveUnusedTempZPLoc precondition must have been met */ + ChooseTempZPLoc (D); /* Store the value into the zeropage instead of pushing it */ ReplacePushByStore (D); @@ -1168,9 +1248,14 @@ static unsigned Opt_a_tosbitwise (StackOpData* D, opc_t OPC) X = NewCodeEntry (OPC, D->Lhs.A.LoadEntry->AM, D->Lhs.A.LoadEntry->Arg, 0, D->OpEntry->LI); InsertEntry (D, X, D->IP++); } else { + /* Implement the op via a temp ZP location */ + /* HaveUnusedTempZPLoc precondition must have been met */ + ChooseTempZPLoc (D); + /* Backup lhs */ X = NewCodeEntry (OP65_STA, AM65_ZP, D->ZPLo, 0, D->PushEntry->LI); InsertEntry (D, X, D->PushIndex+1); + /* Add code for low operand */ X = NewCodeEntry (OPC, AM65_ZP, D->ZPLo, 0, D->OpEntry->LI); InsertEntry (D, X, D->IP++); @@ -1261,6 +1346,9 @@ static unsigned Opt_a_toscmpbool (StackOpData* D, const char* BoolTransformer) /* This shouldn't fail */ CHECK (BoolTransformer); + /* HaveUnusedTempZPLoc precondition must have been met */ + ChooseTempZPLoc (D); + /* Save lhs into zeropage */ AddStoreLhsA (D); /* AddStoreLhsA may have moved the OpIndex, recalculate insertion point to prevent label migration. */ @@ -1319,6 +1407,9 @@ static unsigned Opt_a_tosicmp (StackOpData* D) /* Because of SameRegAValue */ CHECK (D->Rhs.A.ChgIndex >= 0); + /* HaveUnusedTempZPLoc precondition must have been met */ + ChooseTempZPLoc (D); + /* Store LHS in ZP and reload it before op */ X = NewCodeEntry (OP65_STA, AM65_ZP, D->ZPLo, 0, D->PushEntry->LI); InsertEntry (D, X, D->PushIndex + 1); @@ -1729,19 +1820,8 @@ static int PreCondOk (StackOpData* D) return 0; } - /* Determine the zero page locations to use. We've tracked the used - ** ZP locations, so try to find some for us that are unused. - */ - if ((D->ZPUsage & REG_PTR1) == REG_NONE) { - D->ZPLo = "ptr1"; - D->ZPHi = "ptr1+1"; - } else if ((D->ZPUsage & REG_SREG) == REG_NONE) { - D->ZPLo = "sreg"; - D->ZPHi = "sreg+1"; - } else if ((D->ZPUsage & REG_PTR2) == REG_NONE) { - D->ZPLo = "ptr2"; - D->ZPHi = "ptr2+1"; - } else { + /* Check if an unused temp ZP location is available */ + if (!HaveUnusedTempZPLoc (D)) { /* No registers available */ return 0; } @@ -1884,19 +1964,8 @@ static int RegAPreCondOk (StackOpData* D) return 0; } - /* Determine the zero page locations to use. We've tracked the used - ** ZP locations, so try to find some for us that are unused. - */ - if ((D->ZPUsage & REG_PTR1) == REG_NONE) { - D->ZPLo = "ptr1"; - D->ZPHi = "ptr1+1"; - } else if ((D->ZPUsage & REG_SREG) == REG_NONE) { - D->ZPLo = "sreg"; - D->ZPHi = "sreg+1"; - } else if ((D->ZPUsage & REG_PTR2) == REG_NONE) { - D->ZPLo = "ptr2"; - D->ZPHi = "ptr2+1"; - } else { + /* Check if an unused temp ZP location is available */ + if (!HaveUnusedTempZPLoc (D)) { /* No registers available */ return 0; } From bf5d8c44e4432f7b7d417e0249570ce018b478dc Mon Sep 17 00:00:00 2001 From: Alex Volkov Date: Tue, 10 Mar 2026 20:08:35 -0400 Subject: [PATCH 03/11] BUGFIX: Fix the "same X value" and "same A value" evaluations issue in OptStackOps(), where the values compared come from the wrong instruction when a runtime call has an asm label. --- src/cc65/coptstop.c | 88 ++++++++++++--------------------------------- 1 file changed, 23 insertions(+), 65 deletions(-) diff --git a/src/cc65/coptstop.c b/src/cc65/coptstop.c index f2dba2caf..71ea61889 100644 --- a/src/cc65/coptstop.c +++ b/src/cc65/coptstop.c @@ -94,68 +94,34 @@ struct OptFuncDesc { -static int SameRegAValue (StackOpData* D) -/* Check if Rhs Reg A == Lhs Reg A */ +static int SameRegAValueAtOp (const StackOpData* D, CodeEntry* OpEntry) +/* Check if Rhs Reg A at OpIndex == Lhs Reg A at PushIndex */ { - RegInfo* LRI = GetLastChangedRegInfo (D, &D->Lhs.A); - RegInfo* RRI = GetLastChangedRegInfo (D, &D->Rhs.A); + CodeEntry* PushEntry; - /* ### Bug: The Rhs Reg A value used in the logic here can be from a return - ** value of the runtime call being optimized away (e.g. "jsr tosgtax"), - ** and *not* its operand. That happens when the call insn (JSR) has a label. - ** Then the RRI is that of the runtime call JSR, and the RRI->Out.RegA - ** compares here are not valid. - ** See OptStackOps(), case FoundPush: if (CE_HasLabel (E)) { ... - ** and usage in Opt_a_tosicmp(). - */ - - /* RHS can have a -1 ChgIndex only if it is carried over from LHS */ - if (RRI == 0 || - (D->Rhs.A.ChgIndex >= 0 && - D->Rhs.A.ChgIndex == D->Lhs.A.ChgIndex) || - (LRI != 0 && - RegValIsKnown (LRI->Out.RegA) && - RegValIsKnown (RRI->Out.RegA) && - (LRI->Out.RegA & 0xFF) == (RRI->Out.RegA & 0xFF))) { - - return 1; - } - - return 0; + CHECK (D->PushIndex >= 0); + PushEntry = CS_GetEntry (D->Code, D->PushIndex); + return + RegValIsKnown (PushEntry->RI->In.RegA) && + RegValIsKnown (OpEntry->RI->In.RegA) && + PushEntry->RI->In.RegA == OpEntry->RI->In.RegA; } -static int SameRegXValue (StackOpData* D) -/* Check if Rhs Reg X == Lhs Reg X */ +static int SameRegXValueAtOp (const StackOpData* D, CodeEntry* OpEntry) +/* Check if Rhs Reg X at OpIndex == Lhs Reg X at PushIndex */ { - RegInfo* LRI = GetLastChangedRegInfo (D, &D->Lhs.X); - RegInfo* RRI = GetLastChangedRegInfo (D, &D->Rhs.X); + CodeEntry* PushEntry; - /* ### Bug: The Rhs Reg X value used in the logic here can be from a return - ** value of the runtime call being optimized away (e.g. "jsr tosgtax"), - ** and *not* its operand. That happens when the call insn (JSR) has a label. - ** Then the RRI is that of the runtime call JSR, and the RRI->Out.RegX - ** compares here are not valid. - ** See OptStackOps(), case FoundPush: if (CE_HasLabel (E)) { ... - ** RegAPreCondOk() has special armour code for this issue: - ** if (LhsHiVal != RhsHiVal) { /Cannot optimize/ break; } - */ - - if (RRI == 0 || - (D->Rhs.X.ChgIndex >= 0 && - D->Rhs.X.ChgIndex == D->Lhs.X.ChgIndex) || - (LRI != 0 && - RegValIsKnown (LRI->Out.RegX) && - RegValIsKnown (RRI->Out.RegX) && - (LRI->Out.RegX & 0xFF) == (RRI->Out.RegX & 0xFF))) { - - return 1; - } - - return 0; + CHECK (D->PushIndex >= 0); + PushEntry = CS_GetEntry (D->Code, D->PushIndex); + return + RegValIsKnown (PushEntry->RI->In.RegX) && + RegValIsKnown (OpEntry->RI->In.RegX) && + PushEntry->RI->In.RegX == OpEntry->RI->In.RegX; } @@ -1251,11 +1217,11 @@ static unsigned Opt_a_tosbitwise (StackOpData* D, opc_t OPC) /* Implement the op via a temp ZP location */ /* HaveUnusedTempZPLoc precondition must have been met */ ChooseTempZPLoc (D); - + /* Backup lhs */ X = NewCodeEntry (OP65_STA, AM65_ZP, D->ZPLo, 0, D->PushEntry->LI); InsertEntry (D, X, D->PushIndex+1); - + /* Add code for low operand */ X = NewCodeEntry (OPC, AM65_ZP, D->ZPLo, 0, D->OpEntry->LI); InsertEntry (D, X, D->IP++); @@ -1398,13 +1364,8 @@ static unsigned Opt_a_tosicmp (StackOpData* D) RegInfo* RI; const char* Arg; - /* ### Bug to come: The Reg A value used in SameRegAValue() call here - ** can be from the wrong instruction in some cases, producing invalid - ** result. Any effects of this issue are currently suppressed because - ** other subopts eliminate this subopt's use cases. - */ - if (!SameRegAValue (D)) { - /* Because of SameRegAValue */ + if (!SameRegAValueAtOp (D, D->OpEntry)) { + /* Because of SameRegAValueAtOp */ CHECK (D->Rhs.A.ChgIndex >= 0); /* HaveUnusedTempZPLoc precondition must have been met */ @@ -2079,9 +2040,6 @@ unsigned OptStackOps (CodeSeg* S) ** Treat this as a change to all regs. */ ClearLoadInfo (&Data.Rhs); - /* ### Note: this causes the below SameRegXValue() to not - ** work correctly in some cases. - */ Data.Rhs.A.ChgIndex = I; Data.Rhs.X.ChgIndex = I; Data.Rhs.Y.ChgIndex = I; @@ -2096,7 +2054,7 @@ unsigned OptStackOps (CodeSeg* S) ** When the A-only preconditions fail, good A/X cases are ** left unoptimized. */ - if (SameRegXValue (&Data)) { + if (SameRegXValueAtOp (&Data, E)) { Data.OptFunc = FindFunc (FuncRegATable, FUNC_COUNT (FuncRegATable), E->Arg); IsRegAOptFunc = 1; } From 82d672e904d4fb897b4282689cb4c28df6802b2e Mon Sep 17 00:00:00 2001 From: Alex Volkov Date: Tue, 10 Mar 2026 20:47:39 -0400 Subject: [PATCH 04/11] Refactor: rearrange Opt_a_tosbitwise() code to follow the common if-else if-else pattern form used by other subopts. --- src/cc65/coptstop.c | 46 ++++++++++++++++++++++----------------------- 1 file changed, 22 insertions(+), 24 deletions(-) diff --git a/src/cc65/coptstop.c b/src/cc65/coptstop.c index 71ea61889..5fa3f5dce 100644 --- a/src/cc65/coptstop.c +++ b/src/cc65/coptstop.c @@ -1204,35 +1204,33 @@ static unsigned Opt_a_tosbitwise (StackOpData* D, opc_t OPC) /* Inline the bitwise operation */ D->IP = D->OpIndex+1; - /* Backup lhs if necessary */ - /* ### Note: This should be rearranged into a common form used in others: - ** if (Rhs) { } else if (Lhs) { } else { ZP }. - */ - if ((D->Rhs.A.Flags & LI_DIRECT) == 0) { - if ((D->Lhs.A.Flags & (LI_DIRECT | LI_RELOAD_Y)) == LI_DIRECT) { - /* Just reload lhs */ - X = NewCodeEntry (OPC, D->Lhs.A.LoadEntry->AM, D->Lhs.A.LoadEntry->Arg, 0, D->OpEntry->LI); - InsertEntry (D, X, D->IP++); - } else { - /* Implement the op via a temp ZP location */ - /* HaveUnusedTempZPLoc precondition must have been met */ - ChooseTempZPLoc (D); + if ((D->Rhs.A.Flags & LI_DIRECT) != 0) { - /* Backup lhs */ - X = NewCodeEntry (OP65_STA, AM65_ZP, D->ZPLo, 0, D->PushEntry->LI); - InsertEntry (D, X, D->PushIndex+1); - - /* Add code for low operand */ - X = NewCodeEntry (OPC, AM65_ZP, D->ZPLo, 0, D->OpEntry->LI); - InsertEntry (D, X, D->IP++); - } - } else { - /* Add code for low operand */ + /* Add code for low operand using direct Rhs */ X = NewCodeEntry (OPC, D->Rhs.A.LoadEntry->AM, D->Rhs.A.LoadEntry->Arg, 0, D->OpEntry->LI); InsertEntry (D, X, D->IP++); - /* Rhs load entries may be removed */ + /* Rhs load entries must be removed */ D->Rhs.A.Flags |= LI_REMOVE; + + } else if ((D->Lhs.A.Flags & (LI_DIRECT | LI_RELOAD_Y)) == LI_DIRECT) { + + /* Add code for low operand using direct Lhs */ + X = NewCodeEntry (OPC, D->Lhs.A.LoadEntry->AM, D->Lhs.A.LoadEntry->Arg, 0, D->OpEntry->LI); + InsertEntry (D, X, D->IP++); + + } else { + /* Implement the op via a temp ZP location */ + /* HaveUnusedTempZPLoc precondition must have been met */ + ChooseTempZPLoc (D); + + /* Backup lhs */ + X = NewCodeEntry (OP65_STA, AM65_ZP, D->ZPLo, 0, D->PushEntry->LI); + InsertEntry (D, X, D->PushIndex+1); + + /* Add code for low operand */ + X = NewCodeEntry (OPC, AM65_ZP, D->ZPLo, 0, D->OpEntry->LI); + InsertEntry (D, X, D->IP++); } /* ### Bug to come: there are dangerous Rhs X removal attempts here. From 99ccd7e1a31ef231f885f6484b39ead7fdacb915 Mon Sep 17 00:00:00 2001 From: Alex Volkov Date: Wed, 11 Mar 2026 17:44:33 -0400 Subject: [PATCH 05/11] Refactor: Replace OptStackOps() complex (and buggy) flag-based subopt precondition evaluation system with plain language predicate functions. Any custom preconditions can now be expressed without much trouble. Also, begin establishing parity between precondition logic and actual subopt branching logic. --- src/cc65/coptstop.c | 827 ++++++++++++++++++++++++-------------------- 1 file changed, 450 insertions(+), 377 deletions(-) diff --git a/src/cc65/coptstop.c b/src/cc65/coptstop.c index 5fa3f5dce..0cbb4beec 100644 --- a/src/cc65/coptstop.c +++ b/src/cc65/coptstop.c @@ -53,37 +53,15 @@ -/* Flags for the functions */ -typedef enum { - OP_NONE = 0x00, /* Nothing special */ - OP_A_KNOWN = 0x01, /* Value of A must be known */ - OP_X_ZERO = 0x02, /* X must be zero */ - OP_LHS_LOAD = 0x04, /* Must have load insns for LHS */ - OP_LHS_SAME_BY_OP = 0x08, /* Load result of LHS must be the same if relocated to op */ - OP_LHS_LOAD_DIRECT = 0x0C, /* Must have direct load insn for LHS */ - OP_RHS_LOAD = 0x10, /* Must have load insns for RHS */ - OP_RHS_SAME_BY_OP = 0x20, /* Load result of RHS must be the same if relocated to op */ - OP_RHS_LOAD_DIRECT = 0x30, /* Must have direct load insn for RHS */ - OP_AX_INTERCHANGE = 0x40, /* Preconditions of A/X may be interchanged */ - OP_LR_INTERCHANGE = 0x80, /* Preconditions of LHS/RHS may be interchanged */ - OP_LHS_SAME_BY_PUSH = 0x0100, /* LHS must load the same content if relocated to push */ - OP_RHS_SAME_BY_PUSH = 0x0200, /* RHS must load the same content if relocated to push */ - OP_LHS_SAME_BY_RHS = 0x0400, /* LHS must load the same content if relocated to RHS */ - OP_RHS_SAME_BY_LHS = 0x0800, /* RHS must load the same content if relocated to LHS */ - OP_LHS_REMOVE = 0x1000, /* LHS must be removable or RHS may use ZP store/load */ - OP_LHS_REMOVE_DIRECT = 0x3000, /* LHS must be directly removable */ - OP_RHS_REMOVE = 0x4000, /* RHS must be removable or LHS may use ZP store/load */ - OP_RHS_REMOVE_DIRECT = 0xC000, /* RHS must be directly removable */ -} OP_FLAGS; - /* Structure that describes an optimizer subfunction for a specific op */ typedef unsigned (*OptFunc) (StackOpData* D); +typedef int (*PreCondFunc) (const StackOpData* D); + typedef struct OptFuncDesc OptFuncDesc; struct OptFuncDesc { const char* Name; /* Name of the replaced runtime function */ OptFunc Func; /* Function pointer */ - unsigned UnusedRegs; /* Regs that must not be used later */ - OP_FLAGS Flags; /* Flags */ + PreCondFunc PreCond; /* Precondition predicate pointer */ }; @@ -118,6 +96,13 @@ static int SameRegXValueAtOp (const StackOpData* D, CodeEntry* OpEntry) CHECK (D->PushIndex >= 0); PushEntry = CS_GetEntry (D->Code, D->PushIndex); + /* ### This is a temporary workaround, removable in next phases. */ + if (OpEntry == 0) { + /* Not provided; then OpIndex must be present */ + CHECK (D->OpIndex >= 0); + OpEntry = CS_GetEntry (D->Code, D->OpIndex); + } + return RegValIsKnown (PushEntry->RI->In.RegX) && RegValIsKnown (OpEntry->RI->In.RegX) && @@ -126,6 +111,108 @@ static int SameRegXValueAtOp (const StackOpData* D, CodeEntry* OpEntry) +static inline int RegIsDirectLoaded (const LoadRegInfo* LRI) +/* Check if Reg specified by LoadRegInfo is direct-loaded. */ +{ + /* Must have load insns for Reg, and must be direct */ + return (LRI->Flags & LI_LOAD_INSN) != 0 && + (LRI->Flags & LI_DIRECT) != 0; +} + + + +static inline int RegIsDirectNonStackLoaded (const LoadRegInfo* LRI) +/* Check if Reg specified by LoadRegInfo is direct-loaded without +** stack-relative Y-indexing. +*/ +{ + /* Note: this is not a precise test. abs,y addressing would also + ** require reloading Y. + */ + return (LRI->Flags & (LI_LOAD_INSN | LI_DIRECT | LI_RELOAD_Y)) == + (LI_LOAD_INSN | LI_DIRECT /* no LI_RELOAD_Y */); +} + + + +static inline int RegLoadIsRemovable (const LoadRegInfo* LRI) +/* Check if Reg specified by LoadRegInfo is marked as non-removable. */ +{ + /* Must have load entry for Reg, which is not marked */ + return LRI->LoadEntry != 0 && + (LRI->LoadEntry->Flags & CEF_DONT_REMOVE) == 0; +} + + + +static inline int LhsIsDirectLoad (const StackOpData* D) +/* Check if Lhs A/X is direct-loaded. */ +{ + return RegIsDirectLoaded (&D->Lhs.A) && RegIsDirectLoaded (&D->Lhs.X); +} + + + +static inline int RhsIsDirectLoad (const StackOpData* D) +/* Check if Rhs A/X is direct-loaded. */ +{ + return RegIsDirectLoaded (&D->Rhs.A) && RegIsDirectLoaded (&D->Rhs.X); +} + + + +static inline int LhsIsDirectNonStackLoad (const StackOpData* D) +/* Check if Lhs A/X is direct-loaded and not stack-relative. */ +{ + /* Note: this is not a precise test. abs,y addressing would also + ** require reloading Y. + */ + return RegIsDirectNonStackLoaded (&D->Lhs.A) && + RegIsDirectNonStackLoaded (&D->Lhs.X); +} + + + +static inline int RhsIsDirectNonStackLoad (const StackOpData* D) +/* Check if Rhs A/X is direct-loaded and not stack-relative. */ +{ + /* Note: this is not a precise test. abs,y addressing would also + ** require reloading Y. + */ + return RegIsDirectNonStackLoaded (&D->Rhs.A) && + RegIsDirectNonStackLoaded (&D->Rhs.X); +} + + + +static int RhsIsRemovable (const StackOpData* D) +/* Check if Rhs A/X is direct-loaded and removable (because Lhs A/X must +** be in force at operation). +*/ +{ + /* Must have direct load insns for Rhs */ + if (!RhsIsDirectLoad (D)) { + return 0; /* Fail */ + } + + /* Rhs loads must be removable */ + if (!RegLoadIsRemovable (&D->Rhs.A) || !RegLoadIsRemovable (&D->Rhs.X)) { + return 0; /* Fail */ + } + + /* Special condition: for Rhs to be removable, it must be loaded + ** exactly *once*. When multiple loads exists, only the last one would + ** be removed, and the Lhs A/X will not be in force at op. + */ + if (D->RhsMultiChg) { + return 0; /* Fail */ + } + + return 1; /* Pass */ +} + + + static void UseLhsRegVarLoc (StackOpData* D) /* Prepare StackOpData to use the Lhs ZP location. */ { @@ -148,7 +235,7 @@ static int HaveUnusedTempZPLoc (const StackOpData* D) ** but it is a lesser evil than predicate side effects. */ return (D->ZPUsage & REG_PTR1) == 0 || (D->ZPUsage & REG_SREG) == 0 || - (D->ZPUsage & REG_PTR2) == 0; + (D->ZPUsage & REG_PTR2) == 0; } @@ -178,6 +265,259 @@ static void ChooseTempZPLoc (StackOpData* D) +/*****************************************************************************/ +/* Precondition Predicates */ +/*****************************************************************************/ + + + +static int CanUseAny (const StackOpData* D) +/* Precondition: any operand will do when temp ZP location is available +** as fallback. +*/ +{ + return HaveUnusedTempZPLoc (D); +} + + + +static int CanUseDirectLhsOrTempZP (const StackOpData* D) +/* Precondition: Either Lhs is direct-loaded, or temp ZP location +** must be available. Used by subopts falling back on temp ZP with +** AddStoreLhsX(), AddStoreLhsA(), ReplacePushByStore(). +*/ +{ + return LhsIsDirectLoad (D) || HaveUnusedTempZPLoc (D); +} + + + +static int CanUseNonStackLhsOrTempZP (const StackOpData* D) +/* Precondition: Either Lhs is direct-loaded and not stack-relative, +** or temp ZP location must be available. Used by subopts falling +** back on temp ZP after testing !LI_RELOAD_Y. +*/ +{ + return LhsIsDirectNonStackLoad (D) || HaveUnusedTempZPLoc (D); +} + + + +static int CanUseNonStackLhsOrAny (const StackOpData* D) +/* Precondition: Either Lhs is direct-loaded and not stack-relative, +** or Rhs is direct-loaded and removable (because Lhs A/X must be in force +** at operation), or temp ZP location must be available. +** Used by subopts falling back on temp ZP after testing !LI_RELOAD_Y. +*/ +{ + return LhsIsDirectNonStackLoad (D) || RhsIsRemovable (D) || + HaveUnusedTempZPLoc (D); +} + + + +static int CanUseRegVarOrTempZP (const StackOpData* D) +/* Precondition: either the operand is a register (ZP) var, or a temp +** ZP location is available to store its value. +*/ +{ + return IsRegVar (D) || HaveUnusedTempZPLoc (D); +} + + + +static int CanUseRemovableRhs (const StackOpData* D) +/* Precondition: Rhs is direct-loaded and removable (because Lhs A/X must +** be in force at operation). +*/ +{ + return RhsIsRemovable (D); +} + + + +/* ### Note: This is a temporary, artificially-limited precondition reproducing +** the original (OP_RHS_REMOVE_DIRECT | OP_RHS_LOAD_DIRECT) evaluation, +** but not useful otherwise. +*/ +static int CanUseRemovableRhsWithTempZP (const StackOpData* D) +/* Precondition: Rhs is direct-loaded and removable (because Lhs A/X must +** be in force at operation), and a temp ZP location is available. +*/ +{ + return RhsIsDirectLoad (D) && RhsIsRemovable (D) && HaveUnusedTempZPLoc (D); +} + + + +static int CanUseDirectLhsOrRemovableRhs (const StackOpData* D) +/* Precondition: Either Lhs is direct-loaded, or Rhs is direct-loaded +** and removable (because Lhs A/X must be in force at operation). +*/ +{ + return LhsIsDirectLoad (D) || RhsIsRemovable (D); +} + + + +/* ### Note: This is a temporary, artificially-limited precondition reproducing +** the original (OP_LR_INTERCHANGE | OP_RHS_REMOVE_DIRECT) evaluation, +** but not useful otherwise. +*/ +static int CanUseDirectWithRemovableRhsAndTempZP (const StackOpData* D) +/* Precondition: Lhs is direct-loaded, or Rhs is direct-loaded, and +** Rhs is removable, and a temp ZP location is available. +*/ +{ + return (LhsIsDirectLoad (D) || RhsIsDirectLoad (D)) && + RhsIsRemovable (D) && HaveUnusedTempZPLoc (D); +} + + + +static int CanUseAnyRemovableOrTempZP (const StackOpData* D) +/* Precondition: Either Lhs is direct-loaded, or Rhs is direct-loaded +** and removable (because Lhs A/X must be in force at operation), or +** a temp ZP location is available to store the Lhs. +*/ +{ + return CanUseDirectLhsOrRemovableRhs (D) || HaveUnusedTempZPLoc (D); +} + + + +static int CanUseRegADirectLhsOrRemovableRhs (const StackOpData* D) +/* Precondition: Either Lhs reg A is direct-loaded, or entire Rhs is +** direct-loaded and removable (because Lhs A/X must be in force +** at operation). +*/ +{ + /* Note: the entire Rhs A/X must be removable, not just A. */ + return RegIsDirectLoaded (&D->Lhs.A) || RhsIsRemovable (D); +} + + + +static int CanUseRegANonStackLhsOrRemovableRhs (const StackOpData* D) +/* Precondition: Either Lhs reg A is direct-loaded and not stack-relative, +** or entire Rhs is direct-loaded removable (because Lhs A/X must be +** in force at operation). +*/ +{ + /* Note: the entire Rhs A/X must be removable, not just A. */ + return RegIsDirectNonStackLoaded (&D->Lhs.A) || RhsIsRemovable (D); +} + + + +static int MustHaveTempZP (const StackOpData* D) +/* Precondition: a temp ZP location must be available in all cases */ +{ + return HaveUnusedTempZPLoc (D); +} + + + +static int WithAXlt100CanUseRegVarOrTempZP (const StackOpData* D) +/* Precondition: When X is 0 and A is known at op, either the operand is a +** register (ZP) var, or a temp ZP location is available to store its value. +*/ +{ + CHECK (D->OpEntry != 0); + + return CanUseRegVarOrTempZP (D) && + /* X is 0 at op, and A is known at op */ + (D->OpEntry->RI->In.RegX == 0) && + RegValIsKnown (D->OpEntry->RI->In.RegA); +} + + + +static int WithUnusedACanUseRegVarOrTempZP (const StackOpData* D) +/* Precondition: either the operand is a register (ZP) var, or a temp ZP +** location is available to store its value. Reg A must not be used later +** (because it is changed). +*/ +{ + return (IsRegVar (D) || HaveUnusedTempZPLoc (D)) && + !RegAUsed (D->Code, D->OpIndex + 1); +} + + + +static int WithSameXMustHaveTempZP (const StackOpData* D) +/* Precondition: When Rhs X == Lhs X, any operand will do because a temp +** ZP location will be used. Used by subopts primarily targeting temp +** ZP with AddStoreLhsA() or similar. +*/ +{ + return SameRegXValueAtOp (D, 0) && HaveUnusedTempZPLoc (D); +} + + + +static int WithSameXCanUseRemovableRhs (const StackOpData* D) +/* Precondition: When Rhs X == Lhs X, Rhs must be direct-loaded and removable +** (because Lhs A/X must be in force at operation). +*/ +{ + /* Note: the entire Rhs A/X must be removable, not just A. */ + return SameRegXValueAtOp (D, 0) && RhsIsRemovable (D); +} + + + +/* ### Note: This is a temporary, artificially-limited precondition reproducing +** the original (OP_RHS_REMOVE_DIRECT | OP_RHS_LOAD_DIRECT) evaluation +** for A-only subopts, but not useful otherwise. +*/ +static int WithSameXCanUseRemovableRhsWithTempZP (const StackOpData* D) +/* Precondition: When Rhs X == Lhs X, Rhs is direct-loaded and removable, +** and a temp ZP location is available. +*/ +{ + return SameRegXValueAtOp (D, 0) && RhsIsDirectLoad (D) && + RhsIsRemovable (D) && HaveUnusedTempZPLoc (D); +} + + + +static int WithSameXCanUseNonStackLhsOrRemovableRhs (const StackOpData* D) +/* Precondition: When Rhs X == Lhs X, either Lhs reg A is direct-loaded and not +** stack-relative, or Rhs must be direct-loaded and removable (because Lhs A/X +** must be in force at operation). +*/ +{ + return SameRegXValueAtOp (D, 0) && + CanUseRegANonStackLhsOrRemovableRhs (D); +} + + + +static int WithSameXCanUseNonStackLhsOrAny (const StackOpData* D) +/* Precondition: When Rhs X == Lhs X, either Lhs reg A is direct-loaded and not +** stack-relative, or Rhs must be direct-loaded and removable (because Lhs A/X +** must be in force at operation), or a temp ZP location is available. +*/ +{ + return SameRegXValueAtOp (D, 0) && + (CanUseRegANonStackLhsOrRemovableRhs (D) || HaveUnusedTempZPLoc (D)); +} + + + +static int WithSameXCanUseAny (const StackOpData* D) +/* Precondition: When Rhs X == Lhs X, either Lhs reg A is direct-loaded, or Rhs +** is direct-loaded and removable (because Lhs A/X must be in force at +** operation), or a temp ZP location is available to store the Lhs. +*/ +{ + return SameRegXValueAtOp (D, 0) && + (CanUseRegADirectLhsOrRemovableRhs (D) || HaveUnusedTempZPLoc (D)); +} + + + /*****************************************************************************/ /* Actual optimization functions */ /*****************************************************************************/ @@ -201,8 +541,7 @@ static unsigned Opt_toseqax_tosneax (StackOpData* D, const char* BoolTransformer /* If the lhs is direct (but not stack relative), encode compares with lhs ** effectively reverting the order (which doesn't matter for ==). */ - if ((D->Lhs.A.Flags & (LI_DIRECT | LI_RELOAD_Y)) == LI_DIRECT && - (D->Lhs.X.Flags & (LI_DIRECT | LI_RELOAD_Y)) == LI_DIRECT) { + if (LhsIsDirectNonStackLoad (D)) { CodeEntry* LoadX = D->Lhs.X.LoadEntry; CodeEntry* LoadA = D->Lhs.A.LoadEntry; @@ -225,9 +564,7 @@ static unsigned Opt_toseqax_tosneax (StackOpData* D, const char* BoolTransformer D->Lhs.X.Flags |= LI_REMOVE; D->Lhs.A.Flags |= LI_REMOVE; - } else if ((D->Rhs.A.Flags & (LI_DIRECT | LI_RELOAD_Y)) == LI_DIRECT && - (D->Rhs.X.Flags & (LI_DIRECT | LI_RELOAD_Y)) == LI_DIRECT && - D->RhsMultiChg == 0) { + } else if (RhsIsDirectNonStackLoad (D) && RhsIsRemovable (D)) { CodeEntry* LoadX = D->Rhs.X.LoadEntry; CodeEntry* LoadA = D->Rhs.A.LoadEntry; @@ -250,8 +587,7 @@ static unsigned Opt_toseqax_tosneax (StackOpData* D, const char* BoolTransformer D->Rhs.X.Flags |= LI_REMOVE; D->Rhs.A.Flags |= LI_REMOVE; - } else if ((D->Rhs.A.Flags & LI_DIRECT) != 0 && - (D->Rhs.X.Flags & LI_DIRECT) != 0) { + } else if (RhsIsRemovable (D)) { D->IP = D->OpIndex+1; @@ -308,8 +644,7 @@ static unsigned Opt_tosshift (StackOpData* D, const char* Name) /* If the lhs is direct (but not stack relative), we can just reload the ** data later. */ - if ((D->Lhs.A.Flags & (LI_DIRECT | LI_RELOAD_Y)) == LI_DIRECT && - (D->Lhs.X.Flags & (LI_DIRECT | LI_RELOAD_Y)) == LI_DIRECT) { + if (LhsIsDirectNonStackLoad (D)) { CodeEntry* LoadX = D->Lhs.X.LoadEntry; CodeEntry* LoadA = D->Lhs.A.LoadEntry; @@ -497,6 +832,9 @@ static unsigned Opt_staspidx (StackOpData* D) X = NewCodeEntry (OP65_STA, AM65_ZP_INDY, D->ZPLo, 0, D->OpEntry->LI); InsertEntry (D, X, D->OpIndex+1); + /* Note: We have not flagged any loads for removal. If any are unnecessary, + ** other optimizers will remove them. + */ /* Remove the push and the call to the staspidx function */ RemoveRemainders (D); @@ -525,7 +863,7 @@ static unsigned Opt_staxspidx (StackOpData* D) AddStoreLhsA (D); } - /* Note: This subopt demands for REG_AX to not be used later. + /* Note: Earlier, this subopt demanded for REG_AX to not be used later. ** X is unchanged by the code here, so only REG_A must not be used. ** Note 2: Should be updated to common D->IP++ syntax. */ @@ -701,10 +1039,11 @@ static unsigned Opt_tosaddax (StackOpData* D) /* Value of first op high byte is unknown. Load from ZP or ** original storage. */ - if (D->Lhs.X.Flags & LI_DIRECT) { + if (RegIsDirectLoaded (&D->Lhs.X)) { CodeEntry* LoadX = D->Lhs.X.LoadEntry; X = NewCodeEntry (OP65_LDX, LoadX->AM, LoadX->Arg, 0, D->OpEntry->LI); } else { + CHECK (D->ZPHi != 0); X = NewCodeEntry (OP65_LDX, AM65_ZP, D->ZPHi, 0, D->OpEntry->LI); } } @@ -788,12 +1127,12 @@ static unsigned Opt_tosgeax (StackOpData* D) CodeEntry* X; CodeLabel* L; + /* Because of CanUseRemovableRhs */ + CHECK (RhsIsRemovable (D)); + /* Inline the sbc */ D->IP = D->OpIndex+1; - /* Must be true because of OP_RHS_LOAD */ - CHECK ((D->Rhs.A.Flags & D->Rhs.X.Flags & LI_DIRECT) != 0); - /* Add code for low operand */ AddOpLow (D, OP65_CMP, &D->Rhs); @@ -845,13 +1184,12 @@ static unsigned Opt_tosltax (StackOpData* D) CodeEntry* X; CodeLabel* L; + /* Because of CanUseRemovableRhs */ + CHECK (RhsIsRemovable (D)); /* Inline the compare */ D->IP = D->OpIndex+1; - /* Must be true because of OP_RHS_LOAD */ - CHECK ((D->Rhs.A.Flags & D->Rhs.X.Flags & LI_DIRECT) != 0); - /* Add code for low operand */ AddOpLow (D, OP65_CMP, &D->Rhs); @@ -951,6 +1289,8 @@ static unsigned Opt_tossubax (StackOpData* D) { CodeEntry* X; + /* Because of CanUseRemovableRhs */ + CHECK (RhsIsRemovable (D)); /* Inline the sbc */ D->IP = D->OpIndex+1; @@ -959,9 +1299,6 @@ static unsigned Opt_tossubax (StackOpData* D) X = NewCodeEntry (OP65_SEC, AM65_IMP, 0, 0, D->OpEntry->LI); InsertEntry (D, X, D->IP++); - /* Must be true because of OP_RHS_LOAD */ - CHECK ((D->Rhs.A.Flags & D->Rhs.X.Flags & LI_DIRECT) != 0); - /* Add code for low operand */ AddOpLow (D, OP65_SBC, &D->Rhs); @@ -986,13 +1323,12 @@ static unsigned Opt_tosugeax (StackOpData* D) { CodeEntry* X; + /* Because of CanUseRemovableRhs */ + CHECK (RhsIsRemovable (D)); /* Inline the sbc */ D->IP = D->OpIndex+1; - /* Must be true because of OP_RHS_LOAD */ - CHECK ((D->Rhs.A.Flags & D->Rhs.X.Flags & LI_DIRECT) != 0); - /* Add code for low operand */ AddOpLow (D, OP65_CMP, &D->Rhs); @@ -1029,13 +1365,12 @@ static unsigned Opt_tosugtax (StackOpData* D) { CodeEntry* X; + /* Because of CanUseRemovableRhs */ + CHECK (RhsIsRemovable (D)); /* Inline the sbc */ D->IP = D->OpIndex+1; - /* Must be true because of OP_RHS_LOAD */ - CHECK ((D->Rhs.A.Flags & D->Rhs.X.Flags & LI_DIRECT) != 0); - /* sec */ X = NewCodeEntry (OP65_SEC, AM65_IMP, 0, 0, D->OpEntry->LI); InsertEntry (D, X, D->IP++); @@ -1076,13 +1411,12 @@ static unsigned Opt_tosuleax (StackOpData* D) { CodeEntry* X; + /* Because of CanUseRemovableRhs */ + CHECK (RhsIsRemovable (D)); /* Inline the sbc */ D->IP = D->OpIndex+1; - /* Must be true because of OP_RHS_LOAD */ - CHECK ((D->Rhs.A.Flags & D->Rhs.X.Flags & LI_DIRECT) != 0); - /* sec */ X = NewCodeEntry (OP65_SEC, AM65_IMP, 0, 0, D->OpEntry->LI); InsertEntry (D, X, D->IP++); @@ -1123,13 +1457,12 @@ static unsigned Opt_tosultax (StackOpData* D) { CodeEntry* X; + /* Because of CanUseRemovableRhs */ + CHECK (RhsIsRemovable (D)); /* Inline the sbc */ D->IP = D->OpIndex+1; - /* Must be true because of OP_RHS_LOAD */ - CHECK ((D->Rhs.A.Flags & D->Rhs.X.Flags & LI_DIRECT) != 0); - /* Add code for low operand */ AddOpLow (D, OP65_CMP, &D->Rhs); @@ -1204,7 +1537,7 @@ static unsigned Opt_a_tosbitwise (StackOpData* D, opc_t OPC) /* Inline the bitwise operation */ D->IP = D->OpIndex+1; - if ((D->Rhs.A.Flags & LI_DIRECT) != 0) { + if (RhsIsRemovable (D)) { /* Add code for low operand using direct Rhs */ X = NewCodeEntry (OPC, D->Rhs.A.LoadEntry->AM, D->Rhs.A.LoadEntry->Arg, 0, D->OpEntry->LI); @@ -1213,7 +1546,7 @@ static unsigned Opt_a_tosbitwise (StackOpData* D, opc_t OPC) /* Rhs load entries must be removed */ D->Rhs.A.Flags |= LI_REMOVE; - } else if ((D->Lhs.A.Flags & (LI_DIRECT | LI_RELOAD_Y)) == LI_DIRECT) { + } else if (RegIsDirectNonStackLoaded (&D->Lhs.A)) { /* Add code for low operand using direct Lhs */ X = NewCodeEntry (OPC, D->Lhs.A.LoadEntry->AM, D->Lhs.A.LoadEntry->Arg, 0, D->OpEntry->LI); @@ -1244,6 +1577,7 @@ static unsigned Opt_a_tosbitwise (StackOpData* D, opc_t OPC) if ((GetRegInfo (D->Code, D->IP, REG_X) & REG_X) != 0) { /* Replace the high-byte load with 0 for EOR, or just leave it alone */ if (OPC == OP65_EOR) { + /* Since this is a "same X" EOR, the result is always 0. */ X = NewCodeEntry (OP65_LDX, AM65_IMM, MakeHexArg (0), 0, D->Rhs.X.ChgEntry->LI); InsertEntry (D, X, D->IP++); D->Rhs.X.Flags |= LI_REMOVE; @@ -1272,9 +1606,7 @@ static unsigned Opt_a_toscmpbool (StackOpData* D, const char* BoolTransformer) D->IP = D->OpIndex + 1; - if (!D->RhsMultiChg && - (D->Rhs.A.Flags & LI_DIRECT) != 0 && - (D->Rhs.A.LoadEntry->Flags & CEF_DONT_REMOVE) == 0) { + if (RhsIsRemovable (D)) { /* cmp */ AddOpLow (D, OP65_CMP, &D->Rhs); @@ -1283,7 +1615,7 @@ static unsigned Opt_a_toscmpbool (StackOpData* D, const char* BoolTransformer) D->Rhs.X.Flags |= LI_REMOVE; D->Rhs.A.Flags |= LI_REMOVE; - } else if ((D->Lhs.A.Flags & LI_DIRECT) != 0) { + } else if (RegIsDirectLoaded (&D->Lhs.A)) { /* If the lhs is direct (but not stack relative), encode compares with lhs, ** effectively reversing the order (which doesn't matter for == and !=). */ @@ -1377,7 +1709,10 @@ static unsigned Opt_a_tosicmp (StackOpData* D) D->IP = D->OpIndex + 1; - if ((D->Rhs.A.Flags & LI_DIRECT) == 0) { + /* ### Note: This should be updated to follow the common + ** if (Non-Stack) { } else if (Direct) { } else { } pattern. + */ + if (!RegIsDirectLoaded (&D->Rhs.A)) { /* RHS src is not directly comparable */ X = NewCodeEntry (OP65_STA, AM65_ZP, D->ZPHi, 0, D->OpEntry->LI); InsertEntry (D, X, D->Rhs.A.ChgIndex + 1); @@ -1484,13 +1819,12 @@ static unsigned Opt_a_tossub (StackOpData* D) { CodeEntry* X; + /* Because of CanUseRemovableRhs */ + CHECK (RhsIsRemovable (D)); /* Inline the sbc */ D->IP = D->OpIndex+1; - /* Must be true because of OP_RHS_LOAD */ - CHECK ((D->Rhs.A.Flags & D->Rhs.X.Flags & LI_DIRECT) != 0); - /* sec */ X = NewCodeEntry (OP65_SEC, AM65_IMP, 0, 0, D->OpEntry->LI); InsertEntry (D, X, D->IP++); @@ -1586,47 +1920,47 @@ static unsigned Opt_a_tosxor (StackOpData* D) /* CAUTION: table must be sorted for bsearch */ static const OptFuncDesc FuncTable[] = { /* BEGIN SORTED.SH */ - { "___bzero", Opt___bzero, REG_NONE, OP_X_ZERO | OP_A_KNOWN }, - { "staspidx", Opt_staspidx, REG_NONE, OP_NONE }, - { "staxspidx", Opt_staxspidx, REG_AX, OP_NONE }, - { "tosaddax", Opt_tosaddax, REG_NONE, OP_NONE }, - { "tosandax", Opt_tosandax, REG_NONE, OP_NONE }, - { "tosaslax", Opt_tosaslax, REG_NONE, OP_NONE }, - { "tosasrax", Opt_tosasrax, REG_NONE, OP_NONE }, - { "toseqax", Opt_toseqax, REG_NONE, OP_LR_INTERCHANGE | OP_RHS_REMOVE_DIRECT }, - { "tosgeax", Opt_tosgeax, REG_NONE, OP_RHS_REMOVE_DIRECT | OP_RHS_LOAD_DIRECT }, - { "tosltax", Opt_tosltax, REG_NONE, OP_RHS_REMOVE_DIRECT | OP_RHS_LOAD_DIRECT }, - { "tosneax", Opt_tosneax, REG_NONE, OP_LR_INTERCHANGE | OP_RHS_REMOVE_DIRECT }, - { "tosorax", Opt_tosorax, REG_NONE, OP_NONE }, - { "tosshlax", Opt_tosshlax, REG_NONE, OP_NONE }, - { "tosshrax", Opt_tosshrax, REG_NONE, OP_NONE }, - { "tossubax", Opt_tossubax, REG_NONE, OP_RHS_REMOVE_DIRECT | OP_RHS_LOAD_DIRECT }, - { "tosugeax", Opt_tosugeax, REG_NONE, OP_RHS_REMOVE_DIRECT | OP_RHS_LOAD_DIRECT }, - { "tosugtax", Opt_tosugtax, REG_NONE, OP_RHS_REMOVE_DIRECT | OP_RHS_LOAD_DIRECT }, - { "tosuleax", Opt_tosuleax, REG_NONE, OP_RHS_REMOVE_DIRECT | OP_RHS_LOAD_DIRECT }, - { "tosultax", Opt_tosultax, REG_NONE, OP_RHS_REMOVE_DIRECT | OP_RHS_LOAD_DIRECT }, - { "tosxorax", Opt_tosxorax, REG_NONE, OP_NONE }, + { "___bzero", Opt___bzero, WithAXlt100CanUseRegVarOrTempZP }, + { "staspidx", Opt_staspidx, CanUseRegVarOrTempZP }, + { "staxspidx", Opt_staxspidx, WithUnusedACanUseRegVarOrTempZP }, + { "tosaddax", Opt_tosaddax, MustHaveTempZP }, + { "tosandax", Opt_tosandax, MustHaveTempZP }, + { "tosaslax", Opt_tosaslax, MustHaveTempZP }, + { "tosasrax", Opt_tosasrax, MustHaveTempZP }, + { "toseqax", Opt_toseqax, CanUseDirectWithRemovableRhsAndTempZP }, + { "tosgeax", Opt_tosgeax, CanUseRemovableRhsWithTempZP }, + { "tosltax", Opt_tosltax, CanUseRemovableRhsWithTempZP }, + { "tosneax", Opt_tosneax, CanUseDirectWithRemovableRhsAndTempZP }, + { "tosorax", Opt_tosorax, MustHaveTempZP }, + { "tosshlax", Opt_tosshlax, MustHaveTempZP }, + { "tosshrax", Opt_tosshrax, MustHaveTempZP }, + { "tossubax", Opt_tossubax, CanUseRemovableRhsWithTempZP }, + { "tosugeax", Opt_tosugeax, CanUseRemovableRhsWithTempZP }, + { "tosugtax", Opt_tosugtax, CanUseRemovableRhsWithTempZP }, + { "tosuleax", Opt_tosuleax, CanUseRemovableRhsWithTempZP }, + { "tosultax", Opt_tosultax, CanUseRemovableRhsWithTempZP }, + { "tosxorax", Opt_tosxorax, MustHaveTempZP }, /* END SORTED.SH */ }; /* CAUTION: table must be sorted for bsearch */ static const OptFuncDesc FuncRegATable[] = { /* BEGIN SORTED.SH */ - { "tosandax", Opt_a_tosand, REG_NONE, OP_RHS_REMOVE_DIRECT | OP_RHS_LOAD_DIRECT }, - { "toseqax", Opt_a_toseq, REG_NONE, OP_NONE }, - { "tosgeax", Opt_a_tosuge, REG_NONE, OP_NONE }, - { "tosgtax", Opt_a_tosugt, REG_NONE, OP_NONE }, - { "tosicmp", Opt_a_tosicmp, REG_NONE, OP_NONE }, - { "tosleax", Opt_a_tosule, REG_NONE, OP_NONE }, - { "tosltax", Opt_a_tosult, REG_NONE, OP_NONE }, - { "tosneax", Opt_a_tosne, REG_NONE, OP_NONE }, - { "tosorax", Opt_a_tosor, REG_NONE, OP_RHS_REMOVE_DIRECT | OP_RHS_LOAD_DIRECT }, - { "tossubax", Opt_a_tossub, REG_NONE, OP_RHS_REMOVE_DIRECT | OP_RHS_LOAD_DIRECT }, - { "tosugeax", Opt_a_tosuge, REG_NONE, OP_NONE }, - { "tosugtax", Opt_a_tosugt, REG_NONE, OP_NONE }, - { "tosuleax", Opt_a_tosule, REG_NONE, OP_NONE }, - { "tosultax", Opt_a_tosult, REG_NONE, OP_NONE }, - { "tosxorax", Opt_a_tosxor, REG_NONE, OP_RHS_REMOVE_DIRECT | OP_RHS_LOAD_DIRECT }, + { "tosandax", Opt_a_tosand, WithSameXCanUseRemovableRhsWithTempZP }, + { "toseqax", Opt_a_toseq, WithSameXMustHaveTempZP }, + { "tosgeax", Opt_a_tosuge, WithSameXMustHaveTempZP }, + { "tosgtax", Opt_a_tosugt, WithSameXMustHaveTempZP }, + { "tosicmp", Opt_a_tosicmp, WithSameXMustHaveTempZP }, + { "tosleax", Opt_a_tosule, WithSameXMustHaveTempZP }, + { "tosltax", Opt_a_tosult, WithSameXMustHaveTempZP }, + { "tosneax", Opt_a_tosne, WithSameXMustHaveTempZP }, + { "tosorax", Opt_a_tosor, WithSameXCanUseRemovableRhsWithTempZP }, + { "tossubax", Opt_a_tossub, WithSameXCanUseRemovableRhsWithTempZP }, + { "tosugeax", Opt_a_tosuge, WithSameXMustHaveTempZP }, + { "tosugtax", Opt_a_tosugt, WithSameXMustHaveTempZP }, + { "tosuleax", Opt_a_tosule, WithSameXMustHaveTempZP }, + { "tosultax", Opt_a_tosult, WithSameXMustHaveTempZP }, + { "tosxorax", Opt_a_tosxor, WithSameXCanUseRemovableRhsWithTempZP }, /* END SORTED.SH */ }; @@ -1654,283 +1988,23 @@ static const OptFuncDesc* FindFunc (const OptFuncDesc FuncTable[], size_t Count, static int PreCondOk (StackOpData* D) /* Check if the preconditions for a call to the optimizer subfunction are -** satisfied. As a side effect, this function will also choose the zero page -** register to use for temporary storage. +** satisfied. */ { - LoadInfo* Lhs; - LoadInfo* Rhs; - LoadRegInfo* LhsLo; - LoadRegInfo* LhsHi; - LoadRegInfo* RhsLo; - LoadRegInfo* RhsHi; - short LoVal; - short HiVal; - int I; - int Passed = 0; - - /* Check the flags */ const OptFuncDesc* Desc = D->OptFunc; - unsigned UnusedRegs = Desc->UnusedRegs; - if (UnusedRegs != REG_NONE && - (GetRegInfo (D->Code, D->OpIndex+1, UnusedRegs) & UnusedRegs) != 0) { - /* Cannot optimize */ - return 0; + + /* Common to all: must have a basic block */ + if (!CS_IsBasicBlock (D->Code, D->PushIndex, D->OpIndex)) { + return 0; /* Fail */ } - Passed = 0; - LoVal = D->OpEntry->RI->In.RegA; - HiVal = D->OpEntry->RI->In.RegX; - /* Check normally first, then interchange A/X and check again if necessary */ - for (I = (Desc->Flags & OP_AX_INTERCHANGE ? 0 : 1); !Passed && I < 2; ++I) { - - do { - if ((Desc->Flags & OP_A_KNOWN) != 0 && - RegValIsUnknown (LoVal)) { - /* Cannot optimize */ - break; - } - if ((Desc->Flags & OP_X_ZERO) != 0 && - HiVal != 0) { - /* Cannot optimize */ - break; - } - Passed = 1; - } while (0); - - /* Interchange A/X */ - LoVal = D->OpEntry->RI->In.RegX; - HiVal = D->OpEntry->RI->In.RegA; - } - if (!Passed) { - /* Cannot optimize */ - return 0; + /* Call the pre-cond predicate if one was provided. */ + if (Desc->PreCond == 0 || Desc->PreCond (D)) { + /* Preconditions passed (even if they were null). */ + return 1; } - Passed = 0; - Lhs = &D->Lhs; - Rhs = &D->Rhs; - /* Check normally first, then interchange LHS/RHS and check again if necessary */ - for (I = (Desc->Flags & OP_LR_INTERCHANGE ? 0 : 1); !Passed && I < 2; ++I) { - - do { - LhsLo = &Lhs->A; - LhsHi = &Lhs->X; - RhsLo = &Rhs->A; - RhsHi = &Rhs->X; - /* Currently we have only LHS/RHS checks with identical requirements for A/X, - ** so we don't need to check twice for now. - */ - - if ((Desc->Flags & OP_LHS_LOAD) != 0) { - if ((LhsLo->Flags & LhsHi->Flags & LI_LOAD_INSN) == 0) { - /* Cannot optimize */ - break; - } else if ((Desc->Flags & OP_LHS_LOAD_DIRECT) != 0) { - if ((LhsLo->Flags & LhsHi->Flags & LI_DIRECT) == 0) { - /* Cannot optimize */ - break; - } - } - } - if ((Desc->Flags & OP_RHS_LOAD) != 0) { - if ((RhsLo->Flags & RhsHi->Flags & LI_LOAD_INSN) == 0) { - /* Cannot optimize */ - break; - } else if ((Desc->Flags & OP_RHS_LOAD_DIRECT) != 0) { - if ((RhsLo->Flags & RhsHi->Flags & LI_DIRECT) == 0) { - /* Cannot optimize */ - break; - } - } - } - if ((Desc->Flags & OP_LHS_REMOVE) != 0) { - /* Check if the load entries cannot be removed */ - if ((LhsLo->LoadEntry != 0 && (LhsLo->LoadEntry->Flags & CEF_DONT_REMOVE) != 0) || - (LhsHi->LoadEntry != 0 && (LhsHi->LoadEntry->Flags & CEF_DONT_REMOVE) != 0)) { - if ((Desc->Flags & OP_LHS_REMOVE_DIRECT) != 0) { - /* Cannot optimize */ - break; - } - } - } - if ((Desc->Flags & OP_RHS_REMOVE) != 0) { - if ((RhsLo->LoadEntry != 0 && (RhsLo->LoadEntry->Flags & CEF_DONT_REMOVE) != 0) || - (RhsHi->LoadEntry != 0 && (RhsHi->LoadEntry->Flags & CEF_DONT_REMOVE) != 0)) { - if ((Desc->Flags & OP_RHS_REMOVE_DIRECT) != 0) { - /* Cannot optimize */ - break; - } - } - } - if (D->RhsMultiChg && (Desc->Flags & OP_RHS_REMOVE_DIRECT) != 0) { - /* Cannot optimize */ - break; - } - Passed = 1; - } while (0); - - /* Interchange LHS/RHS for next round */ - Lhs = &D->Rhs; - Rhs = &D->Lhs; - } - if (!Passed) { - /* Cannot optimize */ - return 0; - } - - /* Check if an unused temp ZP location is available */ - if (!HaveUnusedTempZPLoc (D)) { - /* No registers available */ - return 0; - } - - /* Determine if we have a basic block */ - return CS_IsBasicBlock (D->Code, D->PushIndex, D->OpIndex); -} - - - -static int RegAPreCondOk (StackOpData* D) -/* Check if the preconditions for a call to the RegA-only optimizer subfunction -** are satisfied. As a side effect, this function will also choose the zero page -** register to use for temporary storage. -*/ -{ - LoadInfo* Lhs; - LoadInfo* Rhs; - LoadRegInfo* LhsLo; - LoadRegInfo* RhsLo; - short LhsLoVal, LhsHiVal; - short RhsLoVal, RhsHiVal; - int I; - int Passed = 0; - - /* Check the flags */ - const OptFuncDesc* Desc = D->OptFunc; - unsigned UnusedRegs = Desc->UnusedRegs; - if (UnusedRegs != REG_NONE && - (GetRegInfo (D->Code, D->OpIndex+1, UnusedRegs) & UnusedRegs) != 0) { - /* Cannot optimize */ - return 0; - } - - Passed = 0; - LhsLoVal = D->PushEntry->RI->In.RegA; - LhsHiVal = D->PushEntry->RI->In.RegX; - RhsLoVal = D->OpEntry->RI->In.RegA; - RhsHiVal = D->OpEntry->RI->In.RegX; - /* Check normally first, then interchange A/X and check again if necessary */ - for (I = (Desc->Flags & OP_AX_INTERCHANGE ? 0 : 1); !Passed && I < 2; ++I) { - - do { - if (LhsHiVal != RhsHiVal) { - /* Cannot optimize */ - break; - } - if ((Desc->Flags & OP_A_KNOWN) != 0 && - RegValIsUnknown (LhsLoVal)) { - /* Cannot optimize */ - break; - } - if ((Desc->Flags & OP_X_ZERO) != 0 && - LhsHiVal != 0) { - /* Cannot optimize */ - break; - } - Passed = 1; - } while (0); - - /* Suppress warning about unused assignment in GCC */ - (void)RhsLoVal; - - /* Interchange A/X */ - LhsLoVal = D->PushEntry->RI->In.RegX; - LhsHiVal = D->PushEntry->RI->In.RegA; - RhsLoVal = D->OpEntry->RI->In.RegX; - RhsHiVal = D->OpEntry->RI->In.RegA; - } - if (!Passed) { - /* Cannot optimize */ - return 0; - } - - Passed = 0; - Lhs = &D->Lhs; - Rhs = &D->Rhs; - /* Check normally first, then interchange LHS/RHS and check again if necessary */ - for (I = (Desc->Flags & OP_LR_INTERCHANGE ? 0 : 1); !Passed && I < 2; ++I) { - - do { - LhsLo = &Lhs->A; - RhsLo = &Rhs->A; - /* Currently we have only LHS/RHS checks with identical requirements for A/X, - ** so we don't need to check twice for now. - */ - - if ((Desc->Flags & OP_LHS_LOAD) != 0) { - if ((LhsLo->Flags & LI_LOAD_INSN) == 0) { - /* Cannot optimize */ - break; - } else if ((Desc->Flags & OP_LHS_LOAD_DIRECT) != 0) { - if ((LhsLo->Flags & LI_DIRECT) == 0) { - /* Cannot optimize */ - break; - } - } - } - if ((Desc->Flags & OP_RHS_LOAD) != 0) { - if ((RhsLo->Flags & LI_LOAD_INSN) == 0) { - /* Cannot optimize */ - break; - } else if ((Desc->Flags & OP_RHS_LOAD_DIRECT) != 0) { - if ((RhsLo->Flags & LI_DIRECT) == 0) { - /* Cannot optimize */ - break; - } - } - } - if ((Desc->Flags & OP_LHS_REMOVE) != 0) { - /* Check if the load entries cannot be removed */ - if ((LhsLo->LoadEntry != 0 && (LhsLo->LoadEntry->Flags & CEF_DONT_REMOVE) != 0)) { - if ((Desc->Flags & OP_LHS_REMOVE_DIRECT) != 0) { - /* Cannot optimize */ - break; - } - } - } - if ((Desc->Flags & OP_RHS_REMOVE) != 0) { - if ((RhsLo->LoadEntry != 0 && (RhsLo->LoadEntry->Flags & CEF_DONT_REMOVE) != 0)) { - if ((Desc->Flags & OP_RHS_REMOVE_DIRECT) != 0) { - /* Cannot optimize */ - break; - } - } - } - if (D->RhsMultiChg && (Desc->Flags & OP_RHS_REMOVE_DIRECT) != 0) { - /* Cannot optimize */ - break; - } - Passed = 1; - } while (0); - - /* Interchange LHS/RHS for next round */ - Lhs = &D->Rhs; - Rhs = &D->Lhs; - } - if (!Passed) { - /* Cannot optimize */ - return 0; - } - - /* Check if an unused temp ZP location is available */ - if (!HaveUnusedTempZPLoc (D)) { - /* No registers available */ - return 0; - } - - /* Determine if we have a basic block */ - return CS_IsBasicBlock (D->Code, D->PushIndex, D->OpIndex); + return 0; /* Preconditions failed */ } @@ -2051,6 +2125,8 @@ unsigned OptStackOps (CodeSeg* S) ** checked. There is no fallback to the full A/X subopts. ** When the A-only preconditions fail, good A/X cases are ** left unoptimized. + ** The FuncTables should be merged into a single precondition + ** system. */ if (SameRegXValueAtOp (&Data, E)) { Data.OptFunc = FindFunc (FuncRegATable, FUNC_COUNT (FuncRegATable), E->Arg); @@ -2174,10 +2250,7 @@ unsigned OptStackOps (CodeSeg* S) ** load tracking but at least a/x has probably lost between ** pushax and here and will be tracked again when restarting. */ - /* ### Note: PreCondOk() and RegAPreCondOk() should be merged - ** into a single precondition system. - */ - if (IsRegAOptFunc ? !RegAPreCondOk (&Data) : !PreCondOk (&Data)) { + if (!PreCondOk (&Data)) { /* Unflag entries that can't be removed */ ResetDontRemoveEntryFlags (&Data); I = Data.PushIndex; From 73f414955469b18f02b356221bca86dc27b994b5 Mon Sep 17 00:00:00 2001 From: Alex Volkov Date: Wed, 11 Mar 2026 19:09:34 -0400 Subject: [PATCH 06/11] Refactor: remove the now unused flag variable in OptStackOps() --- src/cc65/coptstop.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/cc65/coptstop.c b/src/cc65/coptstop.c index 0cbb4beec..53cd2681a 100644 --- a/src/cc65/coptstop.c +++ b/src/cc65/coptstop.c @@ -2026,8 +2026,7 @@ unsigned OptStackOps (CodeSeg* S) unsigned PushedRegs = 0; /* Track if the same regs are used after the push */ int RhsAChgIndex; /* Track if rhs is changed more than once */ int RhsXChgIndex; /* Track if rhs is changed more than once */ - int IsRegAOptFunc = 0; /* Whether to use the RegA-only optimizations */ - + enum { Initialize, Search, @@ -2130,11 +2129,9 @@ unsigned OptStackOps (CodeSeg* S) */ if (SameRegXValueAtOp (&Data, E)) { Data.OptFunc = FindFunc (FuncRegATable, FUNC_COUNT (FuncRegATable), E->Arg); - IsRegAOptFunc = 1; } if (Data.OptFunc == 0) { Data.OptFunc = FindFunc (FuncTable, FUNC_COUNT (FuncTable), E->Arg); - IsRegAOptFunc = 0; } if (Data.OptFunc) { /* Disallow removing Rhs loads if the registers are used */ From ca98b218e87b267e012bd121aa7742cdf87051bf Mon Sep 17 00:00:00 2001 From: Alex Volkov Date: Wed, 11 Mar 2026 19:17:50 -0400 Subject: [PATCH 07/11] Remove the unused (for now) predicate functions in coptstop.c to keep the compiler happy. --- src/cc65/coptstop.c | 147 -------------------------------------------- 1 file changed, 147 deletions(-) diff --git a/src/cc65/coptstop.c b/src/cc65/coptstop.c index 53cd2681a..0db8e6a16 100644 --- a/src/cc65/coptstop.c +++ b/src/cc65/coptstop.c @@ -271,51 +271,6 @@ static void ChooseTempZPLoc (StackOpData* D) -static int CanUseAny (const StackOpData* D) -/* Precondition: any operand will do when temp ZP location is available -** as fallback. -*/ -{ - return HaveUnusedTempZPLoc (D); -} - - - -static int CanUseDirectLhsOrTempZP (const StackOpData* D) -/* Precondition: Either Lhs is direct-loaded, or temp ZP location -** must be available. Used by subopts falling back on temp ZP with -** AddStoreLhsX(), AddStoreLhsA(), ReplacePushByStore(). -*/ -{ - return LhsIsDirectLoad (D) || HaveUnusedTempZPLoc (D); -} - - - -static int CanUseNonStackLhsOrTempZP (const StackOpData* D) -/* Precondition: Either Lhs is direct-loaded and not stack-relative, -** or temp ZP location must be available. Used by subopts falling -** back on temp ZP after testing !LI_RELOAD_Y. -*/ -{ - return LhsIsDirectNonStackLoad (D) || HaveUnusedTempZPLoc (D); -} - - - -static int CanUseNonStackLhsOrAny (const StackOpData* D) -/* Precondition: Either Lhs is direct-loaded and not stack-relative, -** or Rhs is direct-loaded and removable (because Lhs A/X must be in force -** at operation), or temp ZP location must be available. -** Used by subopts falling back on temp ZP after testing !LI_RELOAD_Y. -*/ -{ - return LhsIsDirectNonStackLoad (D) || RhsIsRemovable (D) || - HaveUnusedTempZPLoc (D); -} - - - static int CanUseRegVarOrTempZP (const StackOpData* D) /* Precondition: either the operand is a register (ZP) var, or a temp ** ZP location is available to store its value. @@ -326,16 +281,6 @@ static int CanUseRegVarOrTempZP (const StackOpData* D) -static int CanUseRemovableRhs (const StackOpData* D) -/* Precondition: Rhs is direct-loaded and removable (because Lhs A/X must -** be in force at operation). -*/ -{ - return RhsIsRemovable (D); -} - - - /* ### Note: This is a temporary, artificially-limited precondition reproducing ** the original (OP_RHS_REMOVE_DIRECT | OP_RHS_LOAD_DIRECT) evaluation, ** but not useful otherwise. @@ -350,16 +295,6 @@ static int CanUseRemovableRhsWithTempZP (const StackOpData* D) -static int CanUseDirectLhsOrRemovableRhs (const StackOpData* D) -/* Precondition: Either Lhs is direct-loaded, or Rhs is direct-loaded -** and removable (because Lhs A/X must be in force at operation). -*/ -{ - return LhsIsDirectLoad (D) || RhsIsRemovable (D); -} - - - /* ### Note: This is a temporary, artificially-limited precondition reproducing ** the original (OP_LR_INTERCHANGE | OP_RHS_REMOVE_DIRECT) evaluation, ** but not useful otherwise. @@ -375,41 +310,6 @@ static int CanUseDirectWithRemovableRhsAndTempZP (const StackOpData* D) -static int CanUseAnyRemovableOrTempZP (const StackOpData* D) -/* Precondition: Either Lhs is direct-loaded, or Rhs is direct-loaded -** and removable (because Lhs A/X must be in force at operation), or -** a temp ZP location is available to store the Lhs. -*/ -{ - return CanUseDirectLhsOrRemovableRhs (D) || HaveUnusedTempZPLoc (D); -} - - - -static int CanUseRegADirectLhsOrRemovableRhs (const StackOpData* D) -/* Precondition: Either Lhs reg A is direct-loaded, or entire Rhs is -** direct-loaded and removable (because Lhs A/X must be in force -** at operation). -*/ -{ - /* Note: the entire Rhs A/X must be removable, not just A. */ - return RegIsDirectLoaded (&D->Lhs.A) || RhsIsRemovable (D); -} - - - -static int CanUseRegANonStackLhsOrRemovableRhs (const StackOpData* D) -/* Precondition: Either Lhs reg A is direct-loaded and not stack-relative, -** or entire Rhs is direct-loaded removable (because Lhs A/X must be -** in force at operation). -*/ -{ - /* Note: the entire Rhs A/X must be removable, not just A. */ - return RegIsDirectNonStackLoaded (&D->Lhs.A) || RhsIsRemovable (D); -} - - - static int MustHaveTempZP (const StackOpData* D) /* Precondition: a temp ZP location must be available in all cases */ { @@ -456,17 +356,6 @@ static int WithSameXMustHaveTempZP (const StackOpData* D) -static int WithSameXCanUseRemovableRhs (const StackOpData* D) -/* Precondition: When Rhs X == Lhs X, Rhs must be direct-loaded and removable -** (because Lhs A/X must be in force at operation). -*/ -{ - /* Note: the entire Rhs A/X must be removable, not just A. */ - return SameRegXValueAtOp (D, 0) && RhsIsRemovable (D); -} - - - /* ### Note: This is a temporary, artificially-limited precondition reproducing ** the original (OP_RHS_REMOVE_DIRECT | OP_RHS_LOAD_DIRECT) evaluation ** for A-only subopts, but not useful otherwise. @@ -482,42 +371,6 @@ static int WithSameXCanUseRemovableRhsWithTempZP (const StackOpData* D) -static int WithSameXCanUseNonStackLhsOrRemovableRhs (const StackOpData* D) -/* Precondition: When Rhs X == Lhs X, either Lhs reg A is direct-loaded and not -** stack-relative, or Rhs must be direct-loaded and removable (because Lhs A/X -** must be in force at operation). -*/ -{ - return SameRegXValueAtOp (D, 0) && - CanUseRegANonStackLhsOrRemovableRhs (D); -} - - - -static int WithSameXCanUseNonStackLhsOrAny (const StackOpData* D) -/* Precondition: When Rhs X == Lhs X, either Lhs reg A is direct-loaded and not -** stack-relative, or Rhs must be direct-loaded and removable (because Lhs A/X -** must be in force at operation), or a temp ZP location is available. -*/ -{ - return SameRegXValueAtOp (D, 0) && - (CanUseRegANonStackLhsOrRemovableRhs (D) || HaveUnusedTempZPLoc (D)); -} - - - -static int WithSameXCanUseAny (const StackOpData* D) -/* Precondition: When Rhs X == Lhs X, either Lhs reg A is direct-loaded, or Rhs -** is direct-loaded and removable (because Lhs A/X must be in force at -** operation), or a temp ZP location is available to store the Lhs. -*/ -{ - return SameRegXValueAtOp (D, 0) && - (CanUseRegADirectLhsOrRemovableRhs (D) || HaveUnusedTempZPLoc (D)); -} - - - /*****************************************************************************/ /* Actual optimization functions */ /*****************************************************************************/ From 57cc683d91e60ff3cdacbdd4f0666d385ccb19e3 Mon Sep 17 00:00:00 2001 From: Alex Volkov Date: Wed, 11 Mar 2026 19:33:36 -0400 Subject: [PATCH 08/11] Whitespace fix (left after var removal) --- src/cc65/coptstop.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cc65/coptstop.c b/src/cc65/coptstop.c index 0db8e6a16..07ad6cb71 100644 --- a/src/cc65/coptstop.c +++ b/src/cc65/coptstop.c @@ -1879,7 +1879,7 @@ unsigned OptStackOps (CodeSeg* S) unsigned PushedRegs = 0; /* Track if the same regs are used after the push */ int RhsAChgIndex; /* Track if rhs is changed more than once */ int RhsXChgIndex; /* Track if rhs is changed more than once */ - + enum { Initialize, Search, From 2fd9411f7f184ff2b88038b6502a86ce4cb9d273 Mon Sep 17 00:00:00 2001 From: Alex Volkov Date: Fri, 20 Mar 2026 19:51:04 -0400 Subject: [PATCH 09/11] Expand bug2395 tests to other operators and conditions --- test/val/bug2395.c | 112 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 112 insertions(+) diff --git a/test/val/bug2395.c b/test/val/bug2395.c index 07c5cd7c5..7cdf8b30b 100644 --- a/test/val/bug2395.c +++ b/test/val/bug2395.c @@ -8,6 +8,7 @@ unsigned char a, b; unsigned char c = 199; unsigned char d = 100; +int i = 50; int main(void) { int fails = 0; @@ -38,6 +39,117 @@ int main(void) { printf("AND error: a %d instead of %d\n", a, b); fails++; } + + a = c + (d != 0); + b = c + 1; + + printf("%u ^ (%u != 0) => %u\n", c, d, a); + if (a != b) { + printf("ADD error: a %d instead of %d\n", a, b); + fails++; + } + + a = c - (d != 0); + b = c - 1; + + printf("%u ^ (%u != 0) => %u\n", c, d, a); + if (a != b) { + printf("SUB error: a %d instead of %d\n", a, b); + fails++; + } + + a = c ^ (d >= 0); + b = c ^ 1; + + printf("%u ^ (%u >= 0) => %u\n", c, d, a); + if (a != b) { + printf("XOR error: a %d instead of %d\n", a, b); + fails++; + } + + a = c | (d >= 0); + b = c | 1; + + printf("%u | (%u >= 0) => %u\n", c, d, a); + if (a != b) { + printf("OR error: a %d instead of %d\n", a, b); + fails++; + } + + a = c & (d >= 0); + b = c & 1; + + printf("%u & (%u >= 0) => %u\n", c, d, a); + if (a != b) { + printf("AND error: a %d instead of %d\n", a, b); + fails++; + } + + a = c + (d >= 0); + b = c + 1; + + printf("%u ^ (%u >= 0) => %u\n", c, d, a); + if (a != b) { + printf("ADD error: a %d instead of %d\n", a, b); + fails++; + } + + a = c - (d >= 0); + b = c - 1; + + printf("%u ^ (%u >= 0) => %u\n", c, d, a); + if (a != b) { + printf("SUB error: a %d instead of %d\n", a, b); + fails++; + } + + printf("%d errors\n", fails); + + a = c ^ (i >= 0); + b = c ^ 1; + + printf("%u ^ (%d >= 0) => %u\n", c, d, a); + if (a != b) { + printf("XOR int cmp error: a %d instead of %d\n", a, b); + fails++; + } + + a = c | (i >= 0); + b = c | 1; + + printf("%u | (%d >= 0) => %u\n", c, i, a); + if (a != b) { + printf("OR int cmp error: a %d instead of %d\n", a, b); + fails++; + } + + a = c & (i >= 0); + b = c & 1; + + printf("%u & (%d >= 0) => %u\n", c, i, a); + if (a != b) { + printf("AND int cmp error: a %d instead of %d\n", a, b); + fails++; + } + + a = c + (i >= 0); + b = c + 1; + + printf("%u ^ (%d >= 0) => %u\n", c, i, a); + if (a != b) { + printf("ADD int cmp error: a %d instead of %d\n", a, b); + fails++; + } + + a = c - (i >= 0); + b = c - 1; + + printf("%u ^ (%d >= 0) => %u\n", c, i, a); + if (a != b) { + printf("SUB int cmp error: a %d instead of %d\n", a, b); + fails++; + } + printf("%d errors\n", fails); return fails; From f464b5049544b7658a1a8b763c1911da04387591 Mon Sep 17 00:00:00 2001 From: Alex Volkov Date: Fri, 20 Mar 2026 19:51:37 -0400 Subject: [PATCH 10/11] Minimal test for issue #2946 --- test/val/bug2946.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) create mode 100644 test/val/bug2946.c diff --git a/test/val/bug2946.c b/test/val/bug2946.c new file mode 100644 index 000000000..81ba05195 --- /dev/null +++ b/test/val/bug2946.c @@ -0,0 +1,22 @@ + +/* bug #2946: Incomplete, non-removable Rhs asserts in OptStackOps */ + +#include +#include + +unsigned char a, b = 0; +unsigned char c = 10; +unsigned char d = 1; + +int main(void) { + /* 'if' needed to produce a label below, moved by OptJumpTarget3 */ + if (b != 0) { + /* Operation not important; it only affects the removal of one LDX #$00 */ + b = 0; + } + + /* d >= 0 is const, A/X=1 (Warning: Result of comparison is always true) */ + a = c - (d >= 0); + + return !(a == 9); +} From d036c4aaae7d23aaee05f9e947772367239a4072 Mon Sep 17 00:00:00 2001 From: Alex Volkov Date: Fri, 20 Mar 2026 19:51:55 -0400 Subject: [PATCH 11/11] Minimal test for issue #2947 --- test/val/bug2947.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) create mode 100644 test/val/bug2947.c diff --git a/test/val/bug2947.c b/test/val/bug2947.c new file mode 100644 index 000000000..dbb479f2c --- /dev/null +++ b/test/val/bug2947.c @@ -0,0 +1,22 @@ + +/* bug #2947: Opt_a_tosbitwise() attempts to remove non-removable Rhs X */ + +#include +#include + +unsigned char a, b = 0; +unsigned char c = 10; +unsigned char d = 1; + +int main(void) { + /* 'if' needed to produce a label below, moved by OptJumpTarget3 */ + if (b != 0) { + /* Operation not important; it only affects the removal of one LDX #$00 */ + b = 0; + } + + /* d >= 0 is const, A/X=1 (Warning: Result of comparison is always true) */ + a = c ^ (d >= 0); + + return !(a == 11); +}