1 | /*****************************************************************************/ |
2 | /* */ |
3 | /* coptstop.c */ |
4 | /* */ |
5 | /* Optimize operations that take operands via the stack */ |
6 | /* */ |
7 | /* */ |
8 | /* */ |
9 | /* (C) 2001-2019, Ullrich von Bassewitz */ |
10 | /* Roemerstrasse 52 */ |
11 | /* D-70794 Filderstadt */ |
12 | /* EMail: uz@cc65.org */ |
13 | /* */ |
14 | /* */ |
15 | /* This software is provided 'as-is', without any expressed or implied */ |
16 | /* warranty. In no event will the authors be held liable for any damages */ |
17 | /* arising from the use of this software. */ |
18 | /* */ |
19 | /* Permission is granted to anyone to use this software for any purpose, */ |
20 | /* including commercial applications, and to alter it and redistribute it */ |
21 | /* freely, subject to the following restrictions: */ |
22 | /* */ |
23 | /* 1. The origin of this software must not be misrepresented; you must not */ |
24 | /* claim that you wrote the original software. If you use this software */ |
25 | /* in a product, an acknowledgment in the product documentation would be */ |
26 | /* appreciated but is not required. */ |
27 | /* 2. Altered source versions must be plainly marked as such, and must not */ |
28 | /* be misrepresented as being the original software. */ |
29 | /* 3. This notice may not be removed or altered from any source */ |
30 | /* distribution. */ |
31 | /* */ |
32 | /*****************************************************************************/ |
33 | |
34 | |
35 | |
36 | #include <stdlib.h> |
37 | |
38 | /* common */ |
39 | #include "chartype.h" |
40 | |
41 | /* cc65 */ |
42 | #include "codeent.h" |
43 | #include "codeinfo.h" |
44 | #include "coptstop.h" |
45 | #include "error.h" |
46 | |
47 | |
48 | |
49 | /*****************************************************************************/ |
50 | /* Load tracking data */ |
51 | /*****************************************************************************/ |
52 | |
53 | |
54 | |
55 | /* LoadRegInfo flags set by DirectOp */ |
56 | typedef enum { |
57 | LI_NONE = 0x00, |
58 | LI_DIRECT = 0x01, /* Direct op may be used */ |
59 | LI_RELOAD_Y = 0x02, /* Reload index register Y */ |
60 | LI_REMOVE = 0x04, /* Load may be removed */ |
61 | LI_DONT_REMOVE = 0x08, /* Load may not be removed */ |
62 | LI_DUP_LOAD = 0x10, /* Duplicate load */ |
63 | } LI_FLAGS; |
64 | |
65 | /* Structure that tells us how to load the lhs values */ |
66 | typedef struct LoadRegInfo LoadRegInfo; |
67 | struct LoadRegInfo { |
68 | LI_FLAGS Flags; /* Tells us how to load */ |
69 | int LoadIndex; /* Index of load insn, -1 if invalid */ |
70 | CodeEntry* LoadEntry; /* The actual entry, 0 if invalid */ |
71 | int XferIndex; /* Index of transfer insn */ |
72 | CodeEntry* XferEntry; /* The actual transfer entry */ |
73 | int Offs; /* Stack offset if data is on stack */ |
74 | }; |
75 | |
76 | /* Now combined for both registers */ |
77 | typedef struct LoadInfo LoadInfo; |
78 | struct LoadInfo { |
79 | LoadRegInfo A; /* Info for A register */ |
80 | LoadRegInfo X; /* Info for X register */ |
81 | LoadRegInfo Y; /* Info for Y register */ |
82 | }; |
83 | |
84 | |
85 | |
86 | /*****************************************************************************/ |
87 | /* Data */ |
88 | /*****************************************************************************/ |
89 | |
90 | |
91 | |
92 | /* Flags for the functions */ |
93 | typedef enum { |
94 | OP_NONE = 0x00, /* Nothing special */ |
95 | OP_A_KNOWN = 0x01, /* Value of A must be known */ |
96 | OP_X_ZERO = 0x02, /* X must be zero */ |
97 | OP_LHS_LOAD = 0x04, /* Must have load insns for LHS */ |
98 | OP_LHS_LOAD_DIRECT = 0x0C, /* Must have direct load insn for LHS */ |
99 | OP_RHS_LOAD = 0x10, /* Must have load insns for RHS */ |
100 | OP_RHS_LOAD_DIRECT = 0x30, /* Must have direct load insn for RHS */ |
101 | } OP_FLAGS; |
102 | |
103 | /* Structure forward decl */ |
104 | typedef struct StackOpData StackOpData; |
105 | |
106 | /* Structure that describes an optimizer subfunction for a specific op */ |
107 | typedef unsigned (*OptFunc) (StackOpData* D); |
108 | typedef struct OptFuncDesc OptFuncDesc; |
109 | struct OptFuncDesc { |
110 | const char* Name; /* Name of the replaced runtime function */ |
111 | OptFunc Func; /* Function pointer */ |
112 | unsigned UnusedRegs; /* Regs that must not be used later */ |
113 | OP_FLAGS Flags; /* Flags */ |
114 | }; |
115 | |
116 | /* Structure that holds the needed data */ |
117 | struct StackOpData { |
118 | CodeSeg* Code; /* Pointer to code segment */ |
119 | unsigned Flags; /* Flags to remember things */ |
120 | |
121 | /* Pointer to optimizer subfunction description */ |
122 | const OptFuncDesc* OptFunc; |
123 | |
124 | /* ZP register usage inside the sequence */ |
125 | unsigned ZPUsage; |
126 | |
127 | /* Register load information for lhs and rhs */ |
128 | LoadInfo Lhs; |
129 | LoadInfo Rhs; |
130 | |
131 | /* Several indices of insns in the code segment */ |
132 | int PushIndex; /* Index of call to pushax in codeseg */ |
133 | int OpIndex; /* Index of actual operation */ |
134 | |
135 | /* Pointers to insns in the code segment */ |
136 | CodeEntry* PrevEntry; /* Entry before the call to pushax */ |
137 | CodeEntry* PushEntry; /* Pointer to entry with call to pushax */ |
138 | CodeEntry* OpEntry; /* Pointer to entry with op */ |
139 | CodeEntry* NextEntry; /* Entry after the op */ |
140 | |
141 | const char* ZPLo; /* Lo byte of zero page loc to use */ |
142 | const char* ZPHi; /* Hi byte of zero page loc to use */ |
143 | unsigned IP; /* Insertion point used by some routines */ |
144 | }; |
145 | |
146 | |
147 | |
148 | /*****************************************************************************/ |
149 | /* Load tracking code */ |
150 | /*****************************************************************************/ |
151 | |
152 | |
153 | |
154 | static void ClearLoadRegInfo (LoadRegInfo* RI) |
155 | /* Clear a LoadRegInfo struct */ |
156 | { |
157 | RI->Flags = LI_NONE; |
158 | RI->LoadIndex = -1; |
159 | RI->XferIndex = -1; |
160 | RI->Offs = 0; |
161 | } |
162 | |
163 | |
164 | |
165 | static void FinalizeLoadRegInfo (LoadRegInfo* RI, CodeSeg* S) |
166 | /* Prepare a LoadRegInfo struct for use */ |
167 | { |
168 | /* Get the entries */ |
169 | if (RI->LoadIndex >= 0) { |
170 | RI->LoadEntry = CS_GetEntry (S, RI->LoadIndex); |
171 | } else { |
172 | RI->LoadEntry = 0; |
173 | } |
174 | if (RI->XferIndex >= 0) { |
175 | RI->XferEntry = CS_GetEntry (S, RI->XferIndex); |
176 | } else { |
177 | RI->XferEntry = 0; |
178 | } |
179 | } |
180 | |
181 | |
182 | |
183 | static void ClearLoadInfo (LoadInfo* LI) |
184 | /* Clear a LoadInfo struct */ |
185 | { |
186 | ClearLoadRegInfo (&LI->A); |
187 | ClearLoadRegInfo (&LI->X); |
188 | ClearLoadRegInfo (&LI->Y); |
189 | } |
190 | |
191 | |
192 | |
193 | static void AdjustLoadRegInfo (LoadRegInfo* RI, int Index, int Change) |
194 | /* Adjust a load register info struct after deleting or inserting an entry |
195 | ** with a given index |
196 | */ |
197 | { |
198 | CHECK (abs (Change) == 1); |
199 | if (Change < 0) { |
200 | /* Deletion */ |
201 | if (Index < RI->LoadIndex) { |
202 | --RI->LoadIndex; |
203 | } else if (Index == RI->LoadIndex) { |
204 | /* Has been removed */ |
205 | RI->LoadIndex = -1; |
206 | RI->LoadEntry = 0; |
207 | } |
208 | if (Index < RI->XferIndex) { |
209 | --RI->XferIndex; |
210 | } else if (Index == RI->XferIndex) { |
211 | /* Has been removed */ |
212 | RI->XferIndex = -1; |
213 | RI->XferEntry = 0; |
214 | } |
215 | } else { |
216 | /* Insertion */ |
217 | if (Index <= RI->LoadIndex) { |
218 | ++RI->LoadIndex; |
219 | } |
220 | if (Index <= RI->XferIndex) { |
221 | ++RI->XferIndex; |
222 | } |
223 | } |
224 | } |
225 | |
226 | |
227 | |
228 | static void FinalizeLoadInfo (LoadInfo* LI, CodeSeg* S) |
229 | /* Prepare a LoadInfo struct for use */ |
230 | { |
231 | /* Get the entries */ |
232 | FinalizeLoadRegInfo (&LI->A, S); |
233 | FinalizeLoadRegInfo (&LI->X, S); |
234 | FinalizeLoadRegInfo (&LI->Y, S); |
235 | } |
236 | |
237 | |
238 | |
239 | static void AdjustLoadInfo (LoadInfo* LI, int Index, int Change) |
240 | /* Adjust a load info struct after deleting entry with a given index */ |
241 | { |
242 | AdjustLoadRegInfo (&LI->A, Index, Change); |
243 | AdjustLoadRegInfo (&LI->X, Index, Change); |
244 | AdjustLoadRegInfo (&LI->Y, Index, Change); |
245 | } |
246 | |
247 | |
248 | |
249 | static void HonourUseAndChg (LoadRegInfo* RI, unsigned Reg, const CodeEntry* E) |
250 | /* Honour use and change flags for an instruction */ |
251 | { |
252 | if (E->Chg & Reg) { |
253 | ClearLoadRegInfo (RI); |
254 | } else if ((E->Use & Reg) && RI->LoadIndex >= 0) { |
255 | RI->Flags |= LI_DONT_REMOVE; |
256 | } |
257 | } |
258 | |
259 | |
260 | |
261 | static void TrackLoads (LoadInfo* LI, CodeEntry* E, int I) |
262 | /* Track loads for a code entry */ |
263 | { |
264 | if (E->Info & OF_LOAD) { |
265 | |
266 | LoadRegInfo* RI = 0; |
267 | |
268 | /* Determine, which register was loaded */ |
269 | if (E->Chg & REG_A) { |
270 | RI = &LI->A; |
271 | } else if (E->Chg & REG_X) { |
272 | RI = &LI->X; |
273 | } else if (E->Chg & REG_Y) { |
274 | RI = &LI->Y; |
275 | } |
276 | CHECK (RI != 0); |
277 | |
278 | /* If we had a load or xfer op before, this is a duplicate load which |
279 | ** can cause problems if it encountered between the pushax and the op, |
280 | ** so remember it. |
281 | */ |
282 | if (RI->LoadIndex >= 0 || RI->XferIndex >= 0) { |
283 | RI->Flags |= LI_DUP_LOAD; |
284 | } |
285 | |
286 | /* Remember the load */ |
287 | RI->LoadIndex = I; |
288 | RI->XferIndex = -1; |
289 | |
290 | /* Set load flags */ |
291 | RI->Flags &= ~(LI_DIRECT | LI_RELOAD_Y); |
292 | if (E->AM == AM65_IMM || E->AM == AM65_ZP || E->AM == AM65_ABS) { |
293 | /* These insns are all ok and replaceable */ |
294 | RI->Flags |= LI_DIRECT; |
295 | } else if (E->AM == AM65_ZP_INDY && |
296 | RegValIsKnown (E->RI->In.RegY) && |
297 | strcmp (E->Arg, "sp" ) == 0) { |
298 | /* A load from the stack with known offset is also ok, but in this |
299 | ** case we must reload the index register later. Please note that |
300 | ** a load indirect via other zero page locations is not ok, since |
301 | ** these locations may change between the push and the actual |
302 | ** operation. |
303 | */ |
304 | RI->Offs = (unsigned char) E->RI->In.RegY; |
305 | RI->Flags |= (LI_DIRECT | LI_RELOAD_Y); |
306 | } |
307 | |
308 | |
309 | } else if (E->Info & OF_XFR) { |
310 | |
311 | /* Determine source and target of the transfer and handle the TSX insn */ |
312 | LoadRegInfo* Src; |
313 | LoadRegInfo* Tgt; |
314 | switch (E->OPC) { |
315 | case OP65_TAX: Src = &LI->A; Tgt = &LI->X; break; |
316 | case OP65_TAY: Src = &LI->A; Tgt = &LI->Y; break; |
317 | case OP65_TXA: Src = &LI->X; Tgt = &LI->A; break; |
318 | case OP65_TYA: Src = &LI->Y; Tgt = &LI->A; break; |
319 | case OP65_TSX: ClearLoadRegInfo (&LI->X); return; |
320 | case OP65_TXS: return; |
321 | default: Internal ("Unknown XFR insn in TrackLoads" ); |
322 | } |
323 | |
324 | /* If we had a load or xfer op before, this is a duplicate load which |
325 | ** can cause problems if it encountered between the pushax and the op, |
326 | ** so remember it. |
327 | */ |
328 | if (Tgt->LoadIndex >= 0 || Tgt->XferIndex >= 0) { |
329 | Tgt->Flags |= LI_DUP_LOAD; |
330 | } |
331 | |
332 | /* Transfer the data */ |
333 | Tgt->LoadIndex = Src->LoadIndex; |
334 | Tgt->XferIndex = I; |
335 | Tgt->Offs = Src->Offs; |
336 | Tgt->Flags &= ~(LI_DIRECT | LI_RELOAD_Y); |
337 | Tgt->Flags |= Src->Flags & (LI_DIRECT | LI_RELOAD_Y); |
338 | |
339 | } else if (CE_IsCallTo (E, "ldaxysp" ) && RegValIsKnown (E->RI->In.RegY)) { |
340 | |
341 | /* If we had a load or xfer op before, this is a duplicate load which |
342 | ** can cause problems if it encountered between the pushax and the op, |
343 | ** so remember it for both registers involved. |
344 | */ |
345 | if (LI->A.LoadIndex >= 0 || LI->A.XferIndex >= 0) { |
346 | LI->A.Flags |= LI_DUP_LOAD; |
347 | } |
348 | if (LI->X.LoadIndex >= 0 || LI->X.XferIndex >= 0) { |
349 | LI->X.Flags |= LI_DUP_LOAD; |
350 | } |
351 | |
352 | /* Both registers set, Y changed */ |
353 | LI->A.LoadIndex = I; |
354 | LI->A.XferIndex = -1; |
355 | LI->A.Flags |= (LI_DIRECT | LI_RELOAD_Y); |
356 | LI->A.Offs = (unsigned char) E->RI->In.RegY - 1; |
357 | |
358 | LI->X.LoadIndex = I; |
359 | LI->X.XferIndex = -1; |
360 | LI->X.Flags |= (LI_DIRECT | LI_RELOAD_Y); |
361 | LI->X.Offs = (unsigned char) E->RI->In.RegY; |
362 | |
363 | ClearLoadRegInfo (&LI->Y); |
364 | } else { |
365 | HonourUseAndChg (&LI->A, REG_A, E); |
366 | HonourUseAndChg (&LI->X, REG_X, E); |
367 | HonourUseAndChg (&LI->Y, REG_Y, E); |
368 | } |
369 | } |
370 | |
371 | |
372 | |
373 | /*****************************************************************************/ |
374 | /* Helpers */ |
375 | /*****************************************************************************/ |
376 | |
377 | |
378 | |
379 | static void InsertEntry (StackOpData* D, CodeEntry* E, int Index) |
380 | /* Insert a new entry. Depending on Index, D->PushIndex and D->OpIndex will |
381 | ** be adjusted by this function. |
382 | */ |
383 | { |
384 | /* Insert the entry into the code segment */ |
385 | CS_InsertEntry (D->Code, E, Index); |
386 | |
387 | /* Adjust register loads if necessary */ |
388 | AdjustLoadInfo (&D->Lhs, Index, 1); |
389 | AdjustLoadInfo (&D->Rhs, Index, 1); |
390 | |
391 | /* Adjust the indices if necessary */ |
392 | if (D->PushEntry && Index <= D->PushIndex) { |
393 | ++D->PushIndex; |
394 | } |
395 | if (D->OpEntry && Index <= D->OpIndex) { |
396 | ++D->OpIndex; |
397 | } |
398 | } |
399 | |
400 | |
401 | |
402 | static void DelEntry (StackOpData* D, int Index) |
403 | /* Delete an entry. Depending on Index, D->PushIndex and D->OpIndex will be |
404 | ** adjusted by this function, and PushEntry/OpEntry may get invalidated. |
405 | */ |
406 | { |
407 | /* Delete the entry from the code segment */ |
408 | CS_DelEntry (D->Code, Index); |
409 | |
410 | /* Adjust register loads if necessary */ |
411 | AdjustLoadInfo (&D->Lhs, Index, -1); |
412 | AdjustLoadInfo (&D->Rhs, Index, -1); |
413 | |
414 | /* Adjust the other indices if necessary */ |
415 | if (Index < D->PushIndex) { |
416 | --D->PushIndex; |
417 | } else if (Index == D->PushIndex) { |
418 | D->PushEntry = 0; |
419 | } |
420 | if (Index < D->OpIndex) { |
421 | --D->OpIndex; |
422 | } else if (Index == D->OpIndex) { |
423 | D->OpEntry = 0; |
424 | } |
425 | } |
426 | |
427 | |
428 | |
429 | static void AdjustStackOffset (StackOpData* D, unsigned Offs) |
430 | /* Adjust the offset for all stack accesses in the range PushIndex to OpIndex. |
431 | ** OpIndex is adjusted according to the insertions. |
432 | */ |
433 | { |
434 | /* Walk over all entries */ |
435 | int I = D->PushIndex + 1; |
436 | while (I < D->OpIndex) { |
437 | |
438 | CodeEntry* E = CS_GetEntry (D->Code, I); |
439 | |
440 | /* Check if this entry does a stack access, and if so, if it's a plain |
441 | ** load from stack, since this is needed later. |
442 | */ |
443 | int Correction = 0; |
444 | if ((E->Use & REG_SP) != 0) { |
445 | |
446 | /* Check for some things that should not happen */ |
447 | CHECK (E->AM == AM65_ZP_INDY || E->RI->In.RegY >= (short) Offs); |
448 | CHECK (strcmp (E->Arg, "sp" ) == 0); |
449 | /* We need to correct this one */ |
450 | Correction = (E->OPC == OP65_LDA)? 2 : 1; |
451 | |
452 | } else if (CE_IsCallTo (E, "ldaxysp" )) { |
453 | /* We need to correct this one */ |
454 | Correction = 1; |
455 | } |
456 | |
457 | if (Correction) { |
458 | /* Get the code entry before this one. If it's a LDY, adjust the |
459 | ** value. |
460 | */ |
461 | CodeEntry* P = CS_GetPrevEntry (D->Code, I); |
462 | if (P && P->OPC == OP65_LDY && CE_IsConstImm (P)) { |
463 | /* The Y load is just before the stack access, adjust it */ |
464 | CE_SetNumArg (P, P->Num - Offs); |
465 | } else { |
466 | /* Insert a new load instruction before the stack access */ |
467 | const char* Arg = MakeHexArg (E->RI->In.RegY - Offs); |
468 | CodeEntry* X = NewCodeEntry (OP65_LDY, AM65_IMM, Arg, 0, E->LI); |
469 | InsertEntry (D, X, I++); |
470 | } |
471 | |
472 | /* If we need the value of Y later, be sure to reload it */ |
473 | if (RegYUsed (D->Code, I+1)) { |
474 | CodeEntry* N; |
475 | const char* Arg = MakeHexArg (E->RI->In.RegY); |
476 | if (Correction == 2 && (N = CS_GetNextEntry(D->Code, I)) != 0 && |
477 | ((N->Info & OF_ZBRA) != 0) && N->JumpTo != 0) { |
478 | /* The Y register is used but the load instruction loads A |
479 | ** and is followed by a branch that evaluates the zero flag. |
480 | ** This means that we cannot just insert the load insn |
481 | ** for the Y register at this place, because it would |
482 | ** destroy the Z flag. Instead place load insns at the |
483 | ** target of the branch and after it. |
484 | ** Note: There is a chance that this code won't work. The |
485 | ** jump may be a backwards jump (in which case the stack |
486 | ** offset has already been adjusted) or there may be other |
487 | ** instructions between the load and the conditional jump. |
488 | ** Currently the compiler does not generate such code, but |
489 | ** it is possible to force the optimizer into something |
490 | ** invalid by use of inline assembler. |
491 | */ |
492 | |
493 | /* Add load insn after the branch */ |
494 | CodeEntry* X = NewCodeEntry (OP65_LDY, AM65_IMM, Arg, 0, E->LI); |
495 | InsertEntry (D, X, I+2); |
496 | |
497 | /* Add load insn before branch target */ |
498 | CodeEntry* Y = NewCodeEntry (OP65_LDY, AM65_IMM, Arg, 0, E->LI); |
499 | int J = CS_GetEntryIndex (D->Code, N->JumpTo->Owner); |
500 | CHECK (J > I); /* Must not happen */ |
501 | InsertEntry (D, Y, J); |
502 | |
503 | /* Move the label to the new insn */ |
504 | CodeLabel* L = CS_GenLabel (D->Code, Y); |
505 | CS_MoveLabelRef (D->Code, N, L); |
506 | } else { |
507 | CodeEntry* X = NewCodeEntry (OP65_LDY, AM65_IMM, Arg, 0, E->LI); |
508 | InsertEntry (D, X, I+1); |
509 | /* Skip this instruction in the next round */ |
510 | ++I; |
511 | } |
512 | } |
513 | } |
514 | |
515 | /* Next entry */ |
516 | ++I; |
517 | } |
518 | |
519 | /* If we have rhs load insns that load from stack, we'll have to adjust |
520 | ** the offsets for these also. |
521 | */ |
522 | if (D->Rhs.A.Flags & LI_RELOAD_Y) { |
523 | D->Rhs.A.Offs -= Offs; |
524 | } |
525 | if (D->Rhs.X.Flags & LI_RELOAD_Y) { |
526 | D->Rhs.X.Offs -= Offs; |
527 | } |
528 | } |
529 | |
530 | |
531 | |
532 | static void AddStoreA (StackOpData* D) |
533 | /* Add a store to zero page after the push insn */ |
534 | { |
535 | CodeEntry* X = NewCodeEntry (OP65_STA, AM65_ZP, D->ZPLo, 0, D->PushEntry->LI); |
536 | InsertEntry (D, X, D->PushIndex+1); |
537 | } |
538 | |
539 | |
540 | |
541 | static void AddStoreX (StackOpData* D) |
542 | /* Add a store to zero page after the push insn */ |
543 | { |
544 | CodeEntry* X = NewCodeEntry (OP65_STX, AM65_ZP, D->ZPHi, 0, D->PushEntry->LI); |
545 | InsertEntry (D, X, D->PushIndex+1); |
546 | } |
547 | |
548 | |
549 | |
550 | static void ReplacePushByStore (StackOpData* D) |
551 | /* Replace the call to the push subroutine by a store into the zero page |
552 | ** location (actually, the push is not replaced, because we need it for |
553 | ** later, but the name is still ok since the push will get removed at the |
554 | ** end of each routine). |
555 | */ |
556 | { |
557 | /* Store the value into the zeropage instead of pushing it. Check high |
558 | ** byte first so that the store is later in A/X order. |
559 | */ |
560 | if ((D->Lhs.X.Flags & LI_DIRECT) == 0) { |
561 | AddStoreX (D); |
562 | } |
563 | if ((D->Lhs.A.Flags & LI_DIRECT) == 0) { |
564 | AddStoreA (D); |
565 | } |
566 | } |
567 | |
568 | |
569 | |
570 | static void AddOpLow (StackOpData* D, opc_t OPC, LoadInfo* LI) |
571 | /* Add an op for the low byte of an operator. This function honours the |
572 | ** OP_DIRECT and OP_RELOAD_Y flags and generates the necessary instructions. |
573 | ** All code is inserted at the current insertion point. |
574 | */ |
575 | { |
576 | CodeEntry* X; |
577 | |
578 | if ((LI->A.Flags & LI_DIRECT) != 0) { |
579 | /* Op with a variable location. If the location is on the stack, we |
580 | ** need to reload the Y register. |
581 | */ |
582 | if ((LI->A.Flags & LI_RELOAD_Y) == 0) { |
583 | |
584 | /* opc ... */ |
585 | CodeEntry* LoadA = LI->A.LoadEntry; |
586 | X = NewCodeEntry (OPC, LoadA->AM, LoadA->Arg, 0, D->OpEntry->LI); |
587 | InsertEntry (D, X, D->IP++); |
588 | |
589 | } else { |
590 | |
591 | /* ldy #offs */ |
592 | const char* Arg = MakeHexArg (LI->A.Offs); |
593 | X = NewCodeEntry (OP65_LDY, AM65_IMM, Arg, 0, D->OpEntry->LI); |
594 | InsertEntry (D, X, D->IP++); |
595 | |
596 | /* opc (sp),y */ |
597 | X = NewCodeEntry (OPC, AM65_ZP_INDY, "sp" , 0, D->OpEntry->LI); |
598 | InsertEntry (D, X, D->IP++); |
599 | |
600 | } |
601 | |
602 | /* In both cases, we can remove the load */ |
603 | LI->A.Flags |= LI_REMOVE; |
604 | |
605 | } else { |
606 | |
607 | /* Op with temp storage */ |
608 | X = NewCodeEntry (OPC, AM65_ZP, D->ZPLo, 0, D->OpEntry->LI); |
609 | InsertEntry (D, X, D->IP++); |
610 | |
611 | } |
612 | } |
613 | |
614 | |
615 | |
616 | static void AddOpHigh (StackOpData* D, opc_t OPC, LoadInfo* LI, int KeepResult) |
617 | /* Add an op for the high byte of an operator. Special cases (constant values |
618 | ** or similar) have to be checked separately, the function covers only the |
619 | ** generic case. Code is inserted at the insertion point. |
620 | */ |
621 | { |
622 | CodeEntry* X; |
623 | |
624 | if (KeepResult) { |
625 | /* pha */ |
626 | X = NewCodeEntry (OP65_PHA, AM65_IMP, 0, 0, D->OpEntry->LI); |
627 | InsertEntry (D, X, D->IP++); |
628 | } |
629 | |
630 | /* txa */ |
631 | X = NewCodeEntry (OP65_TXA, AM65_IMP, 0, 0, D->OpEntry->LI); |
632 | InsertEntry (D, X, D->IP++); |
633 | |
634 | if ((LI->X.Flags & LI_DIRECT) != 0) { |
635 | |
636 | if ((LI->X.Flags & LI_RELOAD_Y) == 0) { |
637 | |
638 | /* opc xxx */ |
639 | CodeEntry* LoadX = LI->X.LoadEntry; |
640 | X = NewCodeEntry (OPC, LoadX->AM, LoadX->Arg, 0, D->OpEntry->LI); |
641 | InsertEntry (D, X, D->IP++); |
642 | |
643 | } else { |
644 | |
645 | /* ldy #const */ |
646 | const char* Arg = MakeHexArg (LI->X.Offs); |
647 | X = NewCodeEntry (OP65_LDY, AM65_IMM, Arg, 0, D->OpEntry->LI); |
648 | InsertEntry (D, X, D->IP++); |
649 | |
650 | /* opc (sp),y */ |
651 | X = NewCodeEntry (OPC, AM65_ZP_INDY, "sp" , 0, D->OpEntry->LI); |
652 | InsertEntry (D, X, D->IP++); |
653 | } |
654 | |
655 | /* In both cases, we can remove the load */ |
656 | LI->X.Flags |= LI_REMOVE; |
657 | |
658 | } else { |
659 | /* opc zphi */ |
660 | X = NewCodeEntry (OPC, AM65_ZP, D->ZPHi, 0, D->OpEntry->LI); |
661 | InsertEntry (D, X, D->IP++); |
662 | } |
663 | |
664 | if (KeepResult) { |
665 | /* tax */ |
666 | X = NewCodeEntry (OP65_TAX, AM65_IMP, 0, 0, D->OpEntry->LI); |
667 | InsertEntry (D, X, D->IP++); |
668 | |
669 | /* pla */ |
670 | X = NewCodeEntry (OP65_PLA, AM65_IMP, 0, 0, D->OpEntry->LI); |
671 | InsertEntry (D, X, D->IP++); |
672 | } |
673 | } |
674 | |
675 | |
676 | |
677 | static void RemoveRegLoads (StackOpData* D, LoadInfo* LI) |
678 | /* Remove register load insns */ |
679 | { |
680 | /* Both registers may be loaded with one insn, but DelEntry will in this |
681 | ** case clear the other one. |
682 | */ |
683 | if ((LI->A.Flags & (LI_REMOVE | LI_DONT_REMOVE)) == LI_REMOVE) { |
684 | if (LI->A.LoadIndex >= 0) { |
685 | DelEntry (D, LI->A.LoadIndex); |
686 | } |
687 | if (LI->A.XferIndex >= 0) { |
688 | DelEntry (D, LI->A.XferIndex); |
689 | } |
690 | } |
691 | if ((LI->X.Flags & (LI_REMOVE | LI_DONT_REMOVE)) == LI_REMOVE) { |
692 | if (LI->X.LoadIndex >= 0) { |
693 | DelEntry (D, LI->X.LoadIndex); |
694 | } |
695 | if (LI->X.XferIndex >= 0) { |
696 | DelEntry (D, LI->X.XferIndex); |
697 | } |
698 | } |
699 | } |
700 | |
701 | |
702 | |
703 | static void RemoveRemainders (StackOpData* D) |
704 | /* Remove the code that is unnecessary after translation of the sequence */ |
705 | { |
706 | /* Remove the register loads for lhs and rhs */ |
707 | RemoveRegLoads (D, &D->Lhs); |
708 | RemoveRegLoads (D, &D->Rhs); |
709 | |
710 | /* Remove the push and the operator routine */ |
711 | DelEntry (D, D->OpIndex); |
712 | DelEntry (D, D->PushIndex); |
713 | } |
714 | |
715 | |
716 | |
717 | static int IsRegVar (StackOpData* D) |
718 | /* If the value pushed is that of a zeropage variable, replace ZPLo and ZPHi |
719 | ** in the given StackOpData struct by the variable and return true. Otherwise |
720 | ** leave D untouched and return false. |
721 | */ |
722 | { |
723 | CodeEntry* LoadA = D->Lhs.A.LoadEntry; |
724 | CodeEntry* LoadX = D->Lhs.X.LoadEntry; |
725 | unsigned Len; |
726 | |
727 | /* Must have both load insns */ |
728 | if (LoadA == 0 || LoadX == 0) { |
729 | return 0; |
730 | } |
731 | |
732 | /* Must be loads from zp */ |
733 | if (LoadA->AM != AM65_ZP || LoadX->AM != AM65_ZP) { |
734 | return 0; |
735 | } |
736 | |
737 | /* Must be the same zp loc with high byte in X */ |
738 | Len = strlen (LoadA->Arg); |
739 | if (strncmp (LoadA->Arg, LoadX->Arg, Len) != 0 || |
740 | strcmp (LoadX->Arg + Len, "+1" ) != 0) { |
741 | return 0; |
742 | } |
743 | |
744 | /* Use the zero page location directly */ |
745 | D->ZPLo = LoadA->Arg; |
746 | D->ZPHi = LoadX->Arg; |
747 | return 1; |
748 | } |
749 | |
750 | |
751 | |
752 | /*****************************************************************************/ |
753 | /* Actual optimization functions */ |
754 | /*****************************************************************************/ |
755 | |
756 | |
757 | |
758 | static unsigned Opt_toseqax_tosneax (StackOpData* D, const char* BoolTransformer) |
759 | /* Optimize the toseqax and tosneax sequences. */ |
760 | { |
761 | CodeEntry* X; |
762 | CodeLabel* L; |
763 | |
764 | /* Create a call to the boolean transformer function and a label for this |
765 | ** insn. This is needed for all variants. Other insns are inserted *before* |
766 | ** the call. |
767 | */ |
768 | X = NewCodeEntry (OP65_JSR, AM65_ABS, BoolTransformer, 0, D->OpEntry->LI); |
769 | InsertEntry (D, X, D->OpIndex + 1); |
770 | L = CS_GenLabel (D->Code, X); |
771 | |
772 | /* If the lhs is direct (but not stack relative), encode compares with lhs |
773 | ** effectively reverting the order (which doesn't matter for ==). |
774 | */ |
775 | if ((D->Lhs.A.Flags & (LI_DIRECT | LI_RELOAD_Y)) == LI_DIRECT && |
776 | (D->Lhs.X.Flags & (LI_DIRECT | LI_RELOAD_Y)) == LI_DIRECT) { |
777 | |
778 | CodeEntry* LoadX = D->Lhs.X.LoadEntry; |
779 | CodeEntry* LoadA = D->Lhs.A.LoadEntry; |
780 | |
781 | D->IP = D->OpIndex+1; |
782 | |
783 | /* cpx */ |
784 | X = NewCodeEntry (OP65_CPX, LoadX->AM, LoadX->Arg, 0, D->OpEntry->LI); |
785 | InsertEntry (D, X, D->IP++); |
786 | |
787 | /* bne L */ |
788 | X = NewCodeEntry (OP65_BNE, AM65_BRA, L->Name, L, D->OpEntry->LI); |
789 | InsertEntry (D, X, D->IP++); |
790 | |
791 | /* cmp */ |
792 | X = NewCodeEntry (OP65_CMP, LoadA->AM, LoadA->Arg, 0, D->OpEntry->LI); |
793 | InsertEntry (D, X, D->IP++); |
794 | |
795 | /* Lhs load entries can be removed */ |
796 | D->Lhs.X.Flags |= LI_REMOVE; |
797 | D->Lhs.A.Flags |= LI_REMOVE; |
798 | |
799 | } else if ((D->Rhs.A.Flags & (LI_DIRECT | LI_RELOAD_Y)) == LI_DIRECT && |
800 | (D->Rhs.X.Flags & (LI_DIRECT | LI_RELOAD_Y)) == LI_DIRECT) { |
801 | |
802 | CodeEntry* LoadX = D->Rhs.X.LoadEntry; |
803 | CodeEntry* LoadA = D->Rhs.A.LoadEntry; |
804 | |
805 | D->IP = D->OpIndex+1; |
806 | |
807 | /* cpx */ |
808 | X = NewCodeEntry (OP65_CPX, LoadX->AM, LoadX->Arg, 0, D->OpEntry->LI); |
809 | InsertEntry (D, X, D->IP++); |
810 | |
811 | /* bne L */ |
812 | X = NewCodeEntry (OP65_BNE, AM65_BRA, L->Name, L, D->OpEntry->LI); |
813 | InsertEntry (D, X, D->IP++); |
814 | |
815 | /* cmp */ |
816 | X = NewCodeEntry (OP65_CMP, LoadA->AM, LoadA->Arg, 0, D->OpEntry->LI); |
817 | InsertEntry (D, X, D->IP++); |
818 | |
819 | /* Rhs load entries can be removed */ |
820 | D->Rhs.X.Flags |= LI_REMOVE; |
821 | D->Rhs.A.Flags |= LI_REMOVE; |
822 | |
823 | } else if ((D->Rhs.A.Flags & LI_DIRECT) != 0 && |
824 | (D->Rhs.X.Flags & LI_DIRECT) != 0) { |
825 | |
826 | D->IP = D->OpIndex+1; |
827 | |
828 | /* Add operand for low byte */ |
829 | AddOpLow (D, OP65_CMP, &D->Rhs); |
830 | |
831 | /* bne L */ |
832 | X = NewCodeEntry (OP65_BNE, AM65_BRA, L->Name, L, D->OpEntry->LI); |
833 | InsertEntry (D, X, D->IP++); |
834 | |
835 | /* Add operand for high byte */ |
836 | AddOpHigh (D, OP65_CMP, &D->Rhs, 0); |
837 | |
838 | } else { |
839 | |
840 | /* Save lhs into zeropage, then compare */ |
841 | AddStoreX (D); |
842 | AddStoreA (D); |
843 | |
844 | D->IP = D->OpIndex+1; |
845 | |
846 | /* cpx */ |
847 | X = NewCodeEntry (OP65_CPX, AM65_ZP, D->ZPHi, 0, D->OpEntry->LI); |
848 | InsertEntry (D, X, D->IP++); |
849 | |
850 | /* bne L */ |
851 | X = NewCodeEntry (OP65_BNE, AM65_BRA, L->Name, L, D->OpEntry->LI); |
852 | InsertEntry (D, X, D->IP++); |
853 | |
854 | /* cmp */ |
855 | X = NewCodeEntry (OP65_CMP, AM65_ZP, D->ZPLo, 0, D->OpEntry->LI); |
856 | InsertEntry (D, X, D->IP++); |
857 | |
858 | } |
859 | |
860 | /* Remove the push and the call to the tosgeax function */ |
861 | RemoveRemainders (D); |
862 | |
863 | /* We changed the sequence */ |
864 | return 1; |
865 | } |
866 | |
867 | |
868 | |
869 | static unsigned Opt_tosshift (StackOpData* D, const char* Name) |
870 | /* Optimize shift sequences. */ |
871 | { |
872 | CodeEntry* X; |
873 | |
874 | /* Store the value into the zeropage instead of pushing it */ |
875 | ReplacePushByStore (D); |
876 | |
877 | /* If the lhs is direct (but not stack relative), we can just reload the |
878 | ** data later. |
879 | */ |
880 | if ((D->Lhs.A.Flags & (LI_DIRECT | LI_RELOAD_Y)) == LI_DIRECT && |
881 | (D->Lhs.X.Flags & (LI_DIRECT | LI_RELOAD_Y)) == LI_DIRECT) { |
882 | |
883 | CodeEntry* LoadX = D->Lhs.X.LoadEntry; |
884 | CodeEntry* LoadA = D->Lhs.A.LoadEntry; |
885 | |
886 | /* Inline the shift */ |
887 | D->IP = D->OpIndex+1; |
888 | |
889 | /* tay */ |
890 | X = NewCodeEntry (OP65_TAY, AM65_IMP, 0, 0, D->OpEntry->LI); |
891 | InsertEntry (D, X, D->IP++); |
892 | |
893 | /* lda */ |
894 | X = NewCodeEntry (OP65_LDA, LoadA->AM, LoadA->Arg, 0, D->OpEntry->LI); |
895 | InsertEntry (D, X, D->IP++); |
896 | |
897 | /* ldx */ |
898 | X = NewCodeEntry (OP65_LDX, LoadX->AM, LoadX->Arg, 0, D->OpEntry->LI); |
899 | InsertEntry (D, X, D->IP++); |
900 | |
901 | /* Lhs load entries can be removed */ |
902 | D->Lhs.X.Flags |= LI_REMOVE; |
903 | D->Lhs.A.Flags |= LI_REMOVE; |
904 | |
905 | } else { |
906 | |
907 | /* Save lhs into zeropage and reload later */ |
908 | AddStoreX (D); |
909 | AddStoreA (D); |
910 | |
911 | /* Be sure to setup IP after adding the stores, otherwise it will get |
912 | ** messed up. |
913 | */ |
914 | D->IP = D->OpIndex+1; |
915 | |
916 | /* tay */ |
917 | X = NewCodeEntry (OP65_TAY, AM65_IMP, 0, 0, D->OpEntry->LI); |
918 | InsertEntry (D, X, D->IP++); |
919 | |
920 | /* lda zp */ |
921 | X = NewCodeEntry (OP65_LDA, AM65_ZP, D->ZPLo, 0, D->OpEntry->LI); |
922 | InsertEntry (D, X, D->IP++); |
923 | |
924 | /* ldx zp+1 */ |
925 | X = NewCodeEntry (OP65_LDX, AM65_ZP, D->ZPHi, 0, D->OpEntry->LI); |
926 | InsertEntry (D, X, D->IP++); |
927 | |
928 | } |
929 | |
930 | /* jsr shlaxy/aslaxy/whatever */ |
931 | X = NewCodeEntry (OP65_JSR, AM65_ABS, Name, 0, D->OpEntry->LI); |
932 | InsertEntry (D, X, D->IP++); |
933 | |
934 | /* Remove the push and the call to the shift function */ |
935 | RemoveRemainders (D); |
936 | |
937 | /* We changed the sequence */ |
938 | return 1; |
939 | } |
940 | |
941 | |
942 | |
943 | static unsigned Opt___bzero (StackOpData* D) |
944 | /* Optimize the __bzero sequence */ |
945 | { |
946 | CodeEntry* X; |
947 | const char* Arg; |
948 | CodeLabel* L; |
949 | |
950 | /* Check if we're using a register variable */ |
951 | if (!IsRegVar (D)) { |
952 | /* Store the value into the zeropage instead of pushing it */ |
953 | AddStoreX (D); |
954 | AddStoreA (D); |
955 | } |
956 | |
957 | /* If the return value of __bzero is used, we have to add code to reload |
958 | ** a/x from the pointer variable. |
959 | */ |
960 | if (RegAXUsed (D->Code, D->OpIndex+1)) { |
961 | X = NewCodeEntry (OP65_LDA, AM65_ZP, D->ZPLo, 0, D->OpEntry->LI); |
962 | InsertEntry (D, X, D->OpIndex+1); |
963 | X = NewCodeEntry (OP65_LDX, AM65_ZP, D->ZPHi, 0, D->OpEntry->LI); |
964 | InsertEntry (D, X, D->OpIndex+2); |
965 | } |
966 | |
967 | /* X is always zero, A contains the size of the data area to zero. |
968 | ** Note: A may be zero, in which case the operation is null op. |
969 | */ |
970 | if (D->OpEntry->RI->In.RegA != 0) { |
971 | |
972 | /* lda #$00 */ |
973 | X = NewCodeEntry (OP65_LDA, AM65_IMM, "$00" , 0, D->OpEntry->LI); |
974 | InsertEntry (D, X, D->OpIndex+1); |
975 | |
976 | /* The value of A is known */ |
977 | if (D->OpEntry->RI->In.RegA <= 0x81) { |
978 | |
979 | /* Loop using the sign bit */ |
980 | |
981 | /* ldy #count-1 */ |
982 | Arg = MakeHexArg (D->OpEntry->RI->In.RegA - 1); |
983 | X = NewCodeEntry (OP65_LDY, AM65_IMM, Arg, 0, D->OpEntry->LI); |
984 | InsertEntry (D, X, D->OpIndex+2); |
985 | |
986 | /* L: sta (zp),y */ |
987 | X = NewCodeEntry (OP65_STA, AM65_ZP_INDY, D->ZPLo, 0, D->OpEntry->LI); |
988 | InsertEntry (D, X, D->OpIndex+3); |
989 | L = CS_GenLabel (D->Code, X); |
990 | |
991 | /* dey */ |
992 | X = NewCodeEntry (OP65_DEY, AM65_IMP, 0, 0, D->OpEntry->LI); |
993 | InsertEntry (D, X, D->OpIndex+4); |
994 | |
995 | /* bpl L */ |
996 | X = NewCodeEntry (OP65_BPL, AM65_BRA, L->Name, L, D->OpEntry->LI); |
997 | InsertEntry (D, X, D->OpIndex+5); |
998 | |
999 | } else { |
1000 | |
1001 | /* Loop using an explicit compare */ |
1002 | |
1003 | /* ldy #$00 */ |
1004 | X = NewCodeEntry (OP65_LDY, AM65_IMM, "$00" , 0, D->OpEntry->LI); |
1005 | InsertEntry (D, X, D->OpIndex+2); |
1006 | |
1007 | /* L: sta (zp),y */ |
1008 | X = NewCodeEntry (OP65_STA, AM65_ZP_INDY, D->ZPLo, 0, D->OpEntry->LI); |
1009 | InsertEntry (D, X, D->OpIndex+3); |
1010 | L = CS_GenLabel (D->Code, X); |
1011 | |
1012 | /* iny */ |
1013 | X = NewCodeEntry (OP65_INY, AM65_IMP, 0, 0, D->OpEntry->LI); |
1014 | InsertEntry (D, X, D->OpIndex+4); |
1015 | |
1016 | /* cpy #count */ |
1017 | Arg = MakeHexArg (D->OpEntry->RI->In.RegA); |
1018 | X = NewCodeEntry (OP65_CPY, AM65_IMM, Arg, 0, D->OpEntry->LI); |
1019 | InsertEntry (D, X, D->OpIndex+5); |
1020 | |
1021 | /* bne L */ |
1022 | X = NewCodeEntry (OP65_BNE, AM65_BRA, L->Name, L, D->OpEntry->LI); |
1023 | InsertEntry (D, X, D->OpIndex+6); |
1024 | } |
1025 | |
1026 | } |
1027 | |
1028 | /* Remove the push and the call to the __bzero function */ |
1029 | RemoveRemainders (D); |
1030 | |
1031 | /* We changed the sequence */ |
1032 | return 1; |
1033 | } |
1034 | |
1035 | |
1036 | |
1037 | static unsigned Opt_staspidx (StackOpData* D) |
1038 | /* Optimize the staspidx sequence */ |
1039 | { |
1040 | CodeEntry* X; |
1041 | |
1042 | /* Check if we're using a register variable */ |
1043 | if (!IsRegVar (D)) { |
1044 | /* Store the value into the zeropage instead of pushing it */ |
1045 | AddStoreX (D); |
1046 | AddStoreA (D); |
1047 | } |
1048 | |
1049 | /* Replace the store subroutine call by a direct op */ |
1050 | X = NewCodeEntry (OP65_STA, AM65_ZP_INDY, D->ZPLo, 0, D->OpEntry->LI); |
1051 | InsertEntry (D, X, D->OpIndex+1); |
1052 | |
1053 | /* Remove the push and the call to the staspidx function */ |
1054 | RemoveRemainders (D); |
1055 | |
1056 | /* We changed the sequence */ |
1057 | return 1; |
1058 | } |
1059 | |
1060 | |
1061 | |
1062 | static unsigned Opt_staxspidx (StackOpData* D) |
1063 | /* Optimize the staxspidx sequence */ |
1064 | { |
1065 | CodeEntry* X; |
1066 | |
1067 | /* Check if we're using a register variable */ |
1068 | if (!IsRegVar (D)) { |
1069 | /* Store the value into the zeropage instead of pushing it */ |
1070 | AddStoreX (D); |
1071 | AddStoreA (D); |
1072 | } |
1073 | |
1074 | /* Inline the store */ |
1075 | |
1076 | /* sta (zp),y */ |
1077 | X = NewCodeEntry (OP65_STA, AM65_ZP_INDY, D->ZPLo, 0, D->OpEntry->LI); |
1078 | InsertEntry (D, X, D->OpIndex+1); |
1079 | |
1080 | if (RegValIsKnown (D->OpEntry->RI->In.RegY)) { |
1081 | /* Value of Y is known */ |
1082 | const char* Arg = MakeHexArg (D->OpEntry->RI->In.RegY + 1); |
1083 | X = NewCodeEntry (OP65_LDY, AM65_IMM, Arg, 0, D->OpEntry->LI); |
1084 | } else { |
1085 | X = NewCodeEntry (OP65_INY, AM65_IMP, 0, 0, D->OpEntry->LI); |
1086 | } |
1087 | InsertEntry (D, X, D->OpIndex+2); |
1088 | |
1089 | if (RegValIsKnown (D->OpEntry->RI->In.RegX)) { |
1090 | /* Value of X is known */ |
1091 | const char* Arg = MakeHexArg (D->OpEntry->RI->In.RegX); |
1092 | X = NewCodeEntry (OP65_LDA, AM65_IMM, Arg, 0, D->OpEntry->LI); |
1093 | } else { |
1094 | /* Value unknown */ |
1095 | X = NewCodeEntry (OP65_TXA, AM65_IMP, 0, 0, D->OpEntry->LI); |
1096 | } |
1097 | InsertEntry (D, X, D->OpIndex+3); |
1098 | |
1099 | /* sta (zp),y */ |
1100 | X = NewCodeEntry (OP65_STA, AM65_ZP_INDY, D->ZPLo, 0, D->OpEntry->LI); |
1101 | InsertEntry (D, X, D->OpIndex+4); |
1102 | |
1103 | /* If we remove staxspidx, we must restore the Y register to what the |
1104 | ** function would return. |
1105 | */ |
1106 | X = NewCodeEntry (OP65_LDY, AM65_IMM, "$00" , 0, D->OpEntry->LI); |
1107 | InsertEntry (D, X, D->OpIndex+5); |
1108 | |
1109 | /* Remove the push and the call to the staxspidx function */ |
1110 | RemoveRemainders (D); |
1111 | |
1112 | /* We changed the sequence */ |
1113 | return 1; |
1114 | } |
1115 | |
1116 | |
1117 | |
1118 | static unsigned Opt_tosaddax (StackOpData* D) |
1119 | /* Optimize the tosaddax sequence */ |
1120 | { |
1121 | CodeEntry* X; |
1122 | CodeEntry* N; |
1123 | |
1124 | /* We need the entry behind the add */ |
1125 | CHECK (D->NextEntry != 0); |
1126 | |
1127 | /* Check if the X register is known and zero when the add is done, and |
1128 | ** if the add is followed by |
1129 | ** |
1130 | ** ldy #$00 |
1131 | ** jsr ldauidx ; or ldaidx |
1132 | ** |
1133 | ** If this is true, the addition does actually add an offset to a pointer |
1134 | ** before it is dereferenced. Since both subroutines take an offset in Y, |
1135 | ** we can pass the offset (instead of #$00) and remove the addition |
1136 | ** alltogether. |
1137 | */ |
1138 | if (D->OpEntry->RI->In.RegX == 0 && |
1139 | D->NextEntry->OPC == OP65_LDY && |
1140 | CE_IsKnownImm (D->NextEntry, 0) && |
1141 | !CE_HasLabel (D->NextEntry) && |
1142 | (N = CS_GetNextEntry (D->Code, D->OpIndex + 1)) != 0 && |
1143 | (CE_IsCallTo (N, "ldauidx" ) || |
1144 | CE_IsCallTo (N, "ldaidx" ))) { |
1145 | |
1146 | int Signed = (strcmp (N->Arg, "ldaidx" ) == 0); |
1147 | |
1148 | /* Store the value into the zeropage instead of pushing it */ |
1149 | AddStoreX (D); |
1150 | AddStoreA (D); |
1151 | |
1152 | /* Replace the ldy by a tay. Be sure to create the new entry before |
1153 | ** deleting the ldy, since we will reference the line info from this |
1154 | ** insn. |
1155 | */ |
1156 | X = NewCodeEntry (OP65_TAY, AM65_IMP, 0, 0, D->NextEntry->LI); |
1157 | DelEntry (D, D->OpIndex + 1); |
1158 | InsertEntry (D, X, D->OpIndex + 1); |
1159 | |
1160 | /* Replace the call to ldaidx/ldauidx. Since X is already zero, and |
1161 | ** the ptr is in the zero page location, we just need to load from |
1162 | ** the pointer, and fix X in case of ldaidx. |
1163 | */ |
1164 | X = NewCodeEntry (OP65_LDA, AM65_ZP_INDY, D->ZPLo, 0, N->LI); |
1165 | DelEntry (D, D->OpIndex + 2); |
1166 | InsertEntry (D, X, D->OpIndex + 2); |
1167 | if (Signed) { |
1168 | |
1169 | CodeLabel* L; |
1170 | |
1171 | /* Add sign extension - N is unused now */ |
1172 | N = CS_GetNextEntry (D->Code, D->OpIndex + 2); |
1173 | CHECK (N != 0); |
1174 | L = CS_GenLabel (D->Code, N); |
1175 | |
1176 | X = NewCodeEntry (OP65_BPL, AM65_BRA, L->Name, L, X->LI); |
1177 | InsertEntry (D, X, D->OpIndex + 3); |
1178 | |
1179 | X = NewCodeEntry (OP65_DEX, AM65_IMP, 0, 0, X->LI); |
1180 | InsertEntry (D, X, D->OpIndex + 4); |
1181 | } |
1182 | |
1183 | } else { |
1184 | |
1185 | /* Store the value into the zeropage instead of pushing it */ |
1186 | ReplacePushByStore (D); |
1187 | |
1188 | /* Inline the add */ |
1189 | D->IP = D->OpIndex+1; |
1190 | |
1191 | /* clc */ |
1192 | X = NewCodeEntry (OP65_CLC, AM65_IMP, 0, 0, D->OpEntry->LI); |
1193 | InsertEntry (D, X, D->IP++); |
1194 | |
1195 | /* Low byte */ |
1196 | AddOpLow (D, OP65_ADC, &D->Lhs); |
1197 | |
1198 | /* High byte */ |
1199 | if (D->PushEntry->RI->In.RegX == 0) { |
1200 | |
1201 | /* The high byte is the value in X plus the carry */ |
1202 | CodeLabel* L = CS_GenLabel (D->Code, D->NextEntry); |
1203 | |
1204 | /* bcc L */ |
1205 | X = NewCodeEntry (OP65_BCC, AM65_BRA, L->Name, L, D->OpEntry->LI); |
1206 | InsertEntry (D, X, D->IP++); |
1207 | |
1208 | /* inx */ |
1209 | X = NewCodeEntry (OP65_INX, AM65_IMP, 0, 0, D->OpEntry->LI); |
1210 | InsertEntry (D, X, D->IP++); |
1211 | |
1212 | } else if (D->OpEntry->RI->In.RegX == 0 && |
1213 | (RegValIsKnown (D->PushEntry->RI->In.RegX) || |
1214 | (D->Lhs.X.Flags & LI_RELOAD_Y) == 0)) { |
1215 | |
1216 | /* The high byte is that of the first operand plus carry */ |
1217 | CodeLabel* L; |
1218 | if (RegValIsKnown (D->PushEntry->RI->In.RegX)) { |
1219 | /* Value of first op high byte is known */ |
1220 | const char* Arg = MakeHexArg (D->PushEntry->RI->In.RegX); |
1221 | X = NewCodeEntry (OP65_LDX, AM65_IMM, Arg, 0, D->OpEntry->LI); |
1222 | } else { |
1223 | /* Value of first op high byte is unknown. Load from ZP or |
1224 | ** original storage. |
1225 | */ |
1226 | if (D->Lhs.X.Flags & LI_DIRECT) { |
1227 | CodeEntry* LoadX = D->Lhs.X.LoadEntry; |
1228 | X = NewCodeEntry (OP65_LDX, LoadX->AM, LoadX->Arg, 0, D->OpEntry->LI); |
1229 | } else { |
1230 | X = NewCodeEntry (OP65_LDX, AM65_ZP, D->ZPHi, 0, D->OpEntry->LI); |
1231 | } |
1232 | } |
1233 | InsertEntry (D, X, D->IP++); |
1234 | |
1235 | /* bcc label */ |
1236 | L = CS_GenLabel (D->Code, D->NextEntry); |
1237 | X = NewCodeEntry (OP65_BCC, AM65_BRA, L->Name, L, D->OpEntry->LI); |
1238 | InsertEntry (D, X, D->IP++); |
1239 | |
1240 | /* inx */ |
1241 | X = NewCodeEntry (OP65_INX, AM65_IMP, 0, 0, D->OpEntry->LI); |
1242 | InsertEntry (D, X, D->IP++); |
1243 | } else { |
1244 | /* High byte is unknown */ |
1245 | AddOpHigh (D, OP65_ADC, &D->Lhs, 1); |
1246 | } |
1247 | } |
1248 | |
1249 | /* Remove the push and the call to the tosaddax function */ |
1250 | RemoveRemainders (D); |
1251 | |
1252 | /* We changed the sequence */ |
1253 | return 1; |
1254 | } |
1255 | |
1256 | |
1257 | |
1258 | static unsigned Opt_tosandax (StackOpData* D) |
1259 | /* Optimize the tosandax sequence */ |
1260 | { |
1261 | /* Store the value into the zeropage instead of pushing it */ |
1262 | ReplacePushByStore (D); |
1263 | |
1264 | /* Inline the and, low byte */ |
1265 | D->IP = D->OpIndex + 1; |
1266 | AddOpLow (D, OP65_AND, &D->Lhs); |
1267 | |
1268 | /* High byte */ |
1269 | AddOpHigh (D, OP65_AND, &D->Lhs, 1); |
1270 | |
1271 | /* Remove the push and the call to the tosandax function */ |
1272 | RemoveRemainders (D); |
1273 | |
1274 | /* We changed the sequence */ |
1275 | return 1; |
1276 | } |
1277 | |
1278 | |
1279 | |
1280 | static unsigned Opt_tosaslax (StackOpData* D) |
1281 | /* Optimize the tosaslax sequence */ |
1282 | { |
1283 | return Opt_tosshift (D, "aslaxy" ); |
1284 | } |
1285 | |
1286 | |
1287 | |
1288 | static unsigned Opt_tosasrax (StackOpData* D) |
1289 | /* Optimize the tosasrax sequence */ |
1290 | { |
1291 | return Opt_tosshift (D, "asraxy" ); |
1292 | } |
1293 | |
1294 | |
1295 | |
1296 | static unsigned Opt_toseqax (StackOpData* D) |
1297 | /* Optimize the toseqax sequence */ |
1298 | { |
1299 | return Opt_toseqax_tosneax (D, "booleq" ); |
1300 | } |
1301 | |
1302 | |
1303 | |
1304 | static unsigned Opt_tosgeax (StackOpData* D) |
1305 | /* Optimize the tosgeax sequence */ |
1306 | { |
1307 | CodeEntry* X; |
1308 | CodeLabel* L; |
1309 | |
1310 | /* Inline the sbc */ |
1311 | D->IP = D->OpIndex+1; |
1312 | |
1313 | /* Must be true because of OP_RHS_LOAD */ |
1314 | CHECK ((D->Rhs.A.Flags & D->Rhs.X.Flags & LI_DIRECT) != 0); |
1315 | |
1316 | /* Add code for low operand */ |
1317 | AddOpLow (D, OP65_CMP, &D->Rhs); |
1318 | |
1319 | /* Add code for high operand */ |
1320 | AddOpHigh (D, OP65_SBC, &D->Rhs, 0); |
1321 | |
1322 | /* eor #$80 */ |
1323 | X = NewCodeEntry (OP65_EOR, AM65_IMM, "$80" , 0, D->OpEntry->LI); |
1324 | InsertEntry (D, X, D->IP++); |
1325 | |
1326 | /* asl a */ |
1327 | X = NewCodeEntry (OP65_ASL, AM65_ACC, "a" , 0, D->OpEntry->LI); |
1328 | InsertEntry (D, X, D->IP++); |
1329 | L = CS_GenLabel (D->Code, X); |
1330 | |
1331 | /* Insert a bvs L before the eor insn */ |
1332 | X = NewCodeEntry (OP65_BVS, AM65_BRA, L->Name, L, D->OpEntry->LI); |
1333 | InsertEntry (D, X, D->IP - 2); |
1334 | ++D->IP; |
1335 | |
1336 | /* lda #$00 */ |
1337 | X = NewCodeEntry (OP65_LDA, AM65_IMM, "$00" , 0, D->OpEntry->LI); |
1338 | InsertEntry (D, X, D->IP++); |
1339 | |
1340 | /* ldx #$00 */ |
1341 | X = NewCodeEntry (OP65_LDX, AM65_IMM, "$00" , 0, D->OpEntry->LI); |
1342 | InsertEntry (D, X, D->IP++); |
1343 | |
1344 | /* rol a */ |
1345 | X = NewCodeEntry (OP65_ROL, AM65_ACC, "a" , 0, D->OpEntry->LI); |
1346 | InsertEntry (D, X, D->IP++); |
1347 | |
1348 | /* Remove the push and the call to the tosgeax function */ |
1349 | RemoveRemainders (D); |
1350 | |
1351 | /* We changed the sequence */ |
1352 | return 1; |
1353 | } |
1354 | |
1355 | |
1356 | |
1357 | static unsigned Opt_tosltax (StackOpData* D) |
1358 | /* Optimize the tosltax sequence */ |
1359 | { |
1360 | CodeEntry* X; |
1361 | CodeLabel* L; |
1362 | |
1363 | |
1364 | /* Inline the compare */ |
1365 | D->IP = D->OpIndex+1; |
1366 | |
1367 | /* Must be true because of OP_RHS_LOAD */ |
1368 | CHECK ((D->Rhs.A.Flags & D->Rhs.X.Flags & LI_DIRECT) != 0); |
1369 | |
1370 | /* Add code for low operand */ |
1371 | AddOpLow (D, OP65_CMP, &D->Rhs); |
1372 | |
1373 | /* Add code for high operand */ |
1374 | AddOpHigh (D, OP65_SBC, &D->Rhs, 0); |
1375 | |
1376 | /* eor #$80 */ |
1377 | X = NewCodeEntry (OP65_EOR, AM65_IMM, "$80" , 0, D->OpEntry->LI); |
1378 | InsertEntry (D, X, D->IP++); |
1379 | |
1380 | /* asl a */ |
1381 | X = NewCodeEntry (OP65_ASL, AM65_ACC, "a" , 0, D->OpEntry->LI); |
1382 | InsertEntry (D, X, D->IP++); |
1383 | L = CS_GenLabel (D->Code, X); |
1384 | |
1385 | /* Insert a bvc L before the eor insn */ |
1386 | X = NewCodeEntry (OP65_BVC, AM65_BRA, L->Name, L, D->OpEntry->LI); |
1387 | InsertEntry (D, X, D->IP - 2); |
1388 | ++D->IP; |
1389 | |
1390 | /* lda #$00 */ |
1391 | X = NewCodeEntry (OP65_LDA, AM65_IMM, "$00" , 0, D->OpEntry->LI); |
1392 | InsertEntry (D, X, D->IP++); |
1393 | |
1394 | /* ldx #$00 */ |
1395 | X = NewCodeEntry (OP65_LDX, AM65_IMM, "$00" , 0, D->OpEntry->LI); |
1396 | InsertEntry (D, X, D->IP++); |
1397 | |
1398 | /* rol a */ |
1399 | X = NewCodeEntry (OP65_ROL, AM65_ACC, "a" , 0, D->OpEntry->LI); |
1400 | InsertEntry (D, X, D->IP++); |
1401 | |
1402 | /* Remove the push and the call to the tosltax function */ |
1403 | RemoveRemainders (D); |
1404 | |
1405 | /* We changed the sequence */ |
1406 | return 1; |
1407 | } |
1408 | |
1409 | |
1410 | |
1411 | static unsigned Opt_tosneax (StackOpData* D) |
1412 | /* Optimize the tosneax sequence */ |
1413 | { |
1414 | return Opt_toseqax_tosneax (D, "boolne" ); |
1415 | } |
1416 | |
1417 | |
1418 | |
1419 | static unsigned Opt_tosorax (StackOpData* D) |
1420 | /* Optimize the tosorax sequence */ |
1421 | { |
1422 | /* Store the value into the zeropage instead of pushing it */ |
1423 | ReplacePushByStore (D); |
1424 | |
1425 | /* Inline the or, low byte */ |
1426 | D->IP = D->OpIndex + 1; |
1427 | AddOpLow (D, OP65_ORA, &D->Lhs); |
1428 | |
1429 | /* High byte */ |
1430 | AddOpHigh (D, OP65_ORA, &D->Lhs, 1); |
1431 | |
1432 | /* Remove the push and the call to the tosorax function */ |
1433 | RemoveRemainders (D); |
1434 | |
1435 | /* We changed the sequence */ |
1436 | return 1; |
1437 | } |
1438 | |
1439 | |
1440 | |
1441 | static unsigned Opt_tosshlax (StackOpData* D) |
1442 | /* Optimize the tosshlax sequence */ |
1443 | { |
1444 | return Opt_tosshift (D, "shlaxy" ); |
1445 | } |
1446 | |
1447 | |
1448 | |
1449 | static unsigned Opt_tosshrax (StackOpData* D) |
1450 | /* Optimize the tosshrax sequence */ |
1451 | { |
1452 | return Opt_tosshift (D, "shraxy" ); |
1453 | } |
1454 | |
1455 | |
1456 | |
1457 | static unsigned Opt_tossubax (StackOpData* D) |
1458 | /* Optimize the tossubax sequence. Note: subtraction is not commutative! */ |
1459 | { |
1460 | CodeEntry* X; |
1461 | |
1462 | |
1463 | /* Inline the sbc */ |
1464 | D->IP = D->OpIndex+1; |
1465 | |
1466 | /* sec */ |
1467 | X = NewCodeEntry (OP65_SEC, AM65_IMP, 0, 0, D->OpEntry->LI); |
1468 | InsertEntry (D, X, D->IP++); |
1469 | |
1470 | /* Must be true because of OP_RHS_LOAD */ |
1471 | CHECK ((D->Rhs.A.Flags & D->Rhs.X.Flags & LI_DIRECT) != 0); |
1472 | |
1473 | /* Add code for low operand */ |
1474 | AddOpLow (D, OP65_SBC, &D->Rhs); |
1475 | |
1476 | /* Add code for high operand */ |
1477 | AddOpHigh (D, OP65_SBC, &D->Rhs, 1); |
1478 | |
1479 | /* Remove the push and the call to the tossubax function */ |
1480 | RemoveRemainders (D); |
1481 | |
1482 | /* We changed the sequence */ |
1483 | return 1; |
1484 | } |
1485 | |
1486 | |
1487 | |
1488 | static unsigned Opt_tosugeax (StackOpData* D) |
1489 | /* Optimize the tosugeax sequence */ |
1490 | { |
1491 | CodeEntry* X; |
1492 | |
1493 | |
1494 | /* Inline the sbc */ |
1495 | D->IP = D->OpIndex+1; |
1496 | |
1497 | /* Must be true because of OP_RHS_LOAD */ |
1498 | CHECK ((D->Rhs.A.Flags & D->Rhs.X.Flags & LI_DIRECT) != 0); |
1499 | |
1500 | /* Add code for low operand */ |
1501 | AddOpLow (D, OP65_CMP, &D->Rhs); |
1502 | |
1503 | /* Add code for high operand */ |
1504 | AddOpHigh (D, OP65_SBC, &D->Rhs, 0); |
1505 | |
1506 | /* lda #$00 */ |
1507 | X = NewCodeEntry (OP65_LDA, AM65_IMM, "$00" , 0, D->OpEntry->LI); |
1508 | InsertEntry (D, X, D->IP++); |
1509 | |
1510 | /* ldx #$00 */ |
1511 | X = NewCodeEntry (OP65_LDX, AM65_IMM, "$00" , 0, D->OpEntry->LI); |
1512 | InsertEntry (D, X, D->IP++); |
1513 | |
1514 | /* rol a */ |
1515 | X = NewCodeEntry (OP65_ROL, AM65_ACC, "a" , 0, D->OpEntry->LI); |
1516 | InsertEntry (D, X, D->IP++); |
1517 | |
1518 | /* Remove the push and the call to the tosugeax function */ |
1519 | RemoveRemainders (D); |
1520 | |
1521 | /* We changed the sequence */ |
1522 | return 1; |
1523 | } |
1524 | |
1525 | |
1526 | |
1527 | static unsigned Opt_tosugtax (StackOpData* D) |
1528 | /* Optimize the tosugtax sequence */ |
1529 | { |
1530 | CodeEntry* X; |
1531 | |
1532 | |
1533 | /* Inline the sbc */ |
1534 | D->IP = D->OpIndex+1; |
1535 | |
1536 | /* Must be true because of OP_RHS_LOAD */ |
1537 | CHECK ((D->Rhs.A.Flags & D->Rhs.X.Flags & LI_DIRECT) != 0); |
1538 | |
1539 | /* sec */ |
1540 | X = NewCodeEntry (OP65_SEC, AM65_IMP, 0, 0, D->OpEntry->LI); |
1541 | InsertEntry (D, X, D->IP++); |
1542 | |
1543 | /* Add code for low operand */ |
1544 | AddOpLow (D, OP65_SBC, &D->Rhs); |
1545 | |
1546 | /* We need the zero flag, so remember the immediate result */ |
1547 | X = NewCodeEntry (OP65_STA, AM65_ZP, "tmp1" , 0, D->OpEntry->LI); |
1548 | InsertEntry (D, X, D->IP++); |
1549 | |
1550 | /* Add code for high operand */ |
1551 | AddOpHigh (D, OP65_SBC, &D->Rhs, 0); |
1552 | |
1553 | /* Set Z flag */ |
1554 | X = NewCodeEntry (OP65_ORA, AM65_ZP, "tmp1" , 0, D->OpEntry->LI); |
1555 | InsertEntry (D, X, D->IP++); |
1556 | |
1557 | /* Transform to boolean */ |
1558 | X = NewCodeEntry (OP65_JSR, AM65_ABS, "boolugt" , 0, D->OpEntry->LI); |
1559 | InsertEntry (D, X, D->IP++); |
1560 | |
1561 | /* Remove the push and the call to the operator function */ |
1562 | RemoveRemainders (D); |
1563 | |
1564 | /* We changed the sequence */ |
1565 | return 1; |
1566 | } |
1567 | |
1568 | |
1569 | |
1570 | static unsigned Opt_tosuleax (StackOpData* D) |
1571 | /* Optimize the tosuleax sequence */ |
1572 | { |
1573 | CodeEntry* X; |
1574 | |
1575 | |
1576 | /* Inline the sbc */ |
1577 | D->IP = D->OpIndex+1; |
1578 | |
1579 | /* Must be true because of OP_RHS_LOAD */ |
1580 | CHECK ((D->Rhs.A.Flags & D->Rhs.X.Flags & LI_DIRECT) != 0); |
1581 | |
1582 | /* sec */ |
1583 | X = NewCodeEntry (OP65_SEC, AM65_IMP, 0, 0, D->OpEntry->LI); |
1584 | InsertEntry (D, X, D->IP++); |
1585 | |
1586 | /* Add code for low operand */ |
1587 | AddOpLow (D, OP65_SBC, &D->Rhs); |
1588 | |
1589 | /* We need the zero flag, so remember the immediate result */ |
1590 | X = NewCodeEntry (OP65_STA, AM65_ZP, "tmp1" , 0, D->OpEntry->LI); |
1591 | InsertEntry (D, X, D->IP++); |
1592 | |
1593 | /* Add code for high operand */ |
1594 | AddOpHigh (D, OP65_SBC, &D->Rhs, 0); |
1595 | |
1596 | /* Set Z flag */ |
1597 | X = NewCodeEntry (OP65_ORA, AM65_ZP, "tmp1" , 0, D->OpEntry->LI); |
1598 | InsertEntry (D, X, D->IP++); |
1599 | |
1600 | /* Transform to boolean */ |
1601 | X = NewCodeEntry (OP65_JSR, AM65_ABS, "boolule" , 0, D->OpEntry->LI); |
1602 | InsertEntry (D, X, D->IP++); |
1603 | |
1604 | /* Remove the push and the call to the operator function */ |
1605 | RemoveRemainders (D); |
1606 | |
1607 | /* We changed the sequence */ |
1608 | return 1; |
1609 | } |
1610 | |
1611 | |
1612 | |
1613 | static unsigned Opt_tosultax (StackOpData* D) |
1614 | /* Optimize the tosultax sequence */ |
1615 | { |
1616 | CodeEntry* X; |
1617 | |
1618 | |
1619 | /* Inline the sbc */ |
1620 | D->IP = D->OpIndex+1; |
1621 | |
1622 | /* Must be true because of OP_RHS_LOAD */ |
1623 | CHECK ((D->Rhs.A.Flags & D->Rhs.X.Flags & LI_DIRECT) != 0); |
1624 | |
1625 | /* Add code for low operand */ |
1626 | AddOpLow (D, OP65_CMP, &D->Rhs); |
1627 | |
1628 | /* Add code for high operand */ |
1629 | AddOpHigh (D, OP65_SBC, &D->Rhs, 0); |
1630 | |
1631 | /* Transform to boolean */ |
1632 | X = NewCodeEntry (OP65_JSR, AM65_ABS, "boolult" , 0, D->OpEntry->LI); |
1633 | InsertEntry (D, X, D->IP++); |
1634 | |
1635 | /* Remove the push and the call to the operator function */ |
1636 | RemoveRemainders (D); |
1637 | |
1638 | /* We changed the sequence */ |
1639 | return 1; |
1640 | } |
1641 | |
1642 | |
1643 | |
1644 | static unsigned Opt_tosxorax (StackOpData* D) |
1645 | /* Optimize the tosxorax sequence */ |
1646 | { |
1647 | CodeEntry* X; |
1648 | |
1649 | |
1650 | /* Store the value into the zeropage instead of pushing it */ |
1651 | ReplacePushByStore (D); |
1652 | |
1653 | /* Inline the xor, low byte */ |
1654 | D->IP = D->OpIndex + 1; |
1655 | AddOpLow (D, OP65_EOR, &D->Lhs); |
1656 | |
1657 | /* High byte */ |
1658 | if (RegValIsKnown (D->PushEntry->RI->In.RegX) && |
1659 | RegValIsKnown (D->OpEntry->RI->In.RegX)) { |
1660 | /* Both values known, precalculate the result */ |
1661 | const char* Arg = MakeHexArg (D->PushEntry->RI->In.RegX ^ D->OpEntry->RI->In.RegX); |
1662 | X = NewCodeEntry (OP65_LDX, AM65_IMM, Arg, 0, D->OpEntry->LI); |
1663 | InsertEntry (D, X, D->IP++); |
1664 | } else if (D->PushEntry->RI->In.RegX != 0) { |
1665 | /* High byte is unknown */ |
1666 | AddOpHigh (D, OP65_EOR, &D->Lhs, 1); |
1667 | } |
1668 | |
1669 | /* Remove the push and the call to the tosandax function */ |
1670 | RemoveRemainders (D); |
1671 | |
1672 | /* We changed the sequence */ |
1673 | return 1; |
1674 | } |
1675 | |
1676 | |
1677 | |
1678 | /*****************************************************************************/ |
1679 | /* Code */ |
1680 | /*****************************************************************************/ |
1681 | |
1682 | |
1683 | |
1684 | static const OptFuncDesc FuncTable[] = { |
1685 | { "__bzero" , Opt___bzero, REG_NONE, OP_X_ZERO | OP_A_KNOWN }, |
1686 | { "staspidx" , Opt_staspidx, REG_NONE, OP_NONE }, |
1687 | { "staxspidx" , Opt_staxspidx, REG_AX, OP_NONE }, |
1688 | { "tosaddax" , Opt_tosaddax, REG_NONE, OP_NONE }, |
1689 | { "tosandax" , Opt_tosandax, REG_NONE, OP_NONE }, |
1690 | { "tosaslax" , Opt_tosaslax, REG_NONE, OP_NONE }, |
1691 | { "tosasrax" , Opt_tosasrax, REG_NONE, OP_NONE }, |
1692 | { "toseqax" , Opt_toseqax, REG_NONE, OP_NONE }, |
1693 | { "tosgeax" , Opt_tosgeax, REG_NONE, OP_RHS_LOAD_DIRECT }, |
1694 | { "tosltax" , Opt_tosltax, REG_NONE, OP_RHS_LOAD_DIRECT }, |
1695 | { "tosneax" , Opt_tosneax, REG_NONE, OP_NONE }, |
1696 | { "tosorax" , Opt_tosorax, REG_NONE, OP_NONE }, |
1697 | { "tosshlax" , Opt_tosshlax, REG_NONE, OP_NONE }, |
1698 | { "tosshrax" , Opt_tosshrax, REG_NONE, OP_NONE }, |
1699 | { "tossubax" , Opt_tossubax, REG_NONE, OP_RHS_LOAD_DIRECT }, |
1700 | { "tosugeax" , Opt_tosugeax, REG_NONE, OP_RHS_LOAD_DIRECT }, |
1701 | { "tosugtax" , Opt_tosugtax, REG_NONE, OP_RHS_LOAD_DIRECT }, |
1702 | { "tosuleax" , Opt_tosuleax, REG_NONE, OP_RHS_LOAD_DIRECT }, |
1703 | { "tosultax" , Opt_tosultax, REG_NONE, OP_RHS_LOAD_DIRECT }, |
1704 | { "tosxorax" , Opt_tosxorax, REG_NONE, OP_NONE }, |
1705 | }; |
1706 | #define FUNC_COUNT (sizeof(FuncTable) / sizeof(FuncTable[0])) |
1707 | |
1708 | |
1709 | |
1710 | static int CmpFunc (const void* Key, const void* Func) |
1711 | /* Compare function for bsearch */ |
1712 | { |
1713 | return strcmp (Key, ((const OptFuncDesc*) Func)->Name); |
1714 | } |
1715 | |
1716 | |
1717 | |
1718 | static const OptFuncDesc* FindFunc (const char* Name) |
1719 | /* Find the function with the given name. Return a pointer to the table entry |
1720 | ** or NULL if the function was not found. |
1721 | */ |
1722 | { |
1723 | return bsearch (Name, FuncTable, FUNC_COUNT, sizeof(OptFuncDesc), CmpFunc); |
1724 | } |
1725 | |
1726 | |
1727 | |
1728 | static int CmpHarmless (const void* Key, const void* Entry) |
1729 | /* Compare function for bsearch */ |
1730 | { |
1731 | return strcmp (Key, *(const char**)Entry); |
1732 | } |
1733 | |
1734 | |
1735 | |
1736 | static int HarmlessCall (const char* Name) |
1737 | /* Check if this is a call to a harmless subroutine that will not interrupt |
1738 | ** the pushax/op sequence when encountered. |
1739 | */ |
1740 | { |
1741 | static const char* const Tab[] = { |
1742 | "aslax1" , |
1743 | "aslax2" , |
1744 | "aslax3" , |
1745 | "aslax4" , |
1746 | "aslaxy" , |
1747 | "asrax1" , |
1748 | "asrax2" , |
1749 | "asrax3" , |
1750 | "asrax4" , |
1751 | "asraxy" , |
1752 | "bnegax" , |
1753 | "complax" , |
1754 | "decax1" , |
1755 | "decax2" , |
1756 | "decax3" , |
1757 | "decax4" , |
1758 | "decax5" , |
1759 | "decax6" , |
1760 | "decax7" , |
1761 | "decax8" , |
1762 | "decaxy" , |
1763 | "incax1" , |
1764 | "incax2" , |
1765 | "incax3" , |
1766 | "incax4" , |
1767 | "incax5" , |
1768 | "incax6" , |
1769 | "incax7" , |
1770 | "incax8" , |
1771 | "incaxy" , |
1772 | "ldaxidx" , |
1773 | "ldaxysp" , |
1774 | "negax" , |
1775 | "shlax1" , |
1776 | "shlax2" , |
1777 | "shlax3" , |
1778 | "shlax4" , |
1779 | "shlaxy" , |
1780 | "shrax1" , |
1781 | "shrax2" , |
1782 | "shrax3" , |
1783 | "shrax4" , |
1784 | "shraxy" , |
1785 | }; |
1786 | |
1787 | void* R = bsearch (Name, |
1788 | Tab, |
1789 | sizeof (Tab) / sizeof (Tab[0]), |
1790 | sizeof (Tab[0]), |
1791 | CmpHarmless); |
1792 | return (R != 0); |
1793 | } |
1794 | |
1795 | |
1796 | |
1797 | static void ResetStackOpData (StackOpData* Data) |
1798 | /* Reset the given data structure */ |
1799 | { |
1800 | Data->OptFunc = 0; |
1801 | Data->ZPUsage = REG_NONE; |
1802 | |
1803 | ClearLoadInfo (&Data->Lhs); |
1804 | ClearLoadInfo (&Data->Rhs); |
1805 | |
1806 | Data->PushIndex = -1; |
1807 | Data->OpIndex = -1; |
1808 | } |
1809 | |
1810 | |
1811 | |
1812 | static int PreCondOk (StackOpData* D) |
1813 | /* Check if the preconditions for a call to the optimizer subfunction are |
1814 | ** satisfied. As a side effect, this function will also choose the zero page |
1815 | ** register to use. |
1816 | */ |
1817 | { |
1818 | /* Check the flags */ |
1819 | unsigned UnusedRegs = D->OptFunc->UnusedRegs; |
1820 | if (UnusedRegs != REG_NONE && |
1821 | (GetRegInfo (D->Code, D->OpIndex+1, UnusedRegs) & UnusedRegs) != 0) { |
1822 | /* Cannot optimize */ |
1823 | return 0; |
1824 | } |
1825 | if ((D->OptFunc->Flags & OP_A_KNOWN) != 0 && |
1826 | RegValIsUnknown (D->OpEntry->RI->In.RegA)) { |
1827 | /* Cannot optimize */ |
1828 | return 0; |
1829 | } |
1830 | if ((D->OptFunc->Flags & OP_X_ZERO) != 0 && |
1831 | D->OpEntry->RI->In.RegX != 0) { |
1832 | /* Cannot optimize */ |
1833 | return 0; |
1834 | } |
1835 | if ((D->OptFunc->Flags & OP_LHS_LOAD) != 0) { |
1836 | if (D->Lhs.A.LoadIndex < 0 || D->Lhs.X.LoadIndex < 0) { |
1837 | /* Cannot optimize */ |
1838 | return 0; |
1839 | } else if ((D->OptFunc->Flags & OP_LHS_LOAD_DIRECT) != 0) { |
1840 | if ((D->Lhs.A.Flags & D->Lhs.X.Flags & LI_DIRECT) == 0) { |
1841 | /* Cannot optimize */ |
1842 | return 0; |
1843 | } |
1844 | } |
1845 | } |
1846 | if ((D->OptFunc->Flags & OP_RHS_LOAD) != 0) { |
1847 | if (D->Rhs.A.LoadIndex < 0 || D->Rhs.X.LoadIndex < 0) { |
1848 | /* Cannot optimize */ |
1849 | return 0; |
1850 | } else if ((D->OptFunc->Flags & OP_RHS_LOAD_DIRECT) != 0) { |
1851 | if ((D->Rhs.A.Flags & D->Rhs.X.Flags & LI_DIRECT) == 0) { |
1852 | /* Cannot optimize */ |
1853 | return 0; |
1854 | } |
1855 | } |
1856 | } |
1857 | if ((D->Rhs.A.Flags | D->Rhs.X.Flags) & LI_DUP_LOAD) { |
1858 | /* Cannot optimize */ |
1859 | return 0; |
1860 | } |
1861 | |
1862 | /* Determine the zero page locations to use. We've tracked the used |
1863 | ** ZP locations, so try to find some for us that are unused. |
1864 | */ |
1865 | if ((D->ZPUsage & REG_PTR1) == REG_NONE) { |
1866 | D->ZPLo = "ptr1" ; |
1867 | D->ZPHi = "ptr1+1" ; |
1868 | } else if ((D->ZPUsage & REG_SREG) == REG_NONE) { |
1869 | D->ZPLo = "sreg" ; |
1870 | D->ZPHi = "sreg+1" ; |
1871 | } else if ((D->ZPUsage & REG_PTR2) == REG_NONE) { |
1872 | D->ZPLo = "ptr2" ; |
1873 | D->ZPHi = "ptr2+1" ; |
1874 | } else { |
1875 | /* No registers available */ |
1876 | return 0; |
1877 | } |
1878 | |
1879 | /* Determine if we have a basic block */ |
1880 | return CS_IsBasicBlock (D->Code, D->PushIndex, D->OpIndex); |
1881 | } |
1882 | |
1883 | |
1884 | |
1885 | /*****************************************************************************/ |
1886 | /* Code */ |
1887 | /*****************************************************************************/ |
1888 | |
1889 | |
1890 | |
1891 | unsigned OptStackOps (CodeSeg* S) |
1892 | /* Optimize operations that take operands via the stack */ |
1893 | { |
1894 | unsigned Changes = 0; /* Number of changes in one run */ |
1895 | StackOpData Data; |
1896 | int I; |
1897 | int OldEntryCount; /* Old number of entries */ |
1898 | unsigned UsedRegs = 0; /* Registers used */ |
1899 | unsigned ChangedRegs = 0;/* Registers changed */ |
1900 | |
1901 | |
1902 | enum { |
1903 | Initialize, |
1904 | Search, |
1905 | FoundPush, |
1906 | FoundOp |
1907 | } State = Initialize; |
1908 | |
1909 | |
1910 | /* Remember the code segment in the info struct */ |
1911 | Data.Code = S; |
1912 | |
1913 | /* Look for a call to pushax followed by a call to some other function |
1914 | ** that takes it's first argument on the stack, and the second argument |
1915 | ** in the primary register. |
1916 | ** It depends on the code between the two if we can handle/transform the |
1917 | ** sequence, so check this code for the following list of things: |
1918 | ** |
1919 | ** - the range must be a basic block (one entry, one exit) |
1920 | ** - there may not be accesses to local variables with unknown |
1921 | ** offsets (because we have to adjust these offsets). |
1922 | ** - no subroutine calls |
1923 | ** - no jump labels |
1924 | ** |
1925 | ** Since we need a zero page register later, do also check the |
1926 | ** intermediate code for zero page use. |
1927 | */ |
1928 | I = 0; |
1929 | while (I < (int)CS_GetEntryCount (S)) { |
1930 | |
1931 | /* Get the next entry */ |
1932 | CodeEntry* E = CS_GetEntry (S, I); |
1933 | |
1934 | /* Actions depend on state */ |
1935 | switch (State) { |
1936 | |
1937 | case Initialize: |
1938 | ResetStackOpData (&Data); |
1939 | UsedRegs = ChangedRegs = REG_NONE; |
1940 | State = Search; |
1941 | /* FALLTHROUGH */ |
1942 | |
1943 | case Search: |
1944 | /* While searching, track register load insns, so we can tell |
1945 | ** what is in a register once pushax is encountered. |
1946 | */ |
1947 | if (CE_HasLabel (E)) { |
1948 | /* Currently we don't track across branches */ |
1949 | ClearLoadInfo (&Data.Lhs); |
1950 | } |
1951 | if (CE_IsCallTo (E, "pushax" )) { |
1952 | Data.PushIndex = I; |
1953 | State = FoundPush; |
1954 | } else { |
1955 | /* Track load insns */ |
1956 | TrackLoads (&Data.Lhs, E, I); |
1957 | } |
1958 | break; |
1959 | |
1960 | case FoundPush: |
1961 | /* We' found a pushax before. Search for a stack op that may |
1962 | ** follow and in the meantime, track zeropage usage and check |
1963 | ** for code that will disable us from translating the sequence. |
1964 | */ |
1965 | if (CE_HasLabel (E)) { |
1966 | /* Currently we don't track across branches */ |
1967 | ClearLoadInfo (&Data.Rhs); |
1968 | } |
1969 | if (E->OPC == OP65_JSR) { |
1970 | |
1971 | /* Subroutine call: Check if this is one of the functions, |
1972 | ** we're going to replace. |
1973 | */ |
1974 | Data.OptFunc = FindFunc (E->Arg); |
1975 | if (Data.OptFunc) { |
1976 | /* Remember the op index and go on */ |
1977 | Data.OpIndex = I; |
1978 | Data.OpEntry = E; |
1979 | State = FoundOp; |
1980 | break; |
1981 | } else if (!HarmlessCall (E->Arg)) { |
1982 | /* A call to an unkown subroutine: We need to start |
1983 | ** over after the last pushax. Note: This will also |
1984 | ** happen if we encounter a call to pushax! |
1985 | */ |
1986 | I = Data.PushIndex; |
1987 | State = Initialize; |
1988 | break; |
1989 | } else { |
1990 | /* Track register usage */ |
1991 | Data.ZPUsage |= (E->Use | E->Chg); |
1992 | TrackLoads (&Data.Rhs, E, I); |
1993 | } |
1994 | |
1995 | } else if (E->Info & OF_STORE && (E->Chg & REG_ZP) == 0) { |
1996 | |
1997 | /* Too dangerous - there may be a change of a variable |
1998 | ** within the sequence. |
1999 | */ |
2000 | I = Data.PushIndex; |
2001 | State = Initialize; |
2002 | break; |
2003 | |
2004 | } else if ((E->Use & REG_SP) != 0 && |
2005 | (E->AM != AM65_ZP_INDY || |
2006 | RegValIsUnknown (E->RI->In.RegY) || |
2007 | E->RI->In.RegY < 2)) { |
2008 | |
2009 | /* If we are using the stack, and we don't have "indirect Y" |
2010 | ** addressing mode, or the value of Y is unknown, or less |
2011 | ** than two, we cannot cope with this piece of code. Having |
2012 | ** an unknown value of Y means that we cannot correct the |
2013 | ** stack offset, while having an offset less than two means |
2014 | ** that the code works with the value on stack which is to |
2015 | ** be removed. |
2016 | */ |
2017 | I = Data.PushIndex; |
2018 | State = Initialize; |
2019 | break; |
2020 | |
2021 | } else { |
2022 | /* Other stuff: Track register usage */ |
2023 | Data.ZPUsage |= (E->Use | E->Chg); |
2024 | TrackLoads (&Data.Rhs, E, I); |
2025 | } |
2026 | /* If the registers from the push (A/X) are used before they're |
2027 | ** changed, we cannot change the sequence, because this would |
2028 | ** with a high probability change the register contents. |
2029 | */ |
2030 | UsedRegs |= E->Use; |
2031 | if ((UsedRegs & ~ChangedRegs) & REG_AX) { |
2032 | I = Data.PushIndex; |
2033 | State = Initialize; |
2034 | break; |
2035 | } |
2036 | ChangedRegs |= E->Chg; |
2037 | break; |
2038 | |
2039 | case FoundOp: |
2040 | /* Track zero page location usage beyond this point */ |
2041 | Data.ZPUsage |= GetRegInfo (S, I, REG_SREG | REG_PTR1 | REG_PTR2); |
2042 | |
2043 | /* Finalize the load info */ |
2044 | FinalizeLoadInfo (&Data.Lhs, S); |
2045 | FinalizeLoadInfo (&Data.Rhs, S); |
2046 | |
2047 | /* Check if the lhs loads from zeropage. If this is true, these |
2048 | ** zero page locations have to be added to ZPUsage, because |
2049 | ** they cannot be used for intermediate storage. In addition, |
2050 | ** if one of these zero page locations is destroyed between |
2051 | ** pushing the lhs and the actual operation, we cannot use the |
2052 | ** original zero page locations for the final op, but must |
2053 | ** use another ZP location to save them. |
2054 | */ |
2055 | ChangedRegs &= REG_ZP; |
2056 | if (Data.Lhs.A.LoadEntry && Data.Lhs.A.LoadEntry->AM == AM65_ZP) { |
2057 | Data.ZPUsage |= Data.Lhs.A.LoadEntry->Use; |
2058 | if ((Data.Lhs.A.LoadEntry->Use & ChangedRegs) != 0) { |
2059 | Data.Lhs.A.Flags &= ~(LI_DIRECT | LI_RELOAD_Y); |
2060 | } |
2061 | } |
2062 | if (Data.Lhs.X.LoadEntry && Data.Lhs.X.LoadEntry->AM == AM65_ZP) { |
2063 | Data.ZPUsage |= Data.Lhs.X.LoadEntry->Use; |
2064 | if ((Data.Lhs.X.LoadEntry->Use & ChangedRegs) != 0) { |
2065 | Data.Lhs.X.Flags &= ~(LI_DIRECT | LI_RELOAD_Y); |
2066 | } |
2067 | } |
2068 | |
2069 | /* Check the preconditions. If they aren't ok, reset the insn |
2070 | ** pointer to the pushax and start over. We will loose part of |
2071 | ** load tracking but at least a/x has probably lost between |
2072 | ** pushax and here and will be tracked again when restarting. |
2073 | */ |
2074 | if (!PreCondOk (&Data)) { |
2075 | I = Data.PushIndex; |
2076 | State = Initialize; |
2077 | break; |
2078 | } |
2079 | |
2080 | /* Prepare the remainder of the data structure. */ |
2081 | Data.PrevEntry = CS_GetPrevEntry (S, Data.PushIndex); |
2082 | Data.PushEntry = CS_GetEntry (S, Data.PushIndex); |
2083 | Data.OpEntry = CS_GetEntry (S, Data.OpIndex); |
2084 | Data.NextEntry = CS_GetNextEntry (S, Data.OpIndex); |
2085 | |
2086 | /* Remember the current number of code lines */ |
2087 | OldEntryCount = CS_GetEntryCount (S); |
2088 | |
2089 | /* Adjust stack offsets to account for the upcoming removal */ |
2090 | AdjustStackOffset (&Data, 2); |
2091 | |
2092 | /* Regenerate register info, since AdjustStackOffset changed |
2093 | ** the code |
2094 | */ |
2095 | CS_GenRegInfo (S); |
2096 | |
2097 | /* Call the optimizer function */ |
2098 | Changes += Data.OptFunc->Func (&Data); |
2099 | |
2100 | /* Since the function may have added or deleted entries, |
2101 | ** correct the index. |
2102 | */ |
2103 | I += CS_GetEntryCount (S) - OldEntryCount; |
2104 | |
2105 | /* Regenerate register info */ |
2106 | CS_GenRegInfo (S); |
2107 | |
2108 | /* Done */ |
2109 | State = Initialize; |
2110 | continue; |
2111 | |
2112 | } |
2113 | |
2114 | /* Next entry */ |
2115 | ++I; |
2116 | |
2117 | } |
2118 | |
2119 | /* Return the number of changes made */ |
2120 | return Changes; |
2121 | } |
2122 | |