1 | /*****************************************************************************/ |
2 | /* */ |
3 | /* codeopt.c */ |
4 | /* */ |
5 | /* Optimizer subroutines */ |
6 | /* */ |
7 | /* */ |
8 | /* */ |
9 | /* (C) 2001-2012, Ullrich von Bassewitz */ |
10 | /* Roemerstrasse 52 */ |
11 | /* D-70794 Filderstadt */ |
12 | /* EMail: uz@cc65.org */ |
13 | /* */ |
14 | /* */ |
15 | /* This software is provided 'as-is', without any expressed or implied */ |
16 | /* warranty. In no event will the authors be held liable for any damages */ |
17 | /* arising from the use of this software. */ |
18 | /* */ |
19 | /* Permission is granted to anyone to use this software for any purpose, */ |
20 | /* including commercial applications, and to alter it and redistribute it */ |
21 | /* freely, subject to the following restrictions: */ |
22 | /* */ |
23 | /* 1. The origin of this software must not be misrepresented; you must not */ |
24 | /* claim that you wrote the original software. If you use this software */ |
25 | /* in a product, an acknowledgment in the product documentation would be */ |
26 | /* appreciated but is not required. */ |
27 | /* 2. Altered source versions must be plainly marked as such, and must not */ |
28 | /* be misrepresented as being the original software. */ |
29 | /* 3. This notice may not be removed or altered from any source */ |
30 | /* distribution. */ |
31 | /* */ |
32 | /*****************************************************************************/ |
33 | |
34 | |
35 | |
36 | #include <stdlib.h> |
37 | #include <string.h> |
38 | #include <stdarg.h> |
39 | |
40 | /* common */ |
41 | #include "abend.h" |
42 | #include "chartype.h" |
43 | #include "cpu.h" |
44 | #include "debugflag.h" |
45 | #include "print.h" |
46 | #include "strbuf.h" |
47 | #include "xmalloc.h" |
48 | #include "xsprintf.h" |
49 | |
50 | /* cc65 */ |
51 | #include "asmlabel.h" |
52 | #include "codeent.h" |
53 | #include "codeinfo.h" |
54 | #include "codeopt.h" |
55 | #include "coptadd.h" |
56 | #include "coptc02.h" |
57 | #include "coptcmp.h" |
58 | #include "coptind.h" |
59 | #include "coptneg.h" |
60 | #include "coptptrload.h" |
61 | #include "coptptrstore.h" |
62 | #include "coptpush.h" |
63 | #include "coptshift.h" |
64 | #include "coptsize.h" |
65 | #include "coptstop.h" |
66 | #include "coptstore.h" |
67 | #include "coptsub.h" |
68 | #include "copttest.h" |
69 | #include "error.h" |
70 | #include "global.h" |
71 | #include "output.h" |
72 | #include "symtab.h" |
73 | |
74 | |
75 | /*****************************************************************************/ |
76 | /* Optimize loads */ |
77 | /*****************************************************************************/ |
78 | |
79 | |
80 | |
81 | static unsigned OptLoad1 (CodeSeg* S) |
82 | /* Search for a call to ldaxysp where X is not used later and replace it by |
83 | ** a load of just the A register. |
84 | */ |
85 | { |
86 | unsigned I; |
87 | unsigned Changes = 0; |
88 | |
89 | /* Walk over the entries */ |
90 | I = 0; |
91 | while (I < CS_GetEntryCount (S)) { |
92 | |
93 | CodeEntry* E; |
94 | |
95 | /* Get next entry */ |
96 | E = CS_GetEntry (S, I); |
97 | |
98 | /* Check for the sequence */ |
99 | if (CE_IsCallTo (E, "ldaxysp" ) && |
100 | RegValIsKnown (E->RI->In.RegY) && |
101 | !RegXUsed (S, I+1)) { |
102 | |
103 | CodeEntry* X; |
104 | |
105 | /* Reload the Y register */ |
106 | const char* Arg = MakeHexArg (E->RI->In.RegY - 1); |
107 | X = NewCodeEntry (OP65_LDY, AM65_IMM, Arg, 0, E->LI); |
108 | CS_InsertEntry (S, X, I+1); |
109 | |
110 | /* Load from stack */ |
111 | X = NewCodeEntry (OP65_LDA, AM65_ZP_INDY, "sp" , 0, E->LI); |
112 | CS_InsertEntry (S, X, I+2); |
113 | |
114 | /* Now remove the call to the subroutine */ |
115 | CS_DelEntry (S, I); |
116 | |
117 | /* Remember, we had changes */ |
118 | ++Changes; |
119 | |
120 | } |
121 | |
122 | /* Next entry */ |
123 | ++I; |
124 | |
125 | } |
126 | |
127 | /* Return the number of changes made */ |
128 | return Changes; |
129 | } |
130 | |
131 | |
132 | |
133 | static unsigned OptLoad2 (CodeSeg* S) |
134 | /* Replace calls to ldaxysp by inline code */ |
135 | { |
136 | unsigned I; |
137 | unsigned Changes = 0; |
138 | |
139 | /* Walk over the entries */ |
140 | I = 0; |
141 | while (I < CS_GetEntryCount (S)) { |
142 | |
143 | CodeEntry* L[3]; |
144 | |
145 | /* Get next entry */ |
146 | L[0] = CS_GetEntry (S, I); |
147 | |
148 | /* Check for the sequence */ |
149 | if (CE_IsCallTo (L[0], "ldaxysp" )) { |
150 | |
151 | CodeEntry* X; |
152 | |
153 | /* Followed by sta abs/stx abs? */ |
154 | if (CS_GetEntries (S, L+1, I+1, 2) && |
155 | L[1]->OPC == OP65_STA && |
156 | L[2]->OPC == OP65_STX && |
157 | (L[1]->Arg == 0 || |
158 | L[2]->Arg == 0 || |
159 | strcmp (L[1]->Arg, L[2]->Arg) != 0) && |
160 | !CS_RangeHasLabel (S, I+1, 2) && |
161 | !RegXUsed (S, I+3)) { |
162 | |
163 | /* A/X are stored into memory somewhere and X is not used |
164 | ** later |
165 | */ |
166 | |
167 | /* lda (sp),y */ |
168 | X = NewCodeEntry (OP65_LDA, AM65_ZP_INDY, "sp" , 0, L[0]->LI); |
169 | CS_InsertEntry (S, X, I+3); |
170 | |
171 | /* sta abs */ |
172 | X = NewCodeEntry (OP65_STA, L[2]->AM, L[2]->Arg, 0, L[2]->LI); |
173 | CS_InsertEntry (S, X, I+4); |
174 | |
175 | /* dey */ |
176 | X = NewCodeEntry (OP65_DEY, AM65_IMP, 0, 0, L[0]->LI); |
177 | CS_InsertEntry (S, X, I+5); |
178 | |
179 | /* lda (sp),y */ |
180 | X = NewCodeEntry (OP65_LDA, AM65_ZP_INDY, "sp" , 0, L[0]->LI); |
181 | CS_InsertEntry (S, X, I+6); |
182 | |
183 | /* sta abs */ |
184 | X = NewCodeEntry (OP65_STA, L[1]->AM, L[1]->Arg, 0, L[1]->LI); |
185 | CS_InsertEntry (S, X, I+7); |
186 | |
187 | /* Now remove the call to the subroutine and the sta/stx */ |
188 | CS_DelEntries (S, I, 3); |
189 | |
190 | } else { |
191 | |
192 | /* Standard replacement */ |
193 | |
194 | /* lda (sp),y */ |
195 | X = NewCodeEntry (OP65_LDA, AM65_ZP_INDY, "sp" , 0, L[0]->LI); |
196 | CS_InsertEntry (S, X, I+1); |
197 | |
198 | /* tax */ |
199 | X = NewCodeEntry (OP65_TAX, AM65_IMP, 0, 0, L[0]->LI); |
200 | CS_InsertEntry (S, X, I+2); |
201 | |
202 | /* dey */ |
203 | X = NewCodeEntry (OP65_DEY, AM65_IMP, 0, 0, L[0]->LI); |
204 | CS_InsertEntry (S, X, I+3); |
205 | |
206 | /* lda (sp),y */ |
207 | X = NewCodeEntry (OP65_LDA, AM65_ZP_INDY, "sp" , 0, L[0]->LI); |
208 | CS_InsertEntry (S, X, I+4); |
209 | |
210 | /* Now remove the call to the subroutine */ |
211 | CS_DelEntry (S, I); |
212 | |
213 | } |
214 | |
215 | /* Remember, we had changes */ |
216 | ++Changes; |
217 | |
218 | } |
219 | |
220 | /* Next entry */ |
221 | ++I; |
222 | } |
223 | |
224 | /* Return the number of changes made */ |
225 | return Changes; |
226 | } |
227 | |
228 | |
229 | |
230 | static unsigned OptLoad3 (CodeSeg* S) |
231 | /* Remove repeated loads from one and the same memory location */ |
232 | { |
233 | unsigned Changes = 0; |
234 | CodeEntry* Load = 0; |
235 | |
236 | /* Walk over the entries */ |
237 | unsigned I = 0; |
238 | while (I < CS_GetEntryCount (S)) { |
239 | |
240 | /* Get next entry */ |
241 | CodeEntry* E = CS_GetEntry (S, I); |
242 | |
243 | /* Forget a preceeding load if we have a label */ |
244 | if (Load && CE_HasLabel (E)) { |
245 | Load = 0; |
246 | } |
247 | |
248 | /* Check if this insn is a load */ |
249 | if (E->Info & OF_LOAD) { |
250 | |
251 | CodeEntry* N; |
252 | |
253 | /* If we had a preceeding load that is identical, remove this one. |
254 | ** If it is not identical, or we didn't have one, remember it. |
255 | */ |
256 | if (Load != 0 && |
257 | E->OPC == Load->OPC && |
258 | E->AM == Load->AM && |
259 | ((E->Arg == 0 && Load->Arg == 0) || |
260 | strcmp (E->Arg, Load->Arg) == 0) && |
261 | (N = CS_GetNextEntry (S, I)) != 0 && |
262 | (N->Info & OF_CBRA) == 0) { |
263 | |
264 | /* Now remove the call to the subroutine */ |
265 | CS_DelEntry (S, I); |
266 | |
267 | /* Remember, we had changes */ |
268 | ++Changes; |
269 | |
270 | /* Next insn */ |
271 | continue; |
272 | |
273 | } else { |
274 | |
275 | Load = E; |
276 | |
277 | } |
278 | |
279 | } else if ((E->Info & OF_CMP) == 0 && (E->Info & OF_CBRA) == 0) { |
280 | /* Forget the first load on occurance of any insn we don't like */ |
281 | Load = 0; |
282 | } |
283 | |
284 | /* Next entry */ |
285 | ++I; |
286 | } |
287 | |
288 | /* Return the number of changes made */ |
289 | return Changes; |
290 | } |
291 | |
292 | |
293 | |
294 | /*****************************************************************************/ |
295 | /* Decouple operations */ |
296 | /*****************************************************************************/ |
297 | |
298 | |
299 | |
300 | static unsigned OptDecouple (CodeSeg* S) |
301 | /* Decouple operations, that is, do the following replacements: |
302 | ** |
303 | ** dex -> ldx #imm |
304 | ** inx -> ldx #imm |
305 | ** dey -> ldy #imm |
306 | ** iny -> ldy #imm |
307 | ** tax -> ldx #imm |
308 | ** txa -> lda #imm |
309 | ** tay -> ldy #imm |
310 | ** tya -> lda #imm |
311 | ** lda zp -> lda #imm |
312 | ** ldx zp -> ldx #imm |
313 | ** ldy zp -> ldy #imm |
314 | ** |
315 | ** Provided that the register values are known of course. |
316 | */ |
317 | { |
318 | unsigned Changes = 0; |
319 | unsigned I; |
320 | |
321 | /* Walk over the entries */ |
322 | I = 0; |
323 | while (I < CS_GetEntryCount (S)) { |
324 | |
325 | const char* Arg; |
326 | |
327 | /* Get next entry and it's input register values */ |
328 | CodeEntry* E = CS_GetEntry (S, I); |
329 | const RegContents* In = &E->RI->In; |
330 | |
331 | /* Assume we have no replacement */ |
332 | CodeEntry* X = 0; |
333 | |
334 | /* Check the instruction */ |
335 | switch (E->OPC) { |
336 | |
337 | case OP65_DEA: |
338 | if (RegValIsKnown (In->RegA)) { |
339 | Arg = MakeHexArg ((In->RegA - 1) & 0xFF); |
340 | X = NewCodeEntry (OP65_LDA, AM65_IMM, Arg, 0, E->LI); |
341 | } |
342 | break; |
343 | |
344 | case OP65_DEX: |
345 | if (RegValIsKnown (In->RegX)) { |
346 | Arg = MakeHexArg ((In->RegX - 1) & 0xFF); |
347 | X = NewCodeEntry (OP65_LDX, AM65_IMM, Arg, 0, E->LI); |
348 | } |
349 | break; |
350 | |
351 | case OP65_DEY: |
352 | if (RegValIsKnown (In->RegY)) { |
353 | Arg = MakeHexArg ((In->RegY - 1) & 0xFF); |
354 | X = NewCodeEntry (OP65_LDY, AM65_IMM, Arg, 0, E->LI); |
355 | } |
356 | break; |
357 | |
358 | case OP65_INA: |
359 | if (RegValIsKnown (In->RegA)) { |
360 | Arg = MakeHexArg ((In->RegA + 1) & 0xFF); |
361 | X = NewCodeEntry (OP65_LDA, AM65_IMM, Arg, 0, E->LI); |
362 | } |
363 | break; |
364 | |
365 | case OP65_INX: |
366 | if (RegValIsKnown (In->RegX)) { |
367 | Arg = MakeHexArg ((In->RegX + 1) & 0xFF); |
368 | X = NewCodeEntry (OP65_LDX, AM65_IMM, Arg, 0, E->LI); |
369 | } |
370 | break; |
371 | |
372 | case OP65_INY: |
373 | if (RegValIsKnown (In->RegY)) { |
374 | Arg = MakeHexArg ((In->RegY + 1) & 0xFF); |
375 | X = NewCodeEntry (OP65_LDY, AM65_IMM, Arg, 0, E->LI); |
376 | } |
377 | break; |
378 | |
379 | case OP65_LDA: |
380 | if (E->AM == AM65_ZP) { |
381 | switch (GetKnownReg (E->Use & REG_ZP, In)) { |
382 | case REG_TMP1: |
383 | Arg = MakeHexArg (In->Tmp1); |
384 | X = NewCodeEntry (OP65_LDA, AM65_IMM, Arg, 0, E->LI); |
385 | break; |
386 | |
387 | case REG_PTR1_LO: |
388 | Arg = MakeHexArg (In->Ptr1Lo); |
389 | X = NewCodeEntry (OP65_LDA, AM65_IMM, Arg, 0, E->LI); |
390 | break; |
391 | |
392 | case REG_PTR1_HI: |
393 | Arg = MakeHexArg (In->Ptr1Hi); |
394 | X = NewCodeEntry (OP65_LDA, AM65_IMM, Arg, 0, E->LI); |
395 | break; |
396 | |
397 | case REG_SREG_LO: |
398 | Arg = MakeHexArg (In->SRegLo); |
399 | X = NewCodeEntry (OP65_LDA, AM65_IMM, Arg, 0, E->LI); |
400 | break; |
401 | |
402 | case REG_SREG_HI: |
403 | Arg = MakeHexArg (In->SRegHi); |
404 | X = NewCodeEntry (OP65_LDA, AM65_IMM, Arg, 0, E->LI); |
405 | break; |
406 | } |
407 | } |
408 | break; |
409 | |
410 | case OP65_LDX: |
411 | if (E->AM == AM65_ZP) { |
412 | switch (GetKnownReg (E->Use & REG_ZP, In)) { |
413 | case REG_TMP1: |
414 | Arg = MakeHexArg (In->Tmp1); |
415 | X = NewCodeEntry (OP65_LDX, AM65_IMM, Arg, 0, E->LI); |
416 | break; |
417 | |
418 | case REG_PTR1_LO: |
419 | Arg = MakeHexArg (In->Ptr1Lo); |
420 | X = NewCodeEntry (OP65_LDX, AM65_IMM, Arg, 0, E->LI); |
421 | break; |
422 | |
423 | case REG_PTR1_HI: |
424 | Arg = MakeHexArg (In->Ptr1Hi); |
425 | X = NewCodeEntry (OP65_LDX, AM65_IMM, Arg, 0, E->LI); |
426 | break; |
427 | |
428 | case REG_SREG_LO: |
429 | Arg = MakeHexArg (In->SRegLo); |
430 | X = NewCodeEntry (OP65_LDX, AM65_IMM, Arg, 0, E->LI); |
431 | break; |
432 | |
433 | case REG_SREG_HI: |
434 | Arg = MakeHexArg (In->SRegHi); |
435 | X = NewCodeEntry (OP65_LDX, AM65_IMM, Arg, 0, E->LI); |
436 | break; |
437 | } |
438 | } |
439 | break; |
440 | |
441 | case OP65_LDY: |
442 | if (E->AM == AM65_ZP) { |
443 | switch (GetKnownReg (E->Use, In)) { |
444 | case REG_TMP1: |
445 | Arg = MakeHexArg (In->Tmp1); |
446 | X = NewCodeEntry (OP65_LDY, AM65_IMM, Arg, 0, E->LI); |
447 | break; |
448 | |
449 | case REG_PTR1_LO: |
450 | Arg = MakeHexArg (In->Ptr1Lo); |
451 | X = NewCodeEntry (OP65_LDY, AM65_IMM, Arg, 0, E->LI); |
452 | break; |
453 | |
454 | case REG_PTR1_HI: |
455 | Arg = MakeHexArg (In->Ptr1Hi); |
456 | X = NewCodeEntry (OP65_LDY, AM65_IMM, Arg, 0, E->LI); |
457 | break; |
458 | |
459 | case REG_SREG_LO: |
460 | Arg = MakeHexArg (In->SRegLo); |
461 | X = NewCodeEntry (OP65_LDY, AM65_IMM, Arg, 0, E->LI); |
462 | break; |
463 | |
464 | case REG_SREG_HI: |
465 | Arg = MakeHexArg (In->SRegHi); |
466 | X = NewCodeEntry (OP65_LDY, AM65_IMM, Arg, 0, E->LI); |
467 | break; |
468 | } |
469 | } |
470 | break; |
471 | |
472 | case OP65_TAX: |
473 | if (E->RI->In.RegA >= 0) { |
474 | Arg = MakeHexArg (In->RegA); |
475 | X = NewCodeEntry (OP65_LDX, AM65_IMM, Arg, 0, E->LI); |
476 | } |
477 | break; |
478 | |
479 | case OP65_TAY: |
480 | if (E->RI->In.RegA >= 0) { |
481 | Arg = MakeHexArg (In->RegA); |
482 | X = NewCodeEntry (OP65_LDY, AM65_IMM, Arg, 0, E->LI); |
483 | } |
484 | break; |
485 | |
486 | case OP65_TXA: |
487 | if (E->RI->In.RegX >= 0) { |
488 | Arg = MakeHexArg (In->RegX); |
489 | X = NewCodeEntry (OP65_LDA, AM65_IMM, Arg, 0, E->LI); |
490 | } |
491 | break; |
492 | |
493 | case OP65_TYA: |
494 | if (E->RI->In.RegY >= 0) { |
495 | Arg = MakeHexArg (In->RegY); |
496 | X = NewCodeEntry (OP65_LDA, AM65_IMM, Arg, 0, E->LI); |
497 | } |
498 | break; |
499 | |
500 | default: |
501 | /* Avoid gcc warnings */ |
502 | break; |
503 | |
504 | } |
505 | |
506 | /* Insert the replacement if we have one */ |
507 | if (X) { |
508 | CS_InsertEntry (S, X, I+1); |
509 | CS_DelEntry (S, I); |
510 | ++Changes; |
511 | } |
512 | |
513 | /* Next entry */ |
514 | ++I; |
515 | |
516 | } |
517 | |
518 | /* Return the number of changes made */ |
519 | return Changes; |
520 | } |
521 | |
522 | |
523 | |
524 | /*****************************************************************************/ |
525 | /* Optimize stack pointer ops */ |
526 | /*****************************************************************************/ |
527 | |
528 | |
529 | |
530 | static unsigned IsDecSP (const CodeEntry* E) |
531 | /* Check if this is an insn that decrements the stack pointer. If so, return |
532 | ** the decrement. If not, return zero. |
533 | ** The function expects E to be a subroutine call. |
534 | */ |
535 | { |
536 | if (strncmp (E->Arg, "decsp" , 5) == 0) { |
537 | if (E->Arg[5] >= '1' && E->Arg[5] <= '8') { |
538 | return (E->Arg[5] - '0'); |
539 | } |
540 | } else if (strcmp (E->Arg, "subysp" ) == 0 && RegValIsKnown (E->RI->In.RegY)) { |
541 | return E->RI->In.RegY; |
542 | } |
543 | |
544 | /* If we come here, it's not a decsp op */ |
545 | return 0; |
546 | } |
547 | |
548 | |
549 | |
550 | static unsigned OptStackPtrOps (CodeSeg* S) |
551 | /* Merge adjacent calls to decsp into one. NOTE: This function won't merge all |
552 | ** known cases! |
553 | */ |
554 | { |
555 | unsigned Changes = 0; |
556 | unsigned I; |
557 | |
558 | /* Walk over the entries */ |
559 | I = 0; |
560 | while (I < CS_GetEntryCount (S)) { |
561 | |
562 | unsigned Dec1; |
563 | unsigned Dec2; |
564 | const CodeEntry* N; |
565 | |
566 | /* Get the next entry */ |
567 | const CodeEntry* E = CS_GetEntry (S, I); |
568 | |
569 | /* Check for decspn or subysp */ |
570 | if (E->OPC == OP65_JSR && |
571 | (Dec1 = IsDecSP (E)) > 0 && |
572 | (N = CS_GetNextEntry (S, I)) != 0 && |
573 | (Dec2 = IsDecSP (N)) > 0 && |
574 | (Dec1 += Dec2) <= 255 && |
575 | !CE_HasLabel (N)) { |
576 | |
577 | CodeEntry* X; |
578 | char Buf[20]; |
579 | |
580 | /* We can combine the two */ |
581 | if (Dec1 <= 8) { |
582 | /* Insert a call to decsp */ |
583 | xsprintf (Buf, sizeof (Buf), "decsp%u" , Dec1); |
584 | X = NewCodeEntry (OP65_JSR, AM65_ABS, Buf, 0, N->LI); |
585 | CS_InsertEntry (S, X, I+2); |
586 | } else { |
587 | /* Insert a call to subysp */ |
588 | const char* Arg = MakeHexArg (Dec1); |
589 | X = NewCodeEntry (OP65_LDY, AM65_IMM, Arg, 0, N->LI); |
590 | CS_InsertEntry (S, X, I+2); |
591 | X = NewCodeEntry (OP65_JSR, AM65_ABS, "subysp" , 0, N->LI); |
592 | CS_InsertEntry (S, X, I+3); |
593 | } |
594 | |
595 | /* Delete the old code */ |
596 | CS_DelEntries (S, I, 2); |
597 | |
598 | /* Regenerate register info */ |
599 | CS_GenRegInfo (S); |
600 | |
601 | /* Remember we had changes */ |
602 | ++Changes; |
603 | |
604 | } else { |
605 | |
606 | /* Next entry */ |
607 | ++I; |
608 | } |
609 | |
610 | } |
611 | |
612 | /* Return the number of changes made */ |
613 | return Changes; |
614 | } |
615 | |
616 | static unsigned OptGotoSPAdj (CodeSeg* S) |
617 | /* Optimize SP adjustment for forward 'goto' */ |
618 | { |
619 | unsigned Changes = 0; |
620 | unsigned I; |
621 | |
622 | /* Walk over the entries */ |
623 | I = 0; |
624 | while (I < CS_GetEntryCount (S)) { |
625 | |
626 | CodeEntry* L[10], *X; |
627 | unsigned short adjustment; |
628 | const char* Arg; |
629 | |
630 | /* Get next entry */ |
631 | L[0] = CS_GetEntry (S, I); |
632 | |
633 | /* Check for the sequence generated by g_lateadjustSP */ |
634 | if (L[0]->OPC == OP65_PHA && |
635 | CS_GetEntries (S, L+1, I+1, 9) && |
636 | L[1]->OPC == OP65_LDA && |
637 | L[1]->AM == AM65_ABS && |
638 | L[2]->OPC == OP65_CLC && |
639 | L[3]->OPC == OP65_ADC && |
640 | strcmp (L[3]->Arg, "sp" ) == 0 && |
641 | L[6]->OPC == OP65_ADC && |
642 | strcmp (L[6]->Arg, "sp+1" ) == 0 && |
643 | L[9]->OPC == OP65_JMP) { |
644 | adjustment = FindSPAdjustment (L[1]->Arg); |
645 | |
646 | if (adjustment == 0) { |
647 | /* No SP adjustment needed, remove the whole sequence */ |
648 | CS_DelEntries (S, I, 9); |
649 | } |
650 | else if (adjustment >= 65536 - 8) { |
651 | /* If adjustment is in range [-8, 0) we use decsp* calls */ |
652 | char Buf[20]; |
653 | adjustment = 65536 - adjustment; |
654 | xsprintf (Buf, sizeof (Buf), "decsp%u" , adjustment); |
655 | X = NewCodeEntry (OP65_JSR, AM65_ABS, Buf, 0, L[1]->LI); |
656 | CS_InsertEntry (S, X, I + 9); |
657 | |
658 | /* Delete the old code */ |
659 | CS_DelEntries (S, I, 9); |
660 | } |
661 | else if (adjustment >= 65536 - 255) { |
662 | /* For range [-255, -8) we have ldy #, jsr subysp */ |
663 | adjustment = 65536 - adjustment; |
664 | Arg = MakeHexArg (adjustment); |
665 | X = NewCodeEntry (OP65_LDY, AM65_IMM, Arg, 0, L[1]->LI); |
666 | CS_InsertEntry (S, X, I + 9); |
667 | X = NewCodeEntry (OP65_JSR, AM65_ABS, "subysp" , 0, L[1]->LI); |
668 | CS_InsertEntry (S, X, I + 10); |
669 | |
670 | /* Delete the old code */ |
671 | CS_DelEntries (S, I, 9); |
672 | } |
673 | else if (adjustment > 255) { |
674 | /* For ranges [-32768, 255) and (255, 32767) the only modification |
675 | ** is to replace the absolute with immediate addressing |
676 | */ |
677 | Arg = MakeHexArg (adjustment & 0xff); |
678 | X = NewCodeEntry (OP65_LDA, AM65_IMM, Arg, 0, L[1]->LI); |
679 | CS_InsertEntry (S, X, I + 1); |
680 | Arg = MakeHexArg (adjustment >> 8); |
681 | X = NewCodeEntry (OP65_LDA, AM65_IMM, Arg, 0, L[5]->LI); |
682 | CS_InsertEntry (S, X, I + 6); |
683 | |
684 | /* Delete the old code */ |
685 | CS_DelEntry (S, I + 2); |
686 | CS_DelEntry (S, I + 6); |
687 | } |
688 | else if (adjustment > 8) { |
689 | /* For range (8, 255] we have ldy #, jsr addysp */ |
690 | Arg = MakeHexArg (adjustment & 0xff); |
691 | X = NewCodeEntry (OP65_LDY, AM65_IMM, Arg, 0, L[1]->LI); |
692 | CS_InsertEntry (S, X, I + 9); |
693 | X = NewCodeEntry (OP65_JSR, AM65_ABS, "addysp" , 0, L[1]->LI); |
694 | CS_InsertEntry (S, X, I + 10); |
695 | |
696 | /* Delete the old code */ |
697 | CS_DelEntries (S, I, 9); |
698 | } |
699 | else { |
700 | /* If adjustment is in range (0, 8] we use incsp* calls */ |
701 | char Buf[20]; |
702 | xsprintf (Buf, sizeof (Buf), "incsp%u" , adjustment); |
703 | X = NewCodeEntry (OP65_JSR, AM65_ABS, Buf, 0, L[1]->LI); |
704 | CS_InsertEntry (S, X, I + 9); |
705 | |
706 | /* Delete the old code */ |
707 | CS_DelEntries (S, I, 9); |
708 | } |
709 | /* Regenerate register info */ |
710 | CS_GenRegInfo (S); |
711 | |
712 | /* Remember we had changes */ |
713 | Changes++; |
714 | |
715 | } else { |
716 | |
717 | /* Next entry */ |
718 | ++I; |
719 | } |
720 | |
721 | } |
722 | |
723 | /* Return the number of changes made */ |
724 | return Changes; |
725 | } |
726 | |
727 | /*****************************************************************************/ |
728 | /* struct OptFunc */ |
729 | /*****************************************************************************/ |
730 | |
731 | |
732 | |
733 | typedef struct OptFunc OptFunc; |
734 | struct OptFunc { |
735 | unsigned (*Func) (CodeSeg*); /* Optimizer function */ |
736 | const char* Name; /* Name of the function/group */ |
737 | unsigned CodeSizeFactor; /* Code size factor for this opt func */ |
738 | unsigned long TotalRuns; /* Total number of runs */ |
739 | unsigned long LastRuns; /* Last number of runs */ |
740 | unsigned long TotalChanges; /* Total number of changes */ |
741 | unsigned long LastChanges; /* Last number of changes */ |
742 | char Disabled; /* True if function disabled */ |
743 | }; |
744 | |
745 | |
746 | |
747 | /*****************************************************************************/ |
748 | /* Code */ |
749 | /*****************************************************************************/ |
750 | |
751 | |
752 | |
753 | /* A list of all the function descriptions */ |
754 | static OptFunc DOpt65C02BitOps = { Opt65C02BitOps, "Opt65C02BitOps" , 66, 0, 0, 0, 0, 0 }; |
755 | static OptFunc DOpt65C02Ind = { Opt65C02Ind, "Opt65C02Ind" , 100, 0, 0, 0, 0, 0 }; |
756 | static OptFunc DOpt65C02Stores = { Opt65C02Stores, "Opt65C02Stores" , 100, 0, 0, 0, 0, 0 }; |
757 | static OptFunc DOptAdd1 = { OptAdd1, "OptAdd1" , 125, 0, 0, 0, 0, 0 }; |
758 | static OptFunc DOptAdd2 = { OptAdd2, "OptAdd2" , 200, 0, 0, 0, 0, 0 }; |
759 | static OptFunc DOptAdd3 = { OptAdd3, "OptAdd3" , 65, 0, 0, 0, 0, 0 }; |
760 | static OptFunc DOptAdd4 = { OptAdd4, "OptAdd4" , 90, 0, 0, 0, 0, 0 }; |
761 | static OptFunc DOptAdd5 = { OptAdd5, "OptAdd5" , 100, 0, 0, 0, 0, 0 }; |
762 | static OptFunc DOptAdd6 = { OptAdd6, "OptAdd6" , 40, 0, 0, 0, 0, 0 }; |
763 | static OptFunc DOptBNegA1 = { OptBNegA1, "OptBNegA1" , 100, 0, 0, 0, 0, 0 }; |
764 | static OptFunc DOptBNegA2 = { OptBNegA2, "OptBNegA2" , 100, 0, 0, 0, 0, 0 }; |
765 | static OptFunc DOptBNegAX1 = { OptBNegAX1, "OptBNegAX1" , 100, 0, 0, 0, 0, 0 }; |
766 | static OptFunc DOptBNegAX2 = { OptBNegAX2, "OptBNegAX2" , 100, 0, 0, 0, 0, 0 }; |
767 | static OptFunc DOptBNegAX3 = { OptBNegAX3, "OptBNegAX3" , 100, 0, 0, 0, 0, 0 }; |
768 | static OptFunc DOptBNegAX4 = { OptBNegAX4, "OptBNegAX4" , 100, 0, 0, 0, 0, 0 }; |
769 | static OptFunc DOptBoolTrans = { OptBoolTrans, "OptBoolTrans" , 100, 0, 0, 0, 0, 0 }; |
770 | static OptFunc DOptBranchDist = { OptBranchDist, "OptBranchDist" , 0, 0, 0, 0, 0, 0 }; |
771 | static OptFunc DOptCmp1 = { OptCmp1, "OptCmp1" , 42, 0, 0, 0, 0, 0 }; |
772 | static OptFunc DOptCmp2 = { OptCmp2, "OptCmp2" , 85, 0, 0, 0, 0, 0 }; |
773 | static OptFunc DOptCmp3 = { OptCmp3, "OptCmp3" , 75, 0, 0, 0, 0, 0 }; |
774 | static OptFunc DOptCmp4 = { OptCmp4, "OptCmp4" , 75, 0, 0, 0, 0, 0 }; |
775 | static OptFunc DOptCmp5 = { OptCmp5, "OptCmp5" , 100, 0, 0, 0, 0, 0 }; |
776 | static OptFunc DOptCmp6 = { OptCmp6, "OptCmp6" , 100, 0, 0, 0, 0, 0 }; |
777 | static OptFunc DOptCmp7 = { OptCmp7, "OptCmp7" , 85, 0, 0, 0, 0, 0 }; |
778 | static OptFunc DOptCmp8 = { OptCmp8, "OptCmp8" , 50, 0, 0, 0, 0, 0 }; |
779 | static OptFunc DOptCmp9 = { OptCmp9, "OptCmp9" , 85, 0, 0, 0, 0, 0 }; |
780 | static OptFunc DOptComplAX1 = { OptComplAX1, "OptComplAX1" , 65, 0, 0, 0, 0, 0 }; |
781 | static OptFunc DOptCondBranches1= { OptCondBranches1,"OptCondBranches1" , 80, 0, 0, 0, 0, 0 }; |
782 | static OptFunc DOptCondBranches2= { OptCondBranches2,"OptCondBranches2" , 0, 0, 0, 0, 0, 0 }; |
783 | static OptFunc DOptDeadCode = { OptDeadCode, "OptDeadCode" , 100, 0, 0, 0, 0, 0 }; |
784 | static OptFunc DOptDeadJumps = { OptDeadJumps, "OptDeadJumps" , 100, 0, 0, 0, 0, 0 }; |
785 | static OptFunc DOptDecouple = { OptDecouple, "OptDecouple" , 100, 0, 0, 0, 0, 0 }; |
786 | static OptFunc DOptDupLoads = { OptDupLoads, "OptDupLoads" , 0, 0, 0, 0, 0, 0 }; |
787 | static OptFunc DOptGotoSPAdj = { OptGotoSPAdj, "OptGotoSPAdj" , 0, 0, 0, 0, 0, 0 }; |
788 | static OptFunc DOptIndLoads1 = { OptIndLoads1, "OptIndLoads1" , 0, 0, 0, 0, 0, 0 }; |
789 | static OptFunc DOptIndLoads2 = { OptIndLoads2, "OptIndLoads2" , 0, 0, 0, 0, 0, 0 }; |
790 | static OptFunc DOptJumpCascades = { OptJumpCascades, "OptJumpCascades" , 100, 0, 0, 0, 0, 0 }; |
791 | static OptFunc DOptJumpTarget1 = { OptJumpTarget1, "OptJumpTarget1" , 100, 0, 0, 0, 0, 0 }; |
792 | static OptFunc DOptJumpTarget2 = { OptJumpTarget2, "OptJumpTarget2" , 100, 0, 0, 0, 0, 0 }; |
793 | static OptFunc DOptJumpTarget3 = { OptJumpTarget3, "OptJumpTarget3" , 100, 0, 0, 0, 0, 0 }; |
794 | static OptFunc DOptLoad1 = { OptLoad1, "OptLoad1" , 100, 0, 0, 0, 0, 0 }; |
795 | static OptFunc DOptLoad2 = { OptLoad2, "OptLoad2" , 200, 0, 0, 0, 0, 0 }; |
796 | static OptFunc DOptLoad3 = { OptLoad3, "OptLoad3" , 0, 0, 0, 0, 0, 0 }; |
797 | static OptFunc DOptNegAX1 = { OptNegAX1, "OptNegAX1" , 165, 0, 0, 0, 0, 0 }; |
798 | static OptFunc DOptNegAX2 = { OptNegAX2, "OptNegAX2" , 200, 0, 0, 0, 0, 0 }; |
799 | static OptFunc DOptRTS = { OptRTS, "OptRTS" , 100, 0, 0, 0, 0, 0 }; |
800 | static OptFunc DOptRTSJumps1 = { OptRTSJumps1, "OptRTSJumps1" , 100, 0, 0, 0, 0, 0 }; |
801 | static OptFunc DOptRTSJumps2 = { OptRTSJumps2, "OptRTSJumps2" , 100, 0, 0, 0, 0, 0 }; |
802 | static OptFunc DOptPrecalc = { OptPrecalc, "OptPrecalc" , 100, 0, 0, 0, 0, 0 }; |
803 | static OptFunc DOptPtrLoad1 = { OptPtrLoad1, "OptPtrLoad1" , 100, 0, 0, 0, 0, 0 }; |
804 | static OptFunc DOptPtrLoad2 = { OptPtrLoad2, "OptPtrLoad2" , 100, 0, 0, 0, 0, 0 }; |
805 | static OptFunc DOptPtrLoad3 = { OptPtrLoad3, "OptPtrLoad3" , 100, 0, 0, 0, 0, 0 }; |
806 | static OptFunc DOptPtrLoad4 = { OptPtrLoad4, "OptPtrLoad4" , 100, 0, 0, 0, 0, 0 }; |
807 | static OptFunc DOptPtrLoad5 = { OptPtrLoad5, "OptPtrLoad5" , 50, 0, 0, 0, 0, 0 }; |
808 | static OptFunc DOptPtrLoad6 = { OptPtrLoad6, "OptPtrLoad6" , 60, 0, 0, 0, 0, 0 }; |
809 | static OptFunc DOptPtrLoad7 = { OptPtrLoad7, "OptPtrLoad7" , 140, 0, 0, 0, 0, 0 }; |
810 | static OptFunc DOptPtrLoad11 = { OptPtrLoad11, "OptPtrLoad11" , 92, 0, 0, 0, 0, 0 }; |
811 | static OptFunc DOptPtrLoad12 = { OptPtrLoad12, "OptPtrLoad12" , 50, 0, 0, 0, 0, 0 }; |
812 | static OptFunc DOptPtrLoad13 = { OptPtrLoad13, "OptPtrLoad13" , 65, 0, 0, 0, 0, 0 }; |
813 | static OptFunc DOptPtrLoad14 = { OptPtrLoad14, "OptPtrLoad14" , 108, 0, 0, 0, 0, 0 }; |
814 | static OptFunc DOptPtrLoad15 = { OptPtrLoad15, "OptPtrLoad15" , 86, 0, 0, 0, 0, 0 }; |
815 | static OptFunc DOptPtrLoad16 = { OptPtrLoad16, "OptPtrLoad16" , 100, 0, 0, 0, 0, 0 }; |
816 | static OptFunc DOptPtrLoad17 = { OptPtrLoad17, "OptPtrLoad17" , 190, 0, 0, 0, 0, 0 }; |
817 | static OptFunc DOptPtrLoad18 = { OptPtrLoad18, "OptPtrLoad18" , 100, 0, 0, 0, 0, 0 }; |
818 | static OptFunc DOptPtrLoad19 = { OptPtrLoad19, "OptPtrLoad19" , 65, 0, 0, 0, 0, 0 }; |
819 | static OptFunc DOptPtrStore1 = { OptPtrStore1, "OptPtrStore1" , 65, 0, 0, 0, 0, 0 }; |
820 | static OptFunc DOptPtrStore2 = { OptPtrStore2, "OptPtrStore2" , 65, 0, 0, 0, 0, 0 }; |
821 | static OptFunc DOptPtrStore3 = { OptPtrStore3, "OptPtrStore3" , 100, 0, 0, 0, 0, 0 }; |
822 | static OptFunc DOptPush1 = { OptPush1, "OptPush1" , 65, 0, 0, 0, 0, 0 }; |
823 | static OptFunc DOptPush2 = { OptPush2, "OptPush2" , 50, 0, 0, 0, 0, 0 }; |
824 | static OptFunc DOptPushPop = { OptPushPop, "OptPushPop" , 0, 0, 0, 0, 0, 0 }; |
825 | static OptFunc DOptShift1 = { OptShift1, "OptShift1" , 100, 0, 0, 0, 0, 0 }; |
826 | static OptFunc DOptShift2 = { OptShift2, "OptShift2" , 100, 0, 0, 0, 0, 0 }; |
827 | static OptFunc DOptShift3 = { OptShift3, "OptShift3" , 17, 0, 0, 0, 0, 0 }; |
828 | static OptFunc DOptShift4 = { OptShift4, "OptShift4" , 100, 0, 0, 0, 0, 0 }; |
829 | static OptFunc DOptShift5 = { OptShift5, "OptShift5" , 110, 0, 0, 0, 0, 0 }; |
830 | static OptFunc DOptShift6 = { OptShift6, "OptShift6" , 200, 0, 0, 0, 0, 0 }; |
831 | static OptFunc DOptSize1 = { OptSize1, "OptSize1" , 100, 0, 0, 0, 0, 0 }; |
832 | static OptFunc DOptSize2 = { OptSize2, "OptSize2" , 100, 0, 0, 0, 0, 0 }; |
833 | static OptFunc DOptStackOps = { OptStackOps, "OptStackOps" , 100, 0, 0, 0, 0, 0 }; |
834 | static OptFunc DOptStackPtrOps = { OptStackPtrOps, "OptStackPtrOps" , 50, 0, 0, 0, 0, 0 }; |
835 | static OptFunc DOptStore1 = { OptStore1, "OptStore1" , 70, 0, 0, 0, 0, 0 }; |
836 | static OptFunc DOptStore2 = { OptStore2, "OptStore2" , 115, 0, 0, 0, 0, 0 }; |
837 | static OptFunc DOptStore3 = { OptStore3, "OptStore3" , 120, 0, 0, 0, 0, 0 }; |
838 | static OptFunc DOptStore4 = { OptStore4, "OptStore4" , 50, 0, 0, 0, 0, 0 }; |
839 | static OptFunc DOptStore5 = { OptStore5, "OptStore5" , 100, 0, 0, 0, 0, 0 }; |
840 | static OptFunc DOptStoreLoad = { OptStoreLoad, "OptStoreLoad" , 0, 0, 0, 0, 0, 0 }; |
841 | static OptFunc DOptSub1 = { OptSub1, "OptSub1" , 100, 0, 0, 0, 0, 0 }; |
842 | static OptFunc DOptSub2 = { OptSub2, "OptSub2" , 100, 0, 0, 0, 0, 0 }; |
843 | static OptFunc DOptSub3 = { OptSub3, "OptSub3" , 100, 0, 0, 0, 0, 0 }; |
844 | static OptFunc DOptTest1 = { OptTest1, "OptTest1" , 65, 0, 0, 0, 0, 0 }; |
845 | static OptFunc DOptTest2 = { OptTest2, "OptTest2" , 50, 0, 0, 0, 0, 0 }; |
846 | static OptFunc DOptTransfers1 = { OptTransfers1, "OptTransfers1" , 0, 0, 0, 0, 0, 0 }; |
847 | static OptFunc DOptTransfers2 = { OptTransfers2, "OptTransfers2" , 60, 0, 0, 0, 0, 0 }; |
848 | static OptFunc DOptTransfers3 = { OptTransfers3, "OptTransfers3" , 65, 0, 0, 0, 0, 0 }; |
849 | static OptFunc DOptTransfers4 = { OptTransfers4, "OptTransfers4" , 65, 0, 0, 0, 0, 0 }; |
850 | static OptFunc DOptUnusedLoads = { OptUnusedLoads, "OptUnusedLoads" , 0, 0, 0, 0, 0, 0 }; |
851 | static OptFunc DOptUnusedStores = { OptUnusedStores, "OptUnusedStores" , 0, 0, 0, 0, 0, 0 }; |
852 | |
853 | |
854 | /* Table containing all the steps in alphabetical order */ |
855 | static OptFunc* OptFuncs[] = { |
856 | &DOpt65C02BitOps, |
857 | &DOpt65C02Ind, |
858 | &DOpt65C02Stores, |
859 | &DOptAdd1, |
860 | &DOptAdd2, |
861 | &DOptAdd3, |
862 | &DOptAdd4, |
863 | &DOptAdd5, |
864 | &DOptAdd6, |
865 | &DOptBNegA1, |
866 | &DOptBNegA2, |
867 | &DOptBNegAX1, |
868 | &DOptBNegAX2, |
869 | &DOptBNegAX3, |
870 | &DOptBNegAX4, |
871 | &DOptBoolTrans, |
872 | &DOptBranchDist, |
873 | &DOptCmp1, |
874 | &DOptCmp2, |
875 | &DOptCmp3, |
876 | &DOptCmp4, |
877 | &DOptCmp5, |
878 | &DOptCmp6, |
879 | &DOptCmp7, |
880 | &DOptCmp8, |
881 | &DOptCmp9, |
882 | &DOptComplAX1, |
883 | &DOptCondBranches1, |
884 | &DOptCondBranches2, |
885 | &DOptDeadCode, |
886 | &DOptDeadJumps, |
887 | &DOptDecouple, |
888 | &DOptDupLoads, |
889 | &DOptGotoSPAdj, |
890 | &DOptIndLoads1, |
891 | &DOptIndLoads2, |
892 | &DOptJumpCascades, |
893 | &DOptJumpTarget1, |
894 | &DOptJumpTarget2, |
895 | &DOptJumpTarget3, |
896 | &DOptLoad1, |
897 | &DOptLoad2, |
898 | &DOptLoad3, |
899 | &DOptNegAX1, |
900 | &DOptNegAX2, |
901 | &DOptPrecalc, |
902 | &DOptPtrLoad1, |
903 | &DOptPtrLoad11, |
904 | &DOptPtrLoad12, |
905 | &DOptPtrLoad13, |
906 | &DOptPtrLoad14, |
907 | &DOptPtrLoad15, |
908 | &DOptPtrLoad16, |
909 | &DOptPtrLoad17, |
910 | &DOptPtrLoad18, |
911 | &DOptPtrLoad19, |
912 | &DOptPtrLoad2, |
913 | &DOptPtrLoad3, |
914 | &DOptPtrLoad4, |
915 | &DOptPtrLoad5, |
916 | &DOptPtrLoad6, |
917 | &DOptPtrLoad7, |
918 | &DOptPtrStore1, |
919 | &DOptPtrStore2, |
920 | &DOptPtrStore3, |
921 | &DOptPush1, |
922 | &DOptPush2, |
923 | &DOptPushPop, |
924 | &DOptRTS, |
925 | &DOptRTSJumps1, |
926 | &DOptRTSJumps2, |
927 | &DOptShift1, |
928 | &DOptShift2, |
929 | &DOptShift3, |
930 | &DOptShift4, |
931 | &DOptShift5, |
932 | &DOptShift6, |
933 | &DOptSize1, |
934 | &DOptSize2, |
935 | &DOptStackOps, |
936 | &DOptStackPtrOps, |
937 | &DOptStore1, |
938 | &DOptStore2, |
939 | &DOptStore3, |
940 | &DOptStore4, |
941 | &DOptStore5, |
942 | &DOptStoreLoad, |
943 | &DOptSub1, |
944 | &DOptSub2, |
945 | &DOptSub3, |
946 | &DOptTest1, |
947 | &DOptTest2, |
948 | &DOptTransfers1, |
949 | &DOptTransfers2, |
950 | &DOptTransfers3, |
951 | &DOptTransfers4, |
952 | &DOptUnusedLoads, |
953 | &DOptUnusedStores, |
954 | }; |
955 | #define OPTFUNC_COUNT (sizeof(OptFuncs) / sizeof(OptFuncs[0])) |
956 | |
957 | |
958 | |
959 | static int CmpOptStep (const void* Key, const void* Func) |
960 | /* Compare function for bsearch */ |
961 | { |
962 | return strcmp (Key, (*(const OptFunc**)Func)->Name); |
963 | } |
964 | |
965 | |
966 | |
967 | static OptFunc* FindOptFunc (const char* Name) |
968 | /* Find an optimizer step by name in the table and return a pointer. Return |
969 | ** NULL if no such step is found. |
970 | */ |
971 | { |
972 | /* Search for the function in the list */ |
973 | OptFunc** O = bsearch (Name, OptFuncs, OPTFUNC_COUNT, sizeof (OptFuncs[0]), CmpOptStep); |
974 | return O? *O : 0; |
975 | } |
976 | |
977 | |
978 | |
979 | static OptFunc* GetOptFunc (const char* Name) |
980 | /* Find an optimizer step by name in the table and return a pointer. Print an |
981 | ** error and call AbEnd if not found. |
982 | */ |
983 | { |
984 | /* Search for the function in the list */ |
985 | OptFunc* F = FindOptFunc (Name); |
986 | if (F == 0) { |
987 | /* Not found */ |
988 | AbEnd ("Optimization step '%s' not found" , Name); |
989 | } |
990 | return F; |
991 | } |
992 | |
993 | |
994 | |
995 | void DisableOpt (const char* Name) |
996 | /* Disable the optimization with the given name */ |
997 | { |
998 | if (strcmp (Name, "any" ) == 0) { |
999 | unsigned I; |
1000 | for (I = 0; I < OPTFUNC_COUNT; ++I) { |
1001 | OptFuncs[I]->Disabled = 1; |
1002 | } |
1003 | } else { |
1004 | GetOptFunc(Name)->Disabled = 1; |
1005 | } |
1006 | } |
1007 | |
1008 | |
1009 | |
1010 | void EnableOpt (const char* Name) |
1011 | /* Enable the optimization with the given name */ |
1012 | { |
1013 | if (strcmp (Name, "any" ) == 0) { |
1014 | unsigned I; |
1015 | for (I = 0; I < OPTFUNC_COUNT; ++I) { |
1016 | OptFuncs[I]->Disabled = 0; |
1017 | } |
1018 | } else { |
1019 | GetOptFunc(Name)->Disabled = 0; |
1020 | } |
1021 | } |
1022 | |
1023 | |
1024 | |
1025 | void ListOptSteps (FILE* F) |
1026 | /* List all optimization steps */ |
1027 | { |
1028 | unsigned I; |
1029 | |
1030 | fprintf (F, "any\n" ); |
1031 | for (I = 0; I < OPTFUNC_COUNT; ++I) { |
1032 | fprintf (F, "%s\n" , OptFuncs[I]->Name); |
1033 | } |
1034 | } |
1035 | |
1036 | |
1037 | |
1038 | static void ReadOptStats (const char* Name) |
1039 | /* Read the optimizer statistics file */ |
1040 | { |
1041 | char Buf [256]; |
1042 | unsigned Lines; |
1043 | |
1044 | /* Try to open the file */ |
1045 | FILE* F = fopen (Name, "r" ); |
1046 | if (F == 0) { |
1047 | /* Ignore the error */ |
1048 | return; |
1049 | } |
1050 | |
1051 | /* Read and parse the lines */ |
1052 | Lines = 0; |
1053 | while (fgets (Buf, sizeof (Buf), F) != 0) { |
1054 | |
1055 | char* B; |
1056 | unsigned Len; |
1057 | OptFunc* Func; |
1058 | |
1059 | /* Fields */ |
1060 | char Name[32]; |
1061 | unsigned long TotalRuns; |
1062 | unsigned long TotalChanges; |
1063 | |
1064 | /* Count lines */ |
1065 | ++Lines; |
1066 | |
1067 | /* Remove trailing white space including the line terminator */ |
1068 | B = Buf; |
1069 | Len = strlen (B); |
1070 | while (Len > 0 && IsSpace (B[Len-1])) { |
1071 | --Len; |
1072 | } |
1073 | B[Len] = '\0'; |
1074 | |
1075 | /* Remove leading whitespace */ |
1076 | while (IsSpace (*B)) { |
1077 | ++B; |
1078 | } |
1079 | |
1080 | /* Check for empty and comment lines */ |
1081 | if (*B == '\0' || *B == ';' || *B == '#') { |
1082 | continue; |
1083 | } |
1084 | |
1085 | /* Parse the line */ |
1086 | if (sscanf (B, "%31s %lu %*u %lu %*u" , Name, &TotalRuns, &TotalChanges) != 3) { |
1087 | /* Syntax error */ |
1088 | continue; |
1089 | } |
1090 | |
1091 | /* Search for the optimizer step. */ |
1092 | Func = FindOptFunc (Name); |
1093 | if (Func == 0) { |
1094 | /* Not found */ |
1095 | continue; |
1096 | } |
1097 | |
1098 | /* Found the step, set the fields */ |
1099 | Func->TotalRuns = TotalRuns; |
1100 | Func->TotalChanges = TotalChanges; |
1101 | |
1102 | } |
1103 | |
1104 | /* Close the file, ignore errors here. */ |
1105 | fclose (F); |
1106 | } |
1107 | |
1108 | |
1109 | |
1110 | static void WriteOptStats (const char* Name) |
1111 | /* Write the optimizer statistics file */ |
1112 | { |
1113 | unsigned I; |
1114 | |
1115 | /* Try to open the file */ |
1116 | FILE* F = fopen (Name, "w" ); |
1117 | if (F == 0) { |
1118 | /* Ignore the error */ |
1119 | return; |
1120 | } |
1121 | |
1122 | /* Write a header */ |
1123 | fprintf (F, |
1124 | "; Optimizer Total Last Total Last\n" |
1125 | "; Step Runs Runs Chg Chg\n" ); |
1126 | |
1127 | |
1128 | /* Write the data */ |
1129 | for (I = 0; I < OPTFUNC_COUNT; ++I) { |
1130 | const OptFunc* O = OptFuncs[I]; |
1131 | fprintf (F, |
1132 | "%-20s %10lu %10lu %10lu %10lu\n" , |
1133 | O->Name, |
1134 | O->TotalRuns, |
1135 | O->LastRuns, |
1136 | O->TotalChanges, |
1137 | O->LastChanges); |
1138 | } |
1139 | |
1140 | /* Close the file, ignore errors here. */ |
1141 | fclose (F); |
1142 | } |
1143 | |
1144 | |
1145 | |
1146 | static void OpenDebugFile (const CodeSeg* S) |
1147 | /* Open the debug file for the given segment if the flag is on */ |
1148 | { |
1149 | if (DebugOptOutput) { |
1150 | StrBuf Name = AUTO_STRBUF_INITIALIZER; |
1151 | if (S->Func) { |
1152 | SB_CopyStr (&Name, S->Func->Name); |
1153 | } else { |
1154 | SB_CopyStr (&Name, "global" ); |
1155 | } |
1156 | SB_AppendStr (&Name, ".opt" ); |
1157 | SB_Terminate (&Name); |
1158 | OpenDebugOutputFile (SB_GetConstBuf (&Name)); |
1159 | SB_Done (&Name); |
1160 | } |
1161 | } |
1162 | |
1163 | |
1164 | |
1165 | static void WriteDebugOutput (CodeSeg* S, const char* Step) |
1166 | /* Write a separator line into the debug file if the flag is on */ |
1167 | { |
1168 | if (DebugOptOutput) { |
1169 | /* Output a separator */ |
1170 | WriteOutput ("=========================================================================\n" ); |
1171 | |
1172 | /* Output a header line */ |
1173 | if (Step == 0) { |
1174 | /* Initial output */ |
1175 | WriteOutput ("Initial code for function '%s':\n" , |
1176 | S->Func? S->Func->Name : "<global>" ); |
1177 | } else { |
1178 | WriteOutput ("Code after applying '%s':\n" , Step); |
1179 | } |
1180 | |
1181 | /* Output the code segment */ |
1182 | CS_Output (S); |
1183 | } |
1184 | } |
1185 | |
1186 | |
1187 | |
1188 | static unsigned RunOptFunc (CodeSeg* S, OptFunc* F, unsigned Max) |
1189 | /* Run one optimizer function Max times or until there are no more changes */ |
1190 | { |
1191 | unsigned Changes, C; |
1192 | |
1193 | /* Don't run the function if it is disabled or if it is prohibited by the |
1194 | ** code size factor |
1195 | */ |
1196 | if (F->Disabled || F->CodeSizeFactor > S->CodeSizeFactor) { |
1197 | return 0; |
1198 | } |
1199 | |
1200 | /* Run this until there are no more changes */ |
1201 | Changes = 0; |
1202 | do { |
1203 | |
1204 | /* Run the function */ |
1205 | C = F->Func (S); |
1206 | Changes += C; |
1207 | |
1208 | /* Do statistics */ |
1209 | ++F->TotalRuns; |
1210 | ++F->LastRuns; |
1211 | F->TotalChanges += C; |
1212 | F->LastChanges += C; |
1213 | |
1214 | /* If we had changes, output stuff and regenerate register info */ |
1215 | if (C) { |
1216 | if (Debug) { |
1217 | printf ("Applied %s: %u changes\n" , F->Name, C); |
1218 | } |
1219 | WriteDebugOutput (S, F->Name); |
1220 | CS_GenRegInfo (S); |
1221 | } |
1222 | |
1223 | } while (--Max && C > 0); |
1224 | |
1225 | /* Return the number of changes */ |
1226 | return Changes; |
1227 | } |
1228 | |
1229 | |
1230 | |
1231 | static unsigned RunOptGroup1 (CodeSeg* S) |
1232 | /* Run the first group of optimization steps. These steps translate known |
1233 | ** patterns emitted by the code generator into more optimal patterns. Order |
1234 | ** of the steps is important, because some of the steps done earlier cover |
1235 | ** the same patterns as later steps as subpatterns. |
1236 | */ |
1237 | { |
1238 | unsigned Changes = 0; |
1239 | |
1240 | Changes += RunOptFunc (S, &DOptGotoSPAdj, 1); |
1241 | Changes += RunOptFunc (S, &DOptStackPtrOps, 5); |
1242 | Changes += RunOptFunc (S, &DOptPtrStore1, 1); |
1243 | Changes += RunOptFunc (S, &DOptPtrStore2, 1); |
1244 | Changes += RunOptFunc (S, &DOptPtrStore3, 1); |
1245 | Changes += RunOptFunc (S, &DOptAdd3, 1); /* Before OptPtrLoad5! */ |
1246 | Changes += RunOptFunc (S, &DOptPtrLoad1, 1); |
1247 | Changes += RunOptFunc (S, &DOptPtrLoad2, 1); |
1248 | Changes += RunOptFunc (S, &DOptPtrLoad3, 1); |
1249 | Changes += RunOptFunc (S, &DOptPtrLoad4, 1); |
1250 | Changes += RunOptFunc (S, &DOptPtrLoad5, 1); |
1251 | Changes += RunOptFunc (S, &DOptPtrLoad6, 1); |
1252 | Changes += RunOptFunc (S, &DOptPtrLoad18, 1); /* Before OptPtrLoad7 */ |
1253 | Changes += RunOptFunc (S, &DOptPtrLoad19, 1); /* Before OptPtrLoad7 */ |
1254 | Changes += RunOptFunc (S, &DOptPtrLoad7, 1); |
1255 | Changes += RunOptFunc (S, &DOptPtrLoad11, 1); |
1256 | Changes += RunOptFunc (S, &DOptPtrLoad12, 1); |
1257 | Changes += RunOptFunc (S, &DOptPtrLoad13, 1); |
1258 | Changes += RunOptFunc (S, &DOptPtrLoad14, 1); |
1259 | Changes += RunOptFunc (S, &DOptPtrLoad15, 1); |
1260 | Changes += RunOptFunc (S, &DOptPtrLoad16, 1); |
1261 | Changes += RunOptFunc (S, &DOptPtrLoad17, 1); |
1262 | Changes += RunOptFunc (S, &DOptBNegAX1, 1); |
1263 | Changes += RunOptFunc (S, &DOptBNegAX2, 1); |
1264 | Changes += RunOptFunc (S, &DOptBNegAX3, 1); |
1265 | Changes += RunOptFunc (S, &DOptBNegAX4, 1); |
1266 | Changes += RunOptFunc (S, &DOptAdd1, 1); |
1267 | Changes += RunOptFunc (S, &DOptAdd2, 1); |
1268 | Changes += RunOptFunc (S, &DOptAdd4, 1); |
1269 | Changes += RunOptFunc (S, &DOptAdd5, 1); |
1270 | Changes += RunOptFunc (S, &DOptAdd6, 1); |
1271 | Changes += RunOptFunc (S, &DOptSub1, 1); |
1272 | Changes += RunOptFunc (S, &DOptSub3, 1); |
1273 | Changes += RunOptFunc (S, &DOptStore4, 1); |
1274 | Changes += RunOptFunc (S, &DOptStore5, 1); |
1275 | Changes += RunOptFunc (S, &DOptShift1, 1); |
1276 | Changes += RunOptFunc (S, &DOptShift2, 1); |
1277 | Changes += RunOptFunc (S, &DOptShift5, 1); |
1278 | Changes += RunOptFunc (S, &DOptShift6, 1); |
1279 | Changes += RunOptFunc (S, &DOptStore1, 1); |
1280 | Changes += RunOptFunc (S, &DOptStore2, 5); |
1281 | Changes += RunOptFunc (S, &DOptStore3, 5); |
1282 | |
1283 | /* Return the number of changes */ |
1284 | return Changes; |
1285 | } |
1286 | |
1287 | |
1288 | |
1289 | static unsigned RunOptGroup2 (CodeSeg* S) |
1290 | /* Run one group of optimization steps. This step involves just decoupling |
1291 | ** instructions by replacing them by instructions that do not depend on |
1292 | ** previous instructions. This makes it easier to find instructions that |
1293 | ** aren't used. |
1294 | */ |
1295 | { |
1296 | unsigned Changes = 0; |
1297 | |
1298 | Changes += RunOptFunc (S, &DOptDecouple, 1); |
1299 | |
1300 | /* Return the number of changes */ |
1301 | return Changes; |
1302 | } |
1303 | |
1304 | |
1305 | |
1306 | static unsigned RunOptGroup3 (CodeSeg* S) |
1307 | /* Run one group of optimization steps. These steps depend on each other, |
1308 | ** that means that one step may allow another step to do additional work, |
1309 | ** so we will repeat the steps as long as we see any changes. |
1310 | */ |
1311 | { |
1312 | unsigned Changes, C; |
1313 | |
1314 | Changes = 0; |
1315 | do { |
1316 | C = 0; |
1317 | |
1318 | C += RunOptFunc (S, &DOptBNegA1, 1); |
1319 | C += RunOptFunc (S, &DOptBNegA2, 1); |
1320 | C += RunOptFunc (S, &DOptNegAX1, 1); |
1321 | C += RunOptFunc (S, &DOptNegAX2, 1); |
1322 | C += RunOptFunc (S, &DOptStackOps, 3); |
1323 | C += RunOptFunc (S, &DOptShift1, 1); |
1324 | C += RunOptFunc (S, &DOptShift4, 1); |
1325 | C += RunOptFunc (S, &DOptComplAX1, 1); |
1326 | C += RunOptFunc (S, &DOptSub1, 1); |
1327 | C += RunOptFunc (S, &DOptSub2, 1); |
1328 | C += RunOptFunc (S, &DOptSub3, 1); |
1329 | C += RunOptFunc (S, &DOptAdd5, 1); |
1330 | C += RunOptFunc (S, &DOptAdd6, 1); |
1331 | C += RunOptFunc (S, &DOptJumpCascades, 1); |
1332 | C += RunOptFunc (S, &DOptDeadJumps, 1); |
1333 | C += RunOptFunc (S, &DOptRTS, 1); |
1334 | C += RunOptFunc (S, &DOptDeadCode, 1); |
1335 | C += RunOptFunc (S, &DOptBoolTrans, 1); |
1336 | C += RunOptFunc (S, &DOptJumpTarget1, 1); |
1337 | C += RunOptFunc (S, &DOptJumpTarget2, 1); |
1338 | C += RunOptFunc (S, &DOptCondBranches1, 1); |
1339 | C += RunOptFunc (S, &DOptCondBranches2, 1); |
1340 | C += RunOptFunc (S, &DOptRTSJumps1, 1); |
1341 | C += RunOptFunc (S, &DOptCmp1, 1); |
1342 | C += RunOptFunc (S, &DOptCmp2, 1); |
1343 | C += RunOptFunc (S, &DOptCmp8, 1); /* Must run before OptCmp3 */ |
1344 | C += RunOptFunc (S, &DOptCmp3, 1); |
1345 | C += RunOptFunc (S, &DOptCmp4, 1); |
1346 | C += RunOptFunc (S, &DOptCmp5, 1); |
1347 | C += RunOptFunc (S, &DOptCmp6, 1); |
1348 | C += RunOptFunc (S, &DOptCmp7, 1); |
1349 | C += RunOptFunc (S, &DOptCmp9, 1); |
1350 | C += RunOptFunc (S, &DOptTest1, 1); |
1351 | C += RunOptFunc (S, &DOptLoad1, 1); |
1352 | C += RunOptFunc (S, &DOptJumpTarget3, 1); /* After OptCondBranches2 */ |
1353 | C += RunOptFunc (S, &DOptUnusedLoads, 1); |
1354 | C += RunOptFunc (S, &DOptUnusedStores, 1); |
1355 | C += RunOptFunc (S, &DOptDupLoads, 1); |
1356 | C += RunOptFunc (S, &DOptStoreLoad, 1); |
1357 | C += RunOptFunc (S, &DOptTransfers1, 1); |
1358 | C += RunOptFunc (S, &DOptTransfers3, 1); |
1359 | C += RunOptFunc (S, &DOptTransfers4, 1); |
1360 | C += RunOptFunc (S, &DOptStore1, 1); |
1361 | C += RunOptFunc (S, &DOptStore5, 1); |
1362 | C += RunOptFunc (S, &DOptPushPop, 1); |
1363 | C += RunOptFunc (S, &DOptPrecalc, 1); |
1364 | |
1365 | Changes += C; |
1366 | |
1367 | } while (C); |
1368 | |
1369 | /* Return the number of changes */ |
1370 | return Changes; |
1371 | } |
1372 | |
1373 | |
1374 | |
1375 | static unsigned RunOptGroup4 (CodeSeg* S) |
1376 | /* Run another round of pattern replacements. These are done late, since there |
1377 | ** may be better replacements before. |
1378 | */ |
1379 | { |
1380 | unsigned Changes = 0; |
1381 | |
1382 | /* Repeat some of the steps here */ |
1383 | Changes += RunOptFunc (S, &DOptShift3, 1); |
1384 | Changes += RunOptFunc (S, &DOptPush1, 1); |
1385 | Changes += RunOptFunc (S, &DOptPush2, 1); |
1386 | Changes += RunOptFunc (S, &DOptUnusedLoads, 1); |
1387 | Changes += RunOptFunc (S, &DOptTest2, 1); |
1388 | Changes += RunOptFunc (S, &DOptTransfers2, 1); |
1389 | Changes += RunOptFunc (S, &DOptLoad2, 1); |
1390 | Changes += RunOptFunc (S, &DOptLoad3, 1); |
1391 | Changes += RunOptFunc (S, &DOptDupLoads, 1); |
1392 | |
1393 | /* Return the number of changes */ |
1394 | return Changes; |
1395 | } |
1396 | |
1397 | |
1398 | |
1399 | static unsigned RunOptGroup5 (CodeSeg* S) |
1400 | /* 65C02 specific optimizations. */ |
1401 | { |
1402 | unsigned Changes = 0; |
1403 | |
1404 | if (CPUIsets[CPU] & CPU_ISET_65SC02) { |
1405 | Changes += RunOptFunc (S, &DOpt65C02BitOps, 1); |
1406 | Changes += RunOptFunc (S, &DOpt65C02Ind, 1); |
1407 | Changes += RunOptFunc (S, &DOpt65C02Stores, 1); |
1408 | if (Changes) { |
1409 | /* The 65C02 replacement codes do often make the use of a register |
1410 | ** value unnecessary, so if we have changes, run another load |
1411 | ** removal pass. |
1412 | */ |
1413 | Changes += RunOptFunc (S, &DOptUnusedLoads, 1); |
1414 | } |
1415 | } |
1416 | |
1417 | /* Return the number of changes */ |
1418 | return Changes; |
1419 | } |
1420 | |
1421 | |
1422 | |
1423 | static unsigned RunOptGroup6 (CodeSeg* S) |
1424 | /* This one is quite special. It tries to replace "lda (sp),y" by "lda (sp,x)". |
1425 | ** The latter is ony cycle slower, but if we're able to remove the necessary |
1426 | ** load of the Y register, because X is zero anyway, we gain 1 cycle and |
1427 | ** shorten the code by one (transfer) or two bytes (load). So what we do is |
1428 | ** to replace the insns, remove unused loads, and then change back all insns |
1429 | ** where Y is still zero (meaning that the load has not been removed). |
1430 | */ |
1431 | { |
1432 | unsigned Changes = 0; |
1433 | |
1434 | /* This group will only run for a standard 6502, because the 65C02 has a |
1435 | ** better addressing mode that covers this case. |
1436 | */ |
1437 | if ((CPUIsets[CPU] & CPU_ISET_65SC02) == 0) { |
1438 | Changes += RunOptFunc (S, &DOptIndLoads1, 1); |
1439 | Changes += RunOptFunc (S, &DOptUnusedLoads, 1); |
1440 | Changes += RunOptFunc (S, &DOptIndLoads2, 1); |
1441 | } |
1442 | |
1443 | /* Return the number of changes */ |
1444 | return Changes; |
1445 | } |
1446 | |
1447 | |
1448 | |
1449 | static unsigned RunOptGroup7 (CodeSeg* S) |
1450 | /* The last group of optimization steps. Adjust branches, do size optimizations. |
1451 | */ |
1452 | { |
1453 | unsigned Changes = 0; |
1454 | unsigned C; |
1455 | |
1456 | /* Optimize for size, that is replace operations by shorter ones, even |
1457 | ** if this does hinder further optimizations (no problem since we're |
1458 | ** done soon). |
1459 | */ |
1460 | C = RunOptFunc (S, &DOptSize1, 1); |
1461 | if (C) { |
1462 | Changes += C; |
1463 | /* Run some optimization passes again, since the size optimizations |
1464 | ** may have opened new oportunities. |
1465 | */ |
1466 | Changes += RunOptFunc (S, &DOptUnusedLoads, 1); |
1467 | Changes += RunOptFunc (S, &DOptUnusedStores, 1); |
1468 | Changes += RunOptFunc (S, &DOptJumpTarget1, 5); |
1469 | Changes += RunOptFunc (S, &DOptStore5, 1); |
1470 | } |
1471 | |
1472 | C = RunOptFunc (S, &DOptSize2, 1); |
1473 | if (C) { |
1474 | Changes += C; |
1475 | /* Run some optimization passes again, since the size optimizations |
1476 | ** may have opened new oportunities. |
1477 | */ |
1478 | Changes += RunOptFunc (S, &DOptUnusedLoads, 1); |
1479 | Changes += RunOptFunc (S, &DOptJumpTarget1, 5); |
1480 | Changes += RunOptFunc (S, &DOptStore5, 1); |
1481 | Changes += RunOptFunc (S, &DOptTransfers1, 1); |
1482 | Changes += RunOptFunc (S, &DOptTransfers3, 1); |
1483 | } |
1484 | |
1485 | /* Adjust branch distances */ |
1486 | Changes += RunOptFunc (S, &DOptBranchDist, 3); |
1487 | |
1488 | /* Replace conditional branches to RTS. If we had changes, we must run dead |
1489 | ** code elimination again, since the change may have introduced dead code. |
1490 | */ |
1491 | C = RunOptFunc (S, &DOptRTSJumps2, 1); |
1492 | Changes += C; |
1493 | if (C) { |
1494 | Changes += RunOptFunc (S, &DOptDeadCode, 1); |
1495 | } |
1496 | |
1497 | /* Return the number of changes */ |
1498 | return Changes; |
1499 | } |
1500 | |
1501 | |
1502 | |
1503 | void RunOpt (CodeSeg* S) |
1504 | /* Run the optimizer */ |
1505 | { |
1506 | const char* StatFileName; |
1507 | |
1508 | /* If we shouldn't run the optimizer, bail out */ |
1509 | if (!S->Optimize) { |
1510 | return; |
1511 | } |
1512 | |
1513 | /* Check if we are requested to write optimizer statistics */ |
1514 | StatFileName = getenv ("CC65_OPTSTATS" ); |
1515 | if (StatFileName) { |
1516 | ReadOptStats (StatFileName); |
1517 | } |
1518 | |
1519 | /* Print the name of the function we are working on */ |
1520 | if (S->Func) { |
1521 | Print (stdout, 1, "Running optimizer for function '%s'\n" , S->Func->Name); |
1522 | } else { |
1523 | Print (stdout, 1, "Running optimizer for global code segment\n" ); |
1524 | } |
1525 | |
1526 | /* If requested, open an output file */ |
1527 | OpenDebugFile (S); |
1528 | WriteDebugOutput (S, 0); |
1529 | |
1530 | /* Generate register info for all instructions */ |
1531 | CS_GenRegInfo (S); |
1532 | |
1533 | /* Run groups of optimizations */ |
1534 | RunOptGroup1 (S); |
1535 | RunOptGroup2 (S); |
1536 | RunOptGroup3 (S); |
1537 | RunOptGroup4 (S); |
1538 | RunOptGroup5 (S); |
1539 | RunOptGroup6 (S); |
1540 | RunOptGroup7 (S); |
1541 | |
1542 | /* Free register info */ |
1543 | CS_FreeRegInfo (S); |
1544 | |
1545 | /* Close output file if necessary */ |
1546 | if (DebugOptOutput) { |
1547 | CloseOutputFile (); |
1548 | } |
1549 | |
1550 | /* Write statistics */ |
1551 | if (StatFileName) { |
1552 | WriteOptStats (StatFileName); |
1553 | } |
1554 | } |
1555 | |