1/*****************************************************************************/
2/* */
3/* coptshift.c */
4/* */
5/* Optimize shifts */
6/* */
7/* */
8/* */
9/* (C) 2001-2012, Ullrich von Bassewitz */
10/* Roemerstrasse 52 */
11/* D-70794 Filderstadt */
12/* EMail: uz@cc65.org */
13/* */
14/* */
15/* This software is provided 'as-is', without any expressed or implied */
16/* warranty. In no event will the authors be held liable for any damages */
17/* arising from the use of this software. */
18/* */
19/* Permission is granted to anyone to use this software for any purpose, */
20/* including commercial applications, and to alter it and redistribute it */
21/* freely, subject to the following restrictions: */
22/* */
23/* 1. The origin of this software must not be misrepresented; you must not */
24/* claim that you wrote the original software. If you use this software */
25/* in a product, an acknowledgment in the product documentation would be */
26/* appreciated but is not required. */
27/* 2. Altered source versions must be plainly marked as such, and must not */
28/* be misrepresented as being the original software. */
29/* 3. This notice may not be removed or altered from any source */
30/* distribution. */
31/* */
32/*****************************************************************************/
33
34
35
36/* common */
37#include "chartype.h"
38
39/* cc65 */
40#include "codeent.h"
41#include "codeinfo.h"
42#include "coptshift.h"
43
44
45
46/*****************************************************************************/
47/* Data */
48/*****************************************************************************/
49
50
51
52/* Shift types. Shift type is in the first byte, shift count in the second */
53enum {
54 SHIFT_NONE = 0x0000,
55
56 /* Masks */
57 SHIFT_MASK_COUNT = 0x00FF,
58 SHIFT_MASK_DIR = 0x0F00,
59 SHIFT_MASK_MODE = 0xF000, /* Arithmetic or logical */
60 SHIFT_MASK_TYPE = SHIFT_MASK_DIR | SHIFT_MASK_MODE,
61
62 /* Shift counts */
63 SHIFT_COUNT_Y = 0x0000, /* Count is in Y register */
64 SHIFT_COUNT_1 = 0x0001,
65 SHIFT_COUNT_2 = 0x0002,
66 SHIFT_COUNT_3 = 0x0003,
67 SHIFT_COUNT_4 = 0x0004,
68 SHIFT_COUNT_5 = 0x0005,
69 SHIFT_COUNT_6 = 0x0006,
70 SHIFT_COUNT_7 = 0x0007,
71
72 /* Shift directions */
73 SHIFT_DIR_LEFT = 0x0100,
74 SHIFT_DIR_RIGHT = 0x0200,
75
76 /* Shift modes */
77 SHIFT_MODE_ARITH = 0x1000,
78 SHIFT_MODE_LOGICAL = 0x2000,
79
80 /* Shift types */
81 SHIFT_TYPE_ASL = SHIFT_DIR_LEFT | SHIFT_MODE_ARITH,
82 SHIFT_TYPE_ASR = SHIFT_DIR_RIGHT | SHIFT_MODE_ARITH,
83 SHIFT_TYPE_LSL = SHIFT_DIR_LEFT | SHIFT_MODE_LOGICAL,
84 SHIFT_TYPE_LSR = SHIFT_DIR_RIGHT | SHIFT_MODE_LOGICAL,
85
86 /* Complete specs */
87 SHIFT_ASL_Y = SHIFT_TYPE_ASL | SHIFT_COUNT_Y,
88 SHIFT_ASR_Y = SHIFT_TYPE_ASR | SHIFT_COUNT_Y,
89 SHIFT_LSL_Y = SHIFT_TYPE_LSL | SHIFT_COUNT_Y,
90 SHIFT_LSR_Y = SHIFT_TYPE_LSR | SHIFT_COUNT_Y,
91
92 SHIFT_ASL_1 = SHIFT_TYPE_ASL | SHIFT_COUNT_1,
93 SHIFT_ASR_1 = SHIFT_TYPE_ASR | SHIFT_COUNT_1,
94 SHIFT_LSL_1 = SHIFT_TYPE_LSL | SHIFT_COUNT_1,
95 SHIFT_LSR_1 = SHIFT_TYPE_LSR | SHIFT_COUNT_1,
96
97 SHIFT_ASL_2 = SHIFT_TYPE_ASL | SHIFT_COUNT_2,
98 SHIFT_ASR_2 = SHIFT_TYPE_ASR | SHIFT_COUNT_2,
99 SHIFT_LSL_2 = SHIFT_TYPE_LSL | SHIFT_COUNT_2,
100 SHIFT_LSR_2 = SHIFT_TYPE_LSR | SHIFT_COUNT_2,
101
102 SHIFT_ASL_3 = SHIFT_TYPE_ASL | SHIFT_COUNT_3,
103 SHIFT_ASR_3 = SHIFT_TYPE_ASR | SHIFT_COUNT_3,
104 SHIFT_LSL_3 = SHIFT_TYPE_LSL | SHIFT_COUNT_3,
105 SHIFT_LSR_3 = SHIFT_TYPE_LSR | SHIFT_COUNT_3,
106
107 SHIFT_ASL_4 = SHIFT_TYPE_ASL | SHIFT_COUNT_4,
108 SHIFT_ASR_4 = SHIFT_TYPE_ASR | SHIFT_COUNT_4,
109 SHIFT_LSL_4 = SHIFT_TYPE_LSL | SHIFT_COUNT_4,
110 SHIFT_LSR_4 = SHIFT_TYPE_LSR | SHIFT_COUNT_4,
111
112 SHIFT_ASL_5 = SHIFT_TYPE_ASL | SHIFT_COUNT_5,
113 SHIFT_ASR_5 = SHIFT_TYPE_ASR | SHIFT_COUNT_5,
114 SHIFT_LSL_5 = SHIFT_TYPE_LSL | SHIFT_COUNT_5,
115 SHIFT_LSR_5 = SHIFT_TYPE_LSR | SHIFT_COUNT_5,
116
117 SHIFT_ASL_6 = SHIFT_TYPE_ASL | SHIFT_COUNT_6,
118 SHIFT_ASR_6 = SHIFT_TYPE_ASR | SHIFT_COUNT_6,
119 SHIFT_LSL_6 = SHIFT_TYPE_LSL | SHIFT_COUNT_6,
120 SHIFT_LSR_6 = SHIFT_TYPE_LSR | SHIFT_COUNT_6,
121
122 SHIFT_ASL_7 = SHIFT_TYPE_ASL | SHIFT_COUNT_7,
123 SHIFT_ASR_7 = SHIFT_TYPE_ASR | SHIFT_COUNT_7,
124 SHIFT_LSL_7 = SHIFT_TYPE_LSL | SHIFT_COUNT_7,
125 SHIFT_LSR_7 = SHIFT_TYPE_LSR | SHIFT_COUNT_7,
126};
127
128
129
130/* Macros to extract values from a shift type */
131#define SHIFT_COUNT(S) ((S) & SHIFT_MASK_COUNT)
132#define SHIFT_DIR(S) ((S) & SHIFT_MASK_DIR)
133#define SHIFT_MODE(S) ((S) & SHIFT_MASK_MODE)
134#define SHIFT_TYPE(S) ((S) & SHIFT_MASK_TYPE)
135
136
137
138/*****************************************************************************/
139/* Helper routines */
140/*****************************************************************************/
141
142
143
144static unsigned GetShift (const char* Name)
145/* Determine the shift from the name of the subroutine */
146{
147 unsigned Type;
148
149 if (strncmp (Name, "aslax", 5) == 0) {
150 Type = SHIFT_TYPE_ASL;
151 } else if (strncmp (Name, "asrax", 5) == 0) {
152 Type = SHIFT_TYPE_ASR;
153 } else if (strncmp (Name, "shlax", 5) == 0) {
154 Type = SHIFT_TYPE_LSL;
155 } else if (strncmp (Name, "shrax", 5) == 0) {
156 Type = SHIFT_TYPE_LSR;
157 } else {
158 /* Nothing we know */
159 return SHIFT_NONE;
160 }
161
162 /* Get the count */
163 switch (Name[5]) {
164 case 'y': Type |= SHIFT_COUNT_Y; break;
165 case '1': Type |= SHIFT_COUNT_1; break;
166 case '2': Type |= SHIFT_COUNT_2; break;
167 case '3': Type |= SHIFT_COUNT_3; break;
168 case '4': Type |= SHIFT_COUNT_4; break;
169 case '5': Type |= SHIFT_COUNT_5; break;
170 case '6': Type |= SHIFT_COUNT_6; break;
171 case '7': Type |= SHIFT_COUNT_7; break;
172 default: return SHIFT_NONE;
173 }
174
175 /* Make sure nothing follows */
176 if (Name[6] == '\0') {
177 return Type;
178 } else {
179 return SHIFT_NONE;
180 }
181}
182
183
184
185/*****************************************************************************/
186/* Optimize shifts */
187/*****************************************************************************/
188
189
190
191unsigned OptShift1 (CodeSeg* S)
192/* A call to the shlaxN routine may get replaced by one or more asl insns
193** if the value of X is not used later. If X is used later, but it is zero
194** on entry and it's a shift by one, it may get replaced by:
195**
196** asl a
197** bcc L1
198** inx
199** L1:
200*/
201{
202 unsigned Changes = 0;
203 unsigned I;
204
205 /* Walk over the entries */
206 I = 0;
207 while (I < CS_GetEntryCount (S)) {
208
209 unsigned Shift;
210 CodeEntry* N;
211 CodeEntry* X;
212 CodeLabel* L;
213
214 /* Get next entry */
215 CodeEntry* E = CS_GetEntry (S, I);
216
217 /* Check for the sequence */
218 if (E->OPC == OP65_JSR &&
219 (Shift = GetShift (E->Arg)) != SHIFT_NONE &&
220 SHIFT_DIR (Shift) == SHIFT_DIR_LEFT) {
221
222
223 unsigned Count = SHIFT_COUNT (Shift);
224 if (!RegXUsed (S, I+1)) {
225
226 if (Count == SHIFT_COUNT_Y) {
227
228 CodeLabel* L;
229
230 if (S->CodeSizeFactor < 200) {
231 goto NextEntry;
232 }
233
234 /* Change into
235 **
236 ** L1: asl a
237 ** dey
238 ** bpl L1
239 ** ror a
240 */
241
242 /* asl a */
243 X = NewCodeEntry (OP65_ASL, AM65_ACC, "a", 0, E->LI);
244 CS_InsertEntry (S, X, I+1);
245 L = CS_GenLabel (S, X);
246
247 /* dey */
248 X = NewCodeEntry (OP65_DEY, AM65_IMP, 0, 0, E->LI);
249 CS_InsertEntry (S, X, I+2);
250
251 /* bpl L1 */
252 X = NewCodeEntry (OP65_BPL, AM65_BRA, L->Name, L, E->LI);
253 CS_InsertEntry (S, X, I+3);
254
255 /* ror a */
256 X = NewCodeEntry (OP65_ROR, AM65_ACC, "a", 0, E->LI);
257 CS_InsertEntry (S, X, I+4);
258
259 } else {
260 /* Insert shift insns */
261 while (Count--) {
262 X = NewCodeEntry (OP65_ASL, AM65_ACC, "a", 0, E->LI);
263 CS_InsertEntry (S, X, I+1);
264 }
265 }
266
267 } else if (E->RI->In.RegX == 0 &&
268 Count == 1 &&
269 (N = CS_GetNextEntry (S, I)) != 0) {
270
271 /* asl a */
272 X = NewCodeEntry (OP65_ASL, AM65_ACC, "a", 0, E->LI);
273 CS_InsertEntry (S, X, I+1);
274
275 /* bcc L1 */
276 L = CS_GenLabel (S, N);
277 X = NewCodeEntry (OP65_BCC, AM65_BRA, L->Name, L, E->LI);
278 CS_InsertEntry (S, X, I+2);
279
280 /* inx */
281 X = NewCodeEntry (OP65_INX, AM65_IMP, 0, 0, E->LI);
282 CS_InsertEntry (S, X, I+3);
283
284 } else {
285
286 /* We won't handle this one */
287 goto NextEntry;
288
289 }
290
291 /* Delete the call to shlax */
292 CS_DelEntry (S, I);
293
294 /* Remember, we had changes */
295 ++Changes;
296 }
297
298NextEntry:
299 /* Next entry */
300 ++I;
301
302 }
303
304 /* Return the number of changes made */
305 return Changes;
306}
307
308
309
310unsigned OptShift2 (CodeSeg* S)
311/* The sequence
312**
313** bpl L
314** dex
315** L: jsr asraxN
316**
317** might be replaced by N copies of
318**
319** cmp #$80
320** ror a
321**
322** if X is not used later (X is assumed to be zero on entry).
323** If the sequence is followed immediately by another
324**
325** jsr asraxN
326**
327** then their shifts are combined.
328*/
329{
330 unsigned Changes = 0;
331 unsigned I = 0;
332
333 /* Walk over the entries */
334 while (I < CS_GetEntryCount (S)) {
335 unsigned Shift;
336 unsigned Count, Count2;
337 unsigned K;
338 CodeEntry* L[4];
339
340 /* Get next entry */
341 L[0] = CS_GetEntry (S, I);
342
343 /* Check for the sequence */
344 if ((L[0]->OPC == OP65_BPL || L[0]->OPC == OP65_BCC) &&
345 L[0]->JumpTo != 0 &&
346 CS_GetEntries (S, L+1, I+1, 3) &&
347 L[1]->OPC == OP65_DEX &&
348 L[0]->JumpTo->Owner == L[2] &&
349 !CS_RangeHasLabel (S, I, 2) &&
350 L[2]->OPC == OP65_JSR &&
351 SHIFT_TYPE (Shift = GetShift (L[2]->Arg)) == SHIFT_TYPE_ASR &&
352 (Count = SHIFT_COUNT (Shift)) > 0) {
353
354 if (L[3]->OPC == OP65_JSR &&
355 SHIFT_TYPE (Shift = GetShift (L[3]->Arg)) == SHIFT_TYPE_ASR &&
356 (Count2 = SHIFT_COUNT (Shift)) > 0) {
357
358 /* Found a second jsr asraxN */
359 Count += Count2;
360 K = 4;
361 } else {
362 K = 3;
363 }
364 if (Count * 100 <= S->CodeSizeFactor &&
365 !RegXUsed (S, I+K)) {
366
367 CodeEntry* X;
368 unsigned J = I+K;
369
370 /* Generate the replacement sequence */
371 do {
372 /* cmp #$80 */
373 X = NewCodeEntry (OP65_CMP, AM65_IMM, "$80", 0, L[2]->LI);
374 CS_InsertEntry (S, X, J++);
375
376 /* ror a */
377 X = NewCodeEntry (OP65_ROR, AM65_ACC, "a", 0, L[2]->LI);
378 CS_InsertEntry (S, X, J++);
379 } while (--Count);
380
381 /* Remove the bpl/dex/jsr */
382 CS_DelEntries (S, I, K);
383
384 /* Remember, we had changes */
385 ++Changes;
386 }
387 }
388
389 /* Next entry */
390 ++I;
391 }
392
393 /* Return the number of changes made */
394 return Changes;
395}
396
397
398
399unsigned OptShift3 (CodeSeg* S)
400/* The sequence
401**
402** bcc L
403** inx
404** L: jsr shrax1
405**
406** may get replaced by
407**
408** ror a
409**
410** if X is zero on entry. For shift counts > 1, more
411**
412** shr a
413**
414** must be added.
415*/
416{
417 unsigned Changes = 0;
418 unsigned I;
419
420 /* Walk over the entries */
421 I = 0;
422 while (I < CS_GetEntryCount (S)) {
423
424 unsigned Shift;
425 unsigned Count;
426 CodeEntry* L[3];
427
428 /* Get next entry */
429 L[0] = CS_GetEntry (S, I);
430
431 /* Check for the sequence */
432 if ((L[0]->OPC == OP65_BCC || L[0]->OPC == OP65_JCC) &&
433 L[0]->JumpTo != 0 &&
434 L[0]->RI->In.RegX == 0 &&
435 CS_GetEntries (S, L+1, I+1, 2) &&
436 L[1]->OPC == OP65_INX &&
437 L[0]->JumpTo->Owner == L[2] &&
438 !CS_RangeHasLabel (S, I, 2) &&
439 L[2]->OPC == OP65_JSR &&
440 (Shift = GetShift (L[2]->Arg)) != SHIFT_NONE &&
441 SHIFT_DIR (Shift) == SHIFT_DIR_RIGHT &&
442 (Count = SHIFT_COUNT (Shift)) > 0) {
443
444 /* Add the replacement insn instead */
445 CodeEntry* X = NewCodeEntry (OP65_ROR, AM65_ACC, "a", 0, L[2]->LI);
446 CS_InsertEntry (S, X, I+3);
447 while (--Count) {
448 X = NewCodeEntry (OP65_LSR, AM65_ACC, "a", 0, L[2]->LI);
449 CS_InsertEntry (S, X, I+4);
450 }
451
452 /* Remove the bcc/inx/jsr */
453 CS_DelEntries (S, I, 3);
454
455 /* Remember, we had changes */
456 ++Changes;
457
458 }
459
460 /* Next entry */
461 ++I;
462
463 }
464
465 /* Return the number of changes made */
466 return Changes;
467}
468
469
470
471unsigned OptShift4 (CodeSeg* S)
472/* Calls to the asraxN or shraxN routines may get replaced by one or more lsr
473** insns if the value of X is zero.
474*/
475{
476 unsigned Changes = 0;
477 unsigned I;
478
479 /* Walk over the entries */
480 I = 0;
481 while (I < CS_GetEntryCount (S)) {
482
483 unsigned Shift;
484 unsigned Count;
485
486 /* Get next entry */
487 CodeEntry* E = CS_GetEntry (S, I);
488
489 /* Check for the sequence */
490 if (E->OPC == OP65_JSR &&
491 (Shift = GetShift (E->Arg)) != SHIFT_NONE &&
492 SHIFT_DIR (Shift) == SHIFT_DIR_RIGHT &&
493 E->RI->In.RegX == 0) {
494
495 CodeEntry* X;
496
497 /* Shift count may be in Y */
498 Count = SHIFT_COUNT (Shift);
499 if (Count == SHIFT_COUNT_Y) {
500
501 CodeLabel* L;
502
503 if (S->CodeSizeFactor < 200) {
504 /* Not acceptable */
505 goto NextEntry;
506 }
507
508 /* Generate:
509 **
510 ** L1: lsr a
511 ** dey
512 ** bpl L1
513 ** rol a
514 **
515 ** A negative shift count or one that is greater or equal than
516 ** the bit width of the left operand (which is promoted to
517 ** integer before the operation) causes undefined behaviour, so
518 ** above transformation is safe.
519 */
520
521 /* lsr a */
522 X = NewCodeEntry (OP65_LSR, AM65_ACC, "a", 0, E->LI);
523 CS_InsertEntry (S, X, I+1);
524 L = CS_GenLabel (S, X);
525
526 /* dey */
527 X = NewCodeEntry (OP65_DEY, AM65_IMP, 0, 0, E->LI);
528 CS_InsertEntry (S, X, I+2);
529
530 /* bpl L1 */
531 X = NewCodeEntry (OP65_BPL, AM65_BRA, L->Name, L, E->LI);
532 CS_InsertEntry (S, X, I+3);
533
534 /* rol a */
535 X = NewCodeEntry (OP65_ROL, AM65_ACC, "a", 0, E->LI);
536 CS_InsertEntry (S, X, I+4);
537
538 } else {
539 /* Insert shift insns */
540 while (Count--) {
541 X = NewCodeEntry (OP65_LSR, AM65_ACC, "a", 0, E->LI);
542 CS_InsertEntry (S, X, I+1);
543 }
544
545 }
546
547 /* Delete the call to shrax */
548 CS_DelEntry (S, I);
549
550 /* Remember, we had changes */
551 ++Changes;
552
553 }
554
555NextEntry:
556 /* Next entry */
557 ++I;
558
559 }
560
561 /* Return the number of changes made */
562 return Changes;
563}
564
565
566
567unsigned OptShift5 (CodeSeg* S)
568/* Search for the sequence
569**
570** lda xxx
571** ldx yyy
572** jsr aslax1/asrax1/shlax1/shrax1
573** sta aaa
574** stx bbb
575**
576** and replace it by
577**
578** lda xxx
579** asl a
580** sta aaa
581** lda yyy
582** rol a
583** sta bbb
584**
585** or similar, provided that a/x is not used later
586*/
587{
588 unsigned Changes = 0;
589
590 /* Walk over the entries */
591 unsigned I = 0;
592 while (I < CS_GetEntryCount (S)) {
593
594 unsigned ShiftType;
595 CodeEntry* L[5];
596
597 /* Get next entry */
598 L[0] = CS_GetEntry (S, I);
599
600 /* Check for the sequence */
601 if (L[0]->OPC == OP65_LDA &&
602 (L[0]->AM == AM65_ABS || L[0]->AM == AM65_ZP) &&
603 CS_GetEntries (S, L+1, I+1, 4) &&
604 !CS_RangeHasLabel (S, I+1, 4) &&
605 L[1]->OPC == OP65_LDX &&
606 (L[1]->AM == AM65_ABS || L[1]->AM == AM65_ZP) &&
607 L[2]->OPC == OP65_JSR &&
608 (ShiftType = GetShift (L[2]->Arg)) != SHIFT_NONE &&
609 SHIFT_COUNT(ShiftType) == 1 &&
610 L[3]->OPC == OP65_STA &&
611 (L[3]->AM == AM65_ABS || L[3]->AM == AM65_ZP) &&
612 L[4]->OPC == OP65_STX &&
613 (L[4]->AM == AM65_ABS || L[4]->AM == AM65_ZP) &&
614 !RegAXUsed (S, I+5)) {
615
616 CodeEntry* X;
617
618 /* Handle the four shift types differently */
619 switch (ShiftType) {
620
621 case SHIFT_ASR_1:
622 X = NewCodeEntry (OP65_LDA, L[1]->AM, L[1]->Arg, 0, L[1]->LI);
623 CS_InsertEntry (S, X, I+5);
624 X = NewCodeEntry (OP65_CMP, AM65_IMM, "$80", 0, L[2]->LI);
625 CS_InsertEntry (S, X, I+6);
626 X = NewCodeEntry (OP65_ROR, AM65_ACC, "a", 0, L[2]->LI);
627 CS_InsertEntry (S, X, I+7);
628 X = NewCodeEntry (OP65_STA, L[4]->AM, L[4]->Arg, 0, L[4]->LI);
629 CS_InsertEntry (S, X, I+8);
630 X = NewCodeEntry (OP65_LDA, L[0]->AM, L[0]->Arg, 0, L[0]->LI);
631 CS_InsertEntry (S, X, I+9);
632 X = NewCodeEntry (OP65_ROR, AM65_ACC, "a", 0, L[2]->LI);
633 CS_InsertEntry (S, X, I+10);
634 X = NewCodeEntry (OP65_STA, L[3]->AM, L[3]->Arg, 0, L[3]->LI);
635 CS_InsertEntry (S, X, I+11);
636 CS_DelEntries (S, I, 5);
637 break;
638
639 case SHIFT_LSR_1:
640 X = NewCodeEntry (OP65_LDA, L[1]->AM, L[1]->Arg, 0, L[1]->LI);
641 CS_InsertEntry (S, X, I+5);
642 X = NewCodeEntry (OP65_LSR, AM65_ACC, "a", 0, L[2]->LI);
643 CS_InsertEntry (S, X, I+6);
644 X = NewCodeEntry (OP65_STA, L[4]->AM, L[4]->Arg, 0, L[4]->LI);
645 CS_InsertEntry (S, X, I+7);
646 X = NewCodeEntry (OP65_LDA, L[0]->AM, L[0]->Arg, 0, L[0]->LI);
647 CS_InsertEntry (S, X, I+8);
648 X = NewCodeEntry (OP65_ROR, AM65_ACC, "a", 0, L[2]->LI);
649 CS_InsertEntry (S, X, I+9);
650 X = NewCodeEntry (OP65_STA, L[3]->AM, L[3]->Arg, 0, L[3]->LI);
651 CS_InsertEntry (S, X, I+10);
652 CS_DelEntries (S, I, 5);
653 break;
654
655 case SHIFT_LSL_1:
656 case SHIFT_ASL_1:
657 /* These two are identical */
658 X = NewCodeEntry (OP65_ASL, AM65_ACC, "a", 0, L[2]->LI);
659 CS_InsertEntry (S, X, I+1);
660 X = NewCodeEntry (OP65_STA, L[3]->AM, L[3]->Arg, 0, L[3]->LI);
661 CS_InsertEntry (S, X, I+2);
662 X = NewCodeEntry (OP65_LDA, L[1]->AM, L[1]->Arg, 0, L[1]->LI);
663 CS_InsertEntry (S, X, I+3);
664 X = NewCodeEntry (OP65_ROL, AM65_ACC, "a", 0, L[2]->LI);
665 CS_InsertEntry (S, X, I+4);
666 X = NewCodeEntry (OP65_STA, L[4]->AM, L[4]->Arg, 0, L[4]->LI);
667 CS_InsertEntry (S, X, I+5);
668 CS_DelEntries (S, I+6, 4);
669 break;
670
671 }
672
673 /* Remember, we had changes */
674 ++Changes;
675
676 }
677
678 /* Next entry */
679 ++I;
680
681 }
682
683 /* Return the number of changes made */
684 return Changes;
685}
686
687
688
689unsigned OptShift6 (CodeSeg* S)
690/* Inline the shift subroutines. */
691{
692 unsigned Changes = 0;
693
694 /* Walk over the entries */
695 unsigned I = 0;
696 while (I < CS_GetEntryCount (S)) {
697
698 unsigned Shift;
699 unsigned Count;
700 CodeEntry* X;
701 unsigned IP;
702
703 /* Get next entry */
704 CodeEntry* E = CS_GetEntry (S, I);
705
706 /* Check for a call to one of the shift routine */
707 if (E->OPC == OP65_JSR &&
708 (Shift = GetShift (E->Arg)) != SHIFT_NONE &&
709 SHIFT_DIR (Shift) == SHIFT_DIR_LEFT &&
710 (Count = SHIFT_COUNT (Shift)) > 0) {
711
712 /* Code is:
713 **
714 ** stx tmp1
715 ** asl a
716 ** rol tmp1
717 ** (repeat ShiftCount-1 times)
718 ** ldx tmp1
719 **
720 ** which makes 4 + 3 * ShiftCount bytes, compared to the original
721 ** 3 bytes for the subroutine call. However, in most cases, the
722 ** final load of the X register gets merged with some other insn
723 ** and replaces a txa, so for a shift count of 1, we get a factor
724 ** of 200, which matches nicely the CodeSizeFactor enabled with -Oi
725 */
726 if (Count > 1 || S->CodeSizeFactor > 200) {
727 unsigned Size = 4 + 3 * Count;
728 if ((Size * 100 / 3) > S->CodeSizeFactor) {
729 /* Not acceptable */
730 goto NextEntry;
731 }
732 }
733
734 /* Inline the code. Insertion point is behind the subroutine call */
735 IP = (I + 1);
736
737 /* stx tmp1 */
738 X = NewCodeEntry (OP65_STX, AM65_ZP, "tmp1", 0, E->LI);
739 CS_InsertEntry (S, X, IP++);
740
741 while (Count--) {
742 /* asl a */
743 X = NewCodeEntry (OP65_ASL, AM65_ACC, "a", 0, E->LI);
744 CS_InsertEntry (S, X, IP++);
745
746 /* rol tmp1 */
747 X = NewCodeEntry (OP65_ROL, AM65_ZP, "tmp1", 0, E->LI);
748 CS_InsertEntry (S, X, IP++);
749 }
750
751 /* ldx tmp1 */
752 X = NewCodeEntry (OP65_LDX, AM65_ZP, "tmp1", 0, E->LI);
753 CS_InsertEntry (S, X, IP++);
754
755 /* Remove the subroutine call */
756 CS_DelEntry (S, I);
757
758 /* Remember, we had changes */
759 ++Changes;
760 }
761
762NextEntry:
763 /* Next entry */
764 ++I;
765
766 }
767
768 /* Return the number of changes made */
769 return Changes;
770}
771