1 | /*****************************************************************************/ |
2 | /* */ |
3 | /* coptshift.c */ |
4 | /* */ |
5 | /* Optimize shifts */ |
6 | /* */ |
7 | /* */ |
8 | /* */ |
9 | /* (C) 2001-2012, Ullrich von Bassewitz */ |
10 | /* Roemerstrasse 52 */ |
11 | /* D-70794 Filderstadt */ |
12 | /* EMail: uz@cc65.org */ |
13 | /* */ |
14 | /* */ |
15 | /* This software is provided 'as-is', without any expressed or implied */ |
16 | /* warranty. In no event will the authors be held liable for any damages */ |
17 | /* arising from the use of this software. */ |
18 | /* */ |
19 | /* Permission is granted to anyone to use this software for any purpose, */ |
20 | /* including commercial applications, and to alter it and redistribute it */ |
21 | /* freely, subject to the following restrictions: */ |
22 | /* */ |
23 | /* 1. The origin of this software must not be misrepresented; you must not */ |
24 | /* claim that you wrote the original software. If you use this software */ |
25 | /* in a product, an acknowledgment in the product documentation would be */ |
26 | /* appreciated but is not required. */ |
27 | /* 2. Altered source versions must be plainly marked as such, and must not */ |
28 | /* be misrepresented as being the original software. */ |
29 | /* 3. This notice may not be removed or altered from any source */ |
30 | /* distribution. */ |
31 | /* */ |
32 | /*****************************************************************************/ |
33 | |
34 | |
35 | |
36 | /* common */ |
37 | #include "chartype.h" |
38 | |
39 | /* cc65 */ |
40 | #include "codeent.h" |
41 | #include "codeinfo.h" |
42 | #include "coptshift.h" |
43 | |
44 | |
45 | |
46 | /*****************************************************************************/ |
47 | /* Data */ |
48 | /*****************************************************************************/ |
49 | |
50 | |
51 | |
52 | /* Shift types. Shift type is in the first byte, shift count in the second */ |
53 | enum { |
54 | SHIFT_NONE = 0x0000, |
55 | |
56 | /* Masks */ |
57 | SHIFT_MASK_COUNT = 0x00FF, |
58 | SHIFT_MASK_DIR = 0x0F00, |
59 | SHIFT_MASK_MODE = 0xF000, /* Arithmetic or logical */ |
60 | SHIFT_MASK_TYPE = SHIFT_MASK_DIR | SHIFT_MASK_MODE, |
61 | |
62 | /* Shift counts */ |
63 | SHIFT_COUNT_Y = 0x0000, /* Count is in Y register */ |
64 | SHIFT_COUNT_1 = 0x0001, |
65 | SHIFT_COUNT_2 = 0x0002, |
66 | SHIFT_COUNT_3 = 0x0003, |
67 | SHIFT_COUNT_4 = 0x0004, |
68 | SHIFT_COUNT_5 = 0x0005, |
69 | SHIFT_COUNT_6 = 0x0006, |
70 | SHIFT_COUNT_7 = 0x0007, |
71 | |
72 | /* Shift directions */ |
73 | SHIFT_DIR_LEFT = 0x0100, |
74 | SHIFT_DIR_RIGHT = 0x0200, |
75 | |
76 | /* Shift modes */ |
77 | SHIFT_MODE_ARITH = 0x1000, |
78 | SHIFT_MODE_LOGICAL = 0x2000, |
79 | |
80 | /* Shift types */ |
81 | SHIFT_TYPE_ASL = SHIFT_DIR_LEFT | SHIFT_MODE_ARITH, |
82 | SHIFT_TYPE_ASR = SHIFT_DIR_RIGHT | SHIFT_MODE_ARITH, |
83 | SHIFT_TYPE_LSL = SHIFT_DIR_LEFT | SHIFT_MODE_LOGICAL, |
84 | SHIFT_TYPE_LSR = SHIFT_DIR_RIGHT | SHIFT_MODE_LOGICAL, |
85 | |
86 | /* Complete specs */ |
87 | SHIFT_ASL_Y = SHIFT_TYPE_ASL | SHIFT_COUNT_Y, |
88 | SHIFT_ASR_Y = SHIFT_TYPE_ASR | SHIFT_COUNT_Y, |
89 | SHIFT_LSL_Y = SHIFT_TYPE_LSL | SHIFT_COUNT_Y, |
90 | SHIFT_LSR_Y = SHIFT_TYPE_LSR | SHIFT_COUNT_Y, |
91 | |
92 | SHIFT_ASL_1 = SHIFT_TYPE_ASL | SHIFT_COUNT_1, |
93 | SHIFT_ASR_1 = SHIFT_TYPE_ASR | SHIFT_COUNT_1, |
94 | SHIFT_LSL_1 = SHIFT_TYPE_LSL | SHIFT_COUNT_1, |
95 | SHIFT_LSR_1 = SHIFT_TYPE_LSR | SHIFT_COUNT_1, |
96 | |
97 | SHIFT_ASL_2 = SHIFT_TYPE_ASL | SHIFT_COUNT_2, |
98 | SHIFT_ASR_2 = SHIFT_TYPE_ASR | SHIFT_COUNT_2, |
99 | SHIFT_LSL_2 = SHIFT_TYPE_LSL | SHIFT_COUNT_2, |
100 | SHIFT_LSR_2 = SHIFT_TYPE_LSR | SHIFT_COUNT_2, |
101 | |
102 | SHIFT_ASL_3 = SHIFT_TYPE_ASL | SHIFT_COUNT_3, |
103 | SHIFT_ASR_3 = SHIFT_TYPE_ASR | SHIFT_COUNT_3, |
104 | SHIFT_LSL_3 = SHIFT_TYPE_LSL | SHIFT_COUNT_3, |
105 | SHIFT_LSR_3 = SHIFT_TYPE_LSR | SHIFT_COUNT_3, |
106 | |
107 | SHIFT_ASL_4 = SHIFT_TYPE_ASL | SHIFT_COUNT_4, |
108 | SHIFT_ASR_4 = SHIFT_TYPE_ASR | SHIFT_COUNT_4, |
109 | SHIFT_LSL_4 = SHIFT_TYPE_LSL | SHIFT_COUNT_4, |
110 | SHIFT_LSR_4 = SHIFT_TYPE_LSR | SHIFT_COUNT_4, |
111 | |
112 | SHIFT_ASL_5 = SHIFT_TYPE_ASL | SHIFT_COUNT_5, |
113 | SHIFT_ASR_5 = SHIFT_TYPE_ASR | SHIFT_COUNT_5, |
114 | SHIFT_LSL_5 = SHIFT_TYPE_LSL | SHIFT_COUNT_5, |
115 | SHIFT_LSR_5 = SHIFT_TYPE_LSR | SHIFT_COUNT_5, |
116 | |
117 | SHIFT_ASL_6 = SHIFT_TYPE_ASL | SHIFT_COUNT_6, |
118 | SHIFT_ASR_6 = SHIFT_TYPE_ASR | SHIFT_COUNT_6, |
119 | SHIFT_LSL_6 = SHIFT_TYPE_LSL | SHIFT_COUNT_6, |
120 | SHIFT_LSR_6 = SHIFT_TYPE_LSR | SHIFT_COUNT_6, |
121 | |
122 | SHIFT_ASL_7 = SHIFT_TYPE_ASL | SHIFT_COUNT_7, |
123 | SHIFT_ASR_7 = SHIFT_TYPE_ASR | SHIFT_COUNT_7, |
124 | SHIFT_LSL_7 = SHIFT_TYPE_LSL | SHIFT_COUNT_7, |
125 | SHIFT_LSR_7 = SHIFT_TYPE_LSR | SHIFT_COUNT_7, |
126 | }; |
127 | |
128 | |
129 | |
130 | /* Macros to extract values from a shift type */ |
131 | #define SHIFT_COUNT(S) ((S) & SHIFT_MASK_COUNT) |
132 | #define SHIFT_DIR(S) ((S) & SHIFT_MASK_DIR) |
133 | #define SHIFT_MODE(S) ((S) & SHIFT_MASK_MODE) |
134 | #define SHIFT_TYPE(S) ((S) & SHIFT_MASK_TYPE) |
135 | |
136 | |
137 | |
138 | /*****************************************************************************/ |
139 | /* Helper routines */ |
140 | /*****************************************************************************/ |
141 | |
142 | |
143 | |
144 | static unsigned GetShift (const char* Name) |
145 | /* Determine the shift from the name of the subroutine */ |
146 | { |
147 | unsigned Type; |
148 | |
149 | if (strncmp (Name, "aslax" , 5) == 0) { |
150 | Type = SHIFT_TYPE_ASL; |
151 | } else if (strncmp (Name, "asrax" , 5) == 0) { |
152 | Type = SHIFT_TYPE_ASR; |
153 | } else if (strncmp (Name, "shlax" , 5) == 0) { |
154 | Type = SHIFT_TYPE_LSL; |
155 | } else if (strncmp (Name, "shrax" , 5) == 0) { |
156 | Type = SHIFT_TYPE_LSR; |
157 | } else { |
158 | /* Nothing we know */ |
159 | return SHIFT_NONE; |
160 | } |
161 | |
162 | /* Get the count */ |
163 | switch (Name[5]) { |
164 | case 'y': Type |= SHIFT_COUNT_Y; break; |
165 | case '1': Type |= SHIFT_COUNT_1; break; |
166 | case '2': Type |= SHIFT_COUNT_2; break; |
167 | case '3': Type |= SHIFT_COUNT_3; break; |
168 | case '4': Type |= SHIFT_COUNT_4; break; |
169 | case '5': Type |= SHIFT_COUNT_5; break; |
170 | case '6': Type |= SHIFT_COUNT_6; break; |
171 | case '7': Type |= SHIFT_COUNT_7; break; |
172 | default: return SHIFT_NONE; |
173 | } |
174 | |
175 | /* Make sure nothing follows */ |
176 | if (Name[6] == '\0') { |
177 | return Type; |
178 | } else { |
179 | return SHIFT_NONE; |
180 | } |
181 | } |
182 | |
183 | |
184 | |
185 | /*****************************************************************************/ |
186 | /* Optimize shifts */ |
187 | /*****************************************************************************/ |
188 | |
189 | |
190 | |
191 | unsigned OptShift1 (CodeSeg* S) |
192 | /* A call to the shlaxN routine may get replaced by one or more asl insns |
193 | ** if the value of X is not used later. If X is used later, but it is zero |
194 | ** on entry and it's a shift by one, it may get replaced by: |
195 | ** |
196 | ** asl a |
197 | ** bcc L1 |
198 | ** inx |
199 | ** L1: |
200 | */ |
201 | { |
202 | unsigned Changes = 0; |
203 | unsigned I; |
204 | |
205 | /* Walk over the entries */ |
206 | I = 0; |
207 | while (I < CS_GetEntryCount (S)) { |
208 | |
209 | unsigned Shift; |
210 | CodeEntry* N; |
211 | CodeEntry* X; |
212 | CodeLabel* L; |
213 | |
214 | /* Get next entry */ |
215 | CodeEntry* E = CS_GetEntry (S, I); |
216 | |
217 | /* Check for the sequence */ |
218 | if (E->OPC == OP65_JSR && |
219 | (Shift = GetShift (E->Arg)) != SHIFT_NONE && |
220 | SHIFT_DIR (Shift) == SHIFT_DIR_LEFT) { |
221 | |
222 | |
223 | unsigned Count = SHIFT_COUNT (Shift); |
224 | if (!RegXUsed (S, I+1)) { |
225 | |
226 | if (Count == SHIFT_COUNT_Y) { |
227 | |
228 | CodeLabel* L; |
229 | |
230 | if (S->CodeSizeFactor < 200) { |
231 | goto NextEntry; |
232 | } |
233 | |
234 | /* Change into |
235 | ** |
236 | ** L1: asl a |
237 | ** dey |
238 | ** bpl L1 |
239 | ** ror a |
240 | */ |
241 | |
242 | /* asl a */ |
243 | X = NewCodeEntry (OP65_ASL, AM65_ACC, "a" , 0, E->LI); |
244 | CS_InsertEntry (S, X, I+1); |
245 | L = CS_GenLabel (S, X); |
246 | |
247 | /* dey */ |
248 | X = NewCodeEntry (OP65_DEY, AM65_IMP, 0, 0, E->LI); |
249 | CS_InsertEntry (S, X, I+2); |
250 | |
251 | /* bpl L1 */ |
252 | X = NewCodeEntry (OP65_BPL, AM65_BRA, L->Name, L, E->LI); |
253 | CS_InsertEntry (S, X, I+3); |
254 | |
255 | /* ror a */ |
256 | X = NewCodeEntry (OP65_ROR, AM65_ACC, "a" , 0, E->LI); |
257 | CS_InsertEntry (S, X, I+4); |
258 | |
259 | } else { |
260 | /* Insert shift insns */ |
261 | while (Count--) { |
262 | X = NewCodeEntry (OP65_ASL, AM65_ACC, "a" , 0, E->LI); |
263 | CS_InsertEntry (S, X, I+1); |
264 | } |
265 | } |
266 | |
267 | } else if (E->RI->In.RegX == 0 && |
268 | Count == 1 && |
269 | (N = CS_GetNextEntry (S, I)) != 0) { |
270 | |
271 | /* asl a */ |
272 | X = NewCodeEntry (OP65_ASL, AM65_ACC, "a" , 0, E->LI); |
273 | CS_InsertEntry (S, X, I+1); |
274 | |
275 | /* bcc L1 */ |
276 | L = CS_GenLabel (S, N); |
277 | X = NewCodeEntry (OP65_BCC, AM65_BRA, L->Name, L, E->LI); |
278 | CS_InsertEntry (S, X, I+2); |
279 | |
280 | /* inx */ |
281 | X = NewCodeEntry (OP65_INX, AM65_IMP, 0, 0, E->LI); |
282 | CS_InsertEntry (S, X, I+3); |
283 | |
284 | } else { |
285 | |
286 | /* We won't handle this one */ |
287 | goto NextEntry; |
288 | |
289 | } |
290 | |
291 | /* Delete the call to shlax */ |
292 | CS_DelEntry (S, I); |
293 | |
294 | /* Remember, we had changes */ |
295 | ++Changes; |
296 | } |
297 | |
298 | NextEntry: |
299 | /* Next entry */ |
300 | ++I; |
301 | |
302 | } |
303 | |
304 | /* Return the number of changes made */ |
305 | return Changes; |
306 | } |
307 | |
308 | |
309 | |
310 | unsigned OptShift2 (CodeSeg* S) |
311 | /* The sequence |
312 | ** |
313 | ** bpl L |
314 | ** dex |
315 | ** L: jsr asraxN |
316 | ** |
317 | ** might be replaced by N copies of |
318 | ** |
319 | ** cmp #$80 |
320 | ** ror a |
321 | ** |
322 | ** if X is not used later (X is assumed to be zero on entry). |
323 | ** If the sequence is followed immediately by another |
324 | ** |
325 | ** jsr asraxN |
326 | ** |
327 | ** then their shifts are combined. |
328 | */ |
329 | { |
330 | unsigned Changes = 0; |
331 | unsigned I = 0; |
332 | |
333 | /* Walk over the entries */ |
334 | while (I < CS_GetEntryCount (S)) { |
335 | unsigned Shift; |
336 | unsigned Count, Count2; |
337 | unsigned K; |
338 | CodeEntry* L[4]; |
339 | |
340 | /* Get next entry */ |
341 | L[0] = CS_GetEntry (S, I); |
342 | |
343 | /* Check for the sequence */ |
344 | if ((L[0]->OPC == OP65_BPL || L[0]->OPC == OP65_BCC) && |
345 | L[0]->JumpTo != 0 && |
346 | CS_GetEntries (S, L+1, I+1, 3) && |
347 | L[1]->OPC == OP65_DEX && |
348 | L[0]->JumpTo->Owner == L[2] && |
349 | !CS_RangeHasLabel (S, I, 2) && |
350 | L[2]->OPC == OP65_JSR && |
351 | SHIFT_TYPE (Shift = GetShift (L[2]->Arg)) == SHIFT_TYPE_ASR && |
352 | (Count = SHIFT_COUNT (Shift)) > 0) { |
353 | |
354 | if (L[3]->OPC == OP65_JSR && |
355 | SHIFT_TYPE (Shift = GetShift (L[3]->Arg)) == SHIFT_TYPE_ASR && |
356 | (Count2 = SHIFT_COUNT (Shift)) > 0) { |
357 | |
358 | /* Found a second jsr asraxN */ |
359 | Count += Count2; |
360 | K = 4; |
361 | } else { |
362 | K = 3; |
363 | } |
364 | if (Count * 100 <= S->CodeSizeFactor && |
365 | !RegXUsed (S, I+K)) { |
366 | |
367 | CodeEntry* X; |
368 | unsigned J = I+K; |
369 | |
370 | /* Generate the replacement sequence */ |
371 | do { |
372 | /* cmp #$80 */ |
373 | X = NewCodeEntry (OP65_CMP, AM65_IMM, "$80" , 0, L[2]->LI); |
374 | CS_InsertEntry (S, X, J++); |
375 | |
376 | /* ror a */ |
377 | X = NewCodeEntry (OP65_ROR, AM65_ACC, "a" , 0, L[2]->LI); |
378 | CS_InsertEntry (S, X, J++); |
379 | } while (--Count); |
380 | |
381 | /* Remove the bpl/dex/jsr */ |
382 | CS_DelEntries (S, I, K); |
383 | |
384 | /* Remember, we had changes */ |
385 | ++Changes; |
386 | } |
387 | } |
388 | |
389 | /* Next entry */ |
390 | ++I; |
391 | } |
392 | |
393 | /* Return the number of changes made */ |
394 | return Changes; |
395 | } |
396 | |
397 | |
398 | |
399 | unsigned OptShift3 (CodeSeg* S) |
400 | /* The sequence |
401 | ** |
402 | ** bcc L |
403 | ** inx |
404 | ** L: jsr shrax1 |
405 | ** |
406 | ** may get replaced by |
407 | ** |
408 | ** ror a |
409 | ** |
410 | ** if X is zero on entry. For shift counts > 1, more |
411 | ** |
412 | ** shr a |
413 | ** |
414 | ** must be added. |
415 | */ |
416 | { |
417 | unsigned Changes = 0; |
418 | unsigned I; |
419 | |
420 | /* Walk over the entries */ |
421 | I = 0; |
422 | while (I < CS_GetEntryCount (S)) { |
423 | |
424 | unsigned Shift; |
425 | unsigned Count; |
426 | CodeEntry* L[3]; |
427 | |
428 | /* Get next entry */ |
429 | L[0] = CS_GetEntry (S, I); |
430 | |
431 | /* Check for the sequence */ |
432 | if ((L[0]->OPC == OP65_BCC || L[0]->OPC == OP65_JCC) && |
433 | L[0]->JumpTo != 0 && |
434 | L[0]->RI->In.RegX == 0 && |
435 | CS_GetEntries (S, L+1, I+1, 2) && |
436 | L[1]->OPC == OP65_INX && |
437 | L[0]->JumpTo->Owner == L[2] && |
438 | !CS_RangeHasLabel (S, I, 2) && |
439 | L[2]->OPC == OP65_JSR && |
440 | (Shift = GetShift (L[2]->Arg)) != SHIFT_NONE && |
441 | SHIFT_DIR (Shift) == SHIFT_DIR_RIGHT && |
442 | (Count = SHIFT_COUNT (Shift)) > 0) { |
443 | |
444 | /* Add the replacement insn instead */ |
445 | CodeEntry* X = NewCodeEntry (OP65_ROR, AM65_ACC, "a" , 0, L[2]->LI); |
446 | CS_InsertEntry (S, X, I+3); |
447 | while (--Count) { |
448 | X = NewCodeEntry (OP65_LSR, AM65_ACC, "a" , 0, L[2]->LI); |
449 | CS_InsertEntry (S, X, I+4); |
450 | } |
451 | |
452 | /* Remove the bcc/inx/jsr */ |
453 | CS_DelEntries (S, I, 3); |
454 | |
455 | /* Remember, we had changes */ |
456 | ++Changes; |
457 | |
458 | } |
459 | |
460 | /* Next entry */ |
461 | ++I; |
462 | |
463 | } |
464 | |
465 | /* Return the number of changes made */ |
466 | return Changes; |
467 | } |
468 | |
469 | |
470 | |
471 | unsigned OptShift4 (CodeSeg* S) |
472 | /* Calls to the asraxN or shraxN routines may get replaced by one or more lsr |
473 | ** insns if the value of X is zero. |
474 | */ |
475 | { |
476 | unsigned Changes = 0; |
477 | unsigned I; |
478 | |
479 | /* Walk over the entries */ |
480 | I = 0; |
481 | while (I < CS_GetEntryCount (S)) { |
482 | |
483 | unsigned Shift; |
484 | unsigned Count; |
485 | |
486 | /* Get next entry */ |
487 | CodeEntry* E = CS_GetEntry (S, I); |
488 | |
489 | /* Check for the sequence */ |
490 | if (E->OPC == OP65_JSR && |
491 | (Shift = GetShift (E->Arg)) != SHIFT_NONE && |
492 | SHIFT_DIR (Shift) == SHIFT_DIR_RIGHT && |
493 | E->RI->In.RegX == 0) { |
494 | |
495 | CodeEntry* X; |
496 | |
497 | /* Shift count may be in Y */ |
498 | Count = SHIFT_COUNT (Shift); |
499 | if (Count == SHIFT_COUNT_Y) { |
500 | |
501 | CodeLabel* L; |
502 | |
503 | if (S->CodeSizeFactor < 200) { |
504 | /* Not acceptable */ |
505 | goto NextEntry; |
506 | } |
507 | |
508 | /* Generate: |
509 | ** |
510 | ** L1: lsr a |
511 | ** dey |
512 | ** bpl L1 |
513 | ** rol a |
514 | ** |
515 | ** A negative shift count or one that is greater or equal than |
516 | ** the bit width of the left operand (which is promoted to |
517 | ** integer before the operation) causes undefined behaviour, so |
518 | ** above transformation is safe. |
519 | */ |
520 | |
521 | /* lsr a */ |
522 | X = NewCodeEntry (OP65_LSR, AM65_ACC, "a" , 0, E->LI); |
523 | CS_InsertEntry (S, X, I+1); |
524 | L = CS_GenLabel (S, X); |
525 | |
526 | /* dey */ |
527 | X = NewCodeEntry (OP65_DEY, AM65_IMP, 0, 0, E->LI); |
528 | CS_InsertEntry (S, X, I+2); |
529 | |
530 | /* bpl L1 */ |
531 | X = NewCodeEntry (OP65_BPL, AM65_BRA, L->Name, L, E->LI); |
532 | CS_InsertEntry (S, X, I+3); |
533 | |
534 | /* rol a */ |
535 | X = NewCodeEntry (OP65_ROL, AM65_ACC, "a" , 0, E->LI); |
536 | CS_InsertEntry (S, X, I+4); |
537 | |
538 | } else { |
539 | /* Insert shift insns */ |
540 | while (Count--) { |
541 | X = NewCodeEntry (OP65_LSR, AM65_ACC, "a" , 0, E->LI); |
542 | CS_InsertEntry (S, X, I+1); |
543 | } |
544 | |
545 | } |
546 | |
547 | /* Delete the call to shrax */ |
548 | CS_DelEntry (S, I); |
549 | |
550 | /* Remember, we had changes */ |
551 | ++Changes; |
552 | |
553 | } |
554 | |
555 | NextEntry: |
556 | /* Next entry */ |
557 | ++I; |
558 | |
559 | } |
560 | |
561 | /* Return the number of changes made */ |
562 | return Changes; |
563 | } |
564 | |
565 | |
566 | |
567 | unsigned OptShift5 (CodeSeg* S) |
568 | /* Search for the sequence |
569 | ** |
570 | ** lda xxx |
571 | ** ldx yyy |
572 | ** jsr aslax1/asrax1/shlax1/shrax1 |
573 | ** sta aaa |
574 | ** stx bbb |
575 | ** |
576 | ** and replace it by |
577 | ** |
578 | ** lda xxx |
579 | ** asl a |
580 | ** sta aaa |
581 | ** lda yyy |
582 | ** rol a |
583 | ** sta bbb |
584 | ** |
585 | ** or similar, provided that a/x is not used later |
586 | */ |
587 | { |
588 | unsigned Changes = 0; |
589 | |
590 | /* Walk over the entries */ |
591 | unsigned I = 0; |
592 | while (I < CS_GetEntryCount (S)) { |
593 | |
594 | unsigned ShiftType; |
595 | CodeEntry* L[5]; |
596 | |
597 | /* Get next entry */ |
598 | L[0] = CS_GetEntry (S, I); |
599 | |
600 | /* Check for the sequence */ |
601 | if (L[0]->OPC == OP65_LDA && |
602 | (L[0]->AM == AM65_ABS || L[0]->AM == AM65_ZP) && |
603 | CS_GetEntries (S, L+1, I+1, 4) && |
604 | !CS_RangeHasLabel (S, I+1, 4) && |
605 | L[1]->OPC == OP65_LDX && |
606 | (L[1]->AM == AM65_ABS || L[1]->AM == AM65_ZP) && |
607 | L[2]->OPC == OP65_JSR && |
608 | (ShiftType = GetShift (L[2]->Arg)) != SHIFT_NONE && |
609 | SHIFT_COUNT(ShiftType) == 1 && |
610 | L[3]->OPC == OP65_STA && |
611 | (L[3]->AM == AM65_ABS || L[3]->AM == AM65_ZP) && |
612 | L[4]->OPC == OP65_STX && |
613 | (L[4]->AM == AM65_ABS || L[4]->AM == AM65_ZP) && |
614 | !RegAXUsed (S, I+5)) { |
615 | |
616 | CodeEntry* X; |
617 | |
618 | /* Handle the four shift types differently */ |
619 | switch (ShiftType) { |
620 | |
621 | case SHIFT_ASR_1: |
622 | X = NewCodeEntry (OP65_LDA, L[1]->AM, L[1]->Arg, 0, L[1]->LI); |
623 | CS_InsertEntry (S, X, I+5); |
624 | X = NewCodeEntry (OP65_CMP, AM65_IMM, "$80" , 0, L[2]->LI); |
625 | CS_InsertEntry (S, X, I+6); |
626 | X = NewCodeEntry (OP65_ROR, AM65_ACC, "a" , 0, L[2]->LI); |
627 | CS_InsertEntry (S, X, I+7); |
628 | X = NewCodeEntry (OP65_STA, L[4]->AM, L[4]->Arg, 0, L[4]->LI); |
629 | CS_InsertEntry (S, X, I+8); |
630 | X = NewCodeEntry (OP65_LDA, L[0]->AM, L[0]->Arg, 0, L[0]->LI); |
631 | CS_InsertEntry (S, X, I+9); |
632 | X = NewCodeEntry (OP65_ROR, AM65_ACC, "a" , 0, L[2]->LI); |
633 | CS_InsertEntry (S, X, I+10); |
634 | X = NewCodeEntry (OP65_STA, L[3]->AM, L[3]->Arg, 0, L[3]->LI); |
635 | CS_InsertEntry (S, X, I+11); |
636 | CS_DelEntries (S, I, 5); |
637 | break; |
638 | |
639 | case SHIFT_LSR_1: |
640 | X = NewCodeEntry (OP65_LDA, L[1]->AM, L[1]->Arg, 0, L[1]->LI); |
641 | CS_InsertEntry (S, X, I+5); |
642 | X = NewCodeEntry (OP65_LSR, AM65_ACC, "a" , 0, L[2]->LI); |
643 | CS_InsertEntry (S, X, I+6); |
644 | X = NewCodeEntry (OP65_STA, L[4]->AM, L[4]->Arg, 0, L[4]->LI); |
645 | CS_InsertEntry (S, X, I+7); |
646 | X = NewCodeEntry (OP65_LDA, L[0]->AM, L[0]->Arg, 0, L[0]->LI); |
647 | CS_InsertEntry (S, X, I+8); |
648 | X = NewCodeEntry (OP65_ROR, AM65_ACC, "a" , 0, L[2]->LI); |
649 | CS_InsertEntry (S, X, I+9); |
650 | X = NewCodeEntry (OP65_STA, L[3]->AM, L[3]->Arg, 0, L[3]->LI); |
651 | CS_InsertEntry (S, X, I+10); |
652 | CS_DelEntries (S, I, 5); |
653 | break; |
654 | |
655 | case SHIFT_LSL_1: |
656 | case SHIFT_ASL_1: |
657 | /* These two are identical */ |
658 | X = NewCodeEntry (OP65_ASL, AM65_ACC, "a" , 0, L[2]->LI); |
659 | CS_InsertEntry (S, X, I+1); |
660 | X = NewCodeEntry (OP65_STA, L[3]->AM, L[3]->Arg, 0, L[3]->LI); |
661 | CS_InsertEntry (S, X, I+2); |
662 | X = NewCodeEntry (OP65_LDA, L[1]->AM, L[1]->Arg, 0, L[1]->LI); |
663 | CS_InsertEntry (S, X, I+3); |
664 | X = NewCodeEntry (OP65_ROL, AM65_ACC, "a" , 0, L[2]->LI); |
665 | CS_InsertEntry (S, X, I+4); |
666 | X = NewCodeEntry (OP65_STA, L[4]->AM, L[4]->Arg, 0, L[4]->LI); |
667 | CS_InsertEntry (S, X, I+5); |
668 | CS_DelEntries (S, I+6, 4); |
669 | break; |
670 | |
671 | } |
672 | |
673 | /* Remember, we had changes */ |
674 | ++Changes; |
675 | |
676 | } |
677 | |
678 | /* Next entry */ |
679 | ++I; |
680 | |
681 | } |
682 | |
683 | /* Return the number of changes made */ |
684 | return Changes; |
685 | } |
686 | |
687 | |
688 | |
689 | unsigned OptShift6 (CodeSeg* S) |
690 | /* Inline the shift subroutines. */ |
691 | { |
692 | unsigned Changes = 0; |
693 | |
694 | /* Walk over the entries */ |
695 | unsigned I = 0; |
696 | while (I < CS_GetEntryCount (S)) { |
697 | |
698 | unsigned Shift; |
699 | unsigned Count; |
700 | CodeEntry* X; |
701 | unsigned IP; |
702 | |
703 | /* Get next entry */ |
704 | CodeEntry* E = CS_GetEntry (S, I); |
705 | |
706 | /* Check for a call to one of the shift routine */ |
707 | if (E->OPC == OP65_JSR && |
708 | (Shift = GetShift (E->Arg)) != SHIFT_NONE && |
709 | SHIFT_DIR (Shift) == SHIFT_DIR_LEFT && |
710 | (Count = SHIFT_COUNT (Shift)) > 0) { |
711 | |
712 | /* Code is: |
713 | ** |
714 | ** stx tmp1 |
715 | ** asl a |
716 | ** rol tmp1 |
717 | ** (repeat ShiftCount-1 times) |
718 | ** ldx tmp1 |
719 | ** |
720 | ** which makes 4 + 3 * ShiftCount bytes, compared to the original |
721 | ** 3 bytes for the subroutine call. However, in most cases, the |
722 | ** final load of the X register gets merged with some other insn |
723 | ** and replaces a txa, so for a shift count of 1, we get a factor |
724 | ** of 200, which matches nicely the CodeSizeFactor enabled with -Oi |
725 | */ |
726 | if (Count > 1 || S->CodeSizeFactor > 200) { |
727 | unsigned Size = 4 + 3 * Count; |
728 | if ((Size * 100 / 3) > S->CodeSizeFactor) { |
729 | /* Not acceptable */ |
730 | goto NextEntry; |
731 | } |
732 | } |
733 | |
734 | /* Inline the code. Insertion point is behind the subroutine call */ |
735 | IP = (I + 1); |
736 | |
737 | /* stx tmp1 */ |
738 | X = NewCodeEntry (OP65_STX, AM65_ZP, "tmp1" , 0, E->LI); |
739 | CS_InsertEntry (S, X, IP++); |
740 | |
741 | while (Count--) { |
742 | /* asl a */ |
743 | X = NewCodeEntry (OP65_ASL, AM65_ACC, "a" , 0, E->LI); |
744 | CS_InsertEntry (S, X, IP++); |
745 | |
746 | /* rol tmp1 */ |
747 | X = NewCodeEntry (OP65_ROL, AM65_ZP, "tmp1" , 0, E->LI); |
748 | CS_InsertEntry (S, X, IP++); |
749 | } |
750 | |
751 | /* ldx tmp1 */ |
752 | X = NewCodeEntry (OP65_LDX, AM65_ZP, "tmp1" , 0, E->LI); |
753 | CS_InsertEntry (S, X, IP++); |
754 | |
755 | /* Remove the subroutine call */ |
756 | CS_DelEntry (S, I); |
757 | |
758 | /* Remember, we had changes */ |
759 | ++Changes; |
760 | } |
761 | |
762 | NextEntry: |
763 | /* Next entry */ |
764 | ++I; |
765 | |
766 | } |
767 | |
768 | /* Return the number of changes made */ |
769 | return Changes; |
770 | } |
771 | |