/*
** FOLD: Constant Folding, Algebraic Simplifications and Reassociation.
** ABCelim: Array Bounds Check Elimination.
** CSE: Common-Subexpression Elimination.
** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
*/

#define lj_opt_fold_c
#define LUA_CORE

#include <math.h>

#include "lj_obj.h"

#if LJ_HASJIT

#include "lj_str.h"
#include "lj_tab.h"
#include "lj_ir.h"
#include "lj_jit.h"
#include "lj_iropt.h"
#include "lj_trace.h"
#if LJ_HASFFI
#include "lj_ctype.h"
#endif
#include "lj_carith.h"
#include "lj_vm.h"
#include "lj_strscan.h"

/* Here's a short description of how the FOLD engine processes instructions:
**
** The FOLD engine receives a single instruction stored in fins (J->fold.ins).
** The instruction and its operands are used to select matching fold rules.
** These are applied iteratively until a fixed point is reached.
**
** The 8 bit opcode of the instruction itself plus the opcodes of the
** two instructions referenced by its operands form a 24 bit key
** 'ins left right' (unused operands -> 0, literals -> lowest 8 bits).
**
** This key is used for partial matching against the fold rules. The
** left/right operand fields of the key are successively masked with
** the 'any' wildcard, from most specific to least specific:
**
**   ins left right
**   ins any  right
**   ins left any
**   ins any  any
**
** The masked key is used to look up a matching fold rule in a semi-perfect
** hash table. If a matching rule is found, the related fold function is run.
** Multiple rules can share the same fold function. A fold rule may return
** one of several special values:
**
** - NEXTFOLD means no folding was applied, because an additional test
**   inside the fold function failed. Matching continues against less
**   specific fold rules. Finally the instruction is passed on to CSE.
**
** - RETRYFOLD means the instruction was modified in-place. Folding is
**   retried as if this instruction had just been received.
**
** All other return values are terminal actions -- no further folding is
** applied:
**
** - INTFOLD(i) returns a reference to the integer constant i.
**
** - LEFTFOLD and RIGHTFOLD return the left/right operand reference
**   without emitting an instruction.
**
** - CSEFOLD and EMITFOLD pass the instruction directly to CSE or emit
**   it without passing through any further optimizations.
**
** - FAILFOLD, DROPFOLD and CONDFOLD only apply to instructions which have
**   no result (e.g. guarded assertions): FAILFOLD means the guard would
**   always fail, i.e. the current trace is pointless. DROPFOLD means
**   the guard is always true and has been eliminated. CONDFOLD is a
**   shortcut for FAILFOLD + cond (i.e. drop if true, otherwise fail).
**
** - Any other return value is interpreted as an IRRef or TRef. This
**   can be a reference to an existing or a newly created instruction.
**   Only the least-significant 16 bits (IRRef1) are used to form a TRef
**   which is finally returned to the caller.
**
** The FOLD engine receives instructions both from the trace recorder and
** substituted instructions from LOOP unrolling. This means all types
** of instructions may end up here, even though the recorder bypasses
** FOLD in some cases. Thus all loads, stores and allocations must have
** an any/any rule to avoid being passed on to CSE.
**
** Carefully read the following requirements before adding or modifying
** any fold rules:
**
** Requirement #1: All fold rules must preserve their destination type.
**
** Consistently use INTFOLD() (KINT result) or lj_ir_knum() (KNUM result).
** Never use lj_ir_knumint() which can have either a KINT or KNUM result.
**
** Requirement #2: Fold rules should not create *new* instructions which
** reference operands *across* PHIs.
**
** E.g. a RETRYFOLD with 'fins->op1 = fleft->op1' is invalid if the
** left operand is a PHI. Then fleft->op1 would point across the PHI
** frontier to an invariant instruction. Adding a PHI for this instruction
** would be counterproductive. The solution is to add a barrier which
** prevents folding across PHIs, i.e. 'PHIBARRIER(fleft)' in this case.
** The only exception is for recurrences with high latencies like
** repeated int->num->int conversions.
**
** One could relax this condition a bit if the referenced instruction is
** a PHI, too. But this often leads to worse code due to excessive
** register shuffling.
**
** Note: returning *existing* instructions (e.g. LEFTFOLD) is ok, though.
** Even returning fleft->op1 would be ok, because a new PHI will be added,
** if needed. But again, this leads to excessive register shuffling and
** should be avoided.
**
** Requirement #3: The set of all fold rules must be monotonic to guarantee
** termination.
**
** The goal is optimization, so one primarily wants to add strength-reducing
** rules. This means eliminating an instruction or replacing an instruction
** with one or more simpler instructions. Don't add fold rules which point
** in the other direction.
**
** Some rules (like commutativity) do not directly reduce the strength of
** an instruction, but enable other fold rules (e.g. by moving constants
** to the right operand). These rules must be made unidirectional to avoid
** cycles.
**
** Rule of thumb: the trace recorder expands the IR and FOLD shrinks it.
*/
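
/* Editor's note -- a worked example of the matching order above
** (illustrative, not part of the engine): to fold ADD(ADD(x, KINT), KINT)
** the engine first probes the key 'ADD ADD KINT' (the reassociation rule
** further below). If that rule returns NEXTFOLD, it retries with
** 'ADD any KINT' (e.g. the i + 0 simplification), then 'ADD ADD any',
** then 'ADD any any' (commutativity), and only then falls through to CSE.
*/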

/* Some local macros to save typing. Undef'd at the end. */
#define IR(ref) (&J->cur.ir[(ref)])
#define fins (&J->fold.ins)
#define fleft (&J->fold.left)
#define fright (&J->fold.right)
#define knumleft (ir_knum(fleft)->n)
#define knumright (ir_knum(fright)->n)

/* Pass IR on to next optimization in chain (FOLD). */
#define emitir(ot, a, b) (lj_ir_set(J, (ot), (a), (b)), lj_opt_fold(J))

/* Fold function type. Fastcall on x86 significantly reduces their size. */
typedef IRRef (LJ_FASTCALL *FoldFunc)(jit_State *J);

/* Macros for the fold specs, so buildvm can recognize them. */
#define LJFOLD(x)
#define LJFOLDX(x)
#define LJFOLDF(name) static TRef LJ_FASTCALL fold_##name(jit_State *J)
/* Note: They must be at the start of a line or buildvm ignores them! */

/* Barrier to prevent using operands across PHIs. */
#define PHIBARRIER(ir) if (irt_isphi((ir)->t)) return NEXTFOLD

/* Barrier to prevent folding across a GC step.
** GC steps can only happen at the head of a trace and at LOOP.
** And the GC is only driven forward if there is at least one allocation.
*/
#define gcstep_barrier(J, ref) \
  ((ref) < J->chain[IR_LOOP] && \
   (J->chain[IR_SNEW] || J->chain[IR_XSNEW] || \
    J->chain[IR_TNEW] || J->chain[IR_TDUP] || \
    J->chain[IR_CNEW] || J->chain[IR_CNEWI] || J->chain[IR_TOSTR]))

/* -- Constant folding for FP numbers ------------------------------------- */

LJFOLD(ADD KNUM KNUM)
LJFOLD(SUB KNUM KNUM)
LJFOLD(MUL KNUM KNUM)
LJFOLD(DIV KNUM KNUM)
LJFOLD(NEG KNUM KNUM)
LJFOLD(ABS KNUM KNUM)
LJFOLD(ATAN2 KNUM KNUM)
LJFOLD(LDEXP KNUM KNUM)
LJFOLD(MIN KNUM KNUM)
LJFOLD(MAX KNUM KNUM)
LJFOLDF(kfold_numarith)
{
  lua_Number a = knumleft;
  lua_Number b = knumright;
  lua_Number y = lj_vm_foldarith(a, b, fins->o - IR_ADD);
  return lj_ir_knum(J, y);
}

LJFOLD(LDEXP KNUM KINT)
LJFOLDF(kfold_ldexp)
{
#if LJ_TARGET_X86ORX64
  UNUSED(J);
  return NEXTFOLD;
#else
  return lj_ir_knum(J, ldexp(knumleft, fright->i));
#endif
}

LJFOLD(FPMATH KNUM any)
LJFOLDF(kfold_fpmath)
{
  lua_Number a = knumleft;
  lua_Number y = lj_vm_foldfpm(a, fins->op2);
  return lj_ir_knum(J, y);
}

LJFOLD(POW KNUM KINT)
LJFOLDF(kfold_numpow)
{
  lua_Number a = knumleft;
  lua_Number b = (lua_Number)fright->i;
  lua_Number y = lj_vm_foldarith(a, b, IR_POW - IR_ADD);
  return lj_ir_knum(J, y);
}

/* Must not use kfold_kref for numbers (could be NaN). */
LJFOLD(EQ KNUM KNUM)
LJFOLD(NE KNUM KNUM)
LJFOLD(LT KNUM KNUM)
LJFOLD(GE KNUM KNUM)
LJFOLD(LE KNUM KNUM)
LJFOLD(GT KNUM KNUM)
LJFOLD(ULT KNUM KNUM)
LJFOLD(UGE KNUM KNUM)
LJFOLD(ULE KNUM KNUM)
LJFOLD(UGT KNUM KNUM)
LJFOLDF(kfold_numcomp)
{
  return CONDFOLD(lj_ir_numcmp(knumleft, knumright, (IROp)fins->o));
}

/* -- Constant folding for 32 bit integers -------------------------------- */

static int32_t kfold_intop(int32_t k1, int32_t k2, IROp op)
{
  switch (op) {
  case IR_ADD: k1 += k2; break;
  case IR_SUB: k1 -= k2; break;
  case IR_MUL: k1 *= k2; break;
  case IR_MOD: k1 = lj_vm_modi(k1, k2); break;
  case IR_NEG: k1 = -k1; break;
  case IR_BAND: k1 &= k2; break;
  case IR_BOR: k1 |= k2; break;
  case IR_BXOR: k1 ^= k2; break;
  case IR_BSHL: k1 <<= (k2 & 31); break;
  case IR_BSHR: k1 = (int32_t)((uint32_t)k1 >> (k2 & 31)); break;
  case IR_BSAR: k1 >>= (k2 & 31); break;
  case IR_BROL: k1 = (int32_t)lj_rol((uint32_t)k1, (k2 & 31)); break;
  case IR_BROR: k1 = (int32_t)lj_ror((uint32_t)k1, (k2 & 31)); break;
  case IR_MIN: k1 = k1 < k2 ? k1 : k2; break;
  case IR_MAX: k1 = k1 > k2 ? k1 : k2; break;
  default: lua_assert(0); break;
  }
  return k1;
}
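
/* Illustrative example for kfold_intop (not part of the engine): shift
** counts are masked to the operand width, so kfold_intop(1, 33, IR_BSHL)
** uses 33 & 31 == 1 and yields 2 -- matching the behavior of the 32 bit
** shift instructions on the supported targets.
*/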

LJFOLD(ADD KINT KINT)
LJFOLD(SUB KINT KINT)
LJFOLD(MUL KINT KINT)
LJFOLD(MOD KINT KINT)
LJFOLD(NEG KINT KINT)
LJFOLD(BAND KINT KINT)
LJFOLD(BOR KINT KINT)
LJFOLD(BXOR KINT KINT)
LJFOLD(BSHL KINT KINT)
LJFOLD(BSHR KINT KINT)
LJFOLD(BSAR KINT KINT)
LJFOLD(BROL KINT KINT)
LJFOLD(BROR KINT KINT)
LJFOLD(MIN KINT KINT)
LJFOLD(MAX KINT KINT)
LJFOLDF(kfold_intarith)
{
  return INTFOLD(kfold_intop(fleft->i, fright->i, (IROp)fins->o));
}

LJFOLD(ADDOV KINT KINT)
LJFOLD(SUBOV KINT KINT)
LJFOLD(MULOV KINT KINT)
LJFOLDF(kfold_intovarith)
{
  lua_Number n = lj_vm_foldarith((lua_Number)fleft->i, (lua_Number)fright->i,
                                 fins->o - IR_ADDOV);
  int32_t k = lj_num2int(n);
  if (n != (lua_Number)k)
    return FAILFOLD;
  return INTFOLD(k);
}
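
/* Sketch of the overflow check above (illustrative): the arithmetic is
** redone in FP, where a double represents any 32 bit result exactly. For
** ADDOV with 0x7fffffff and 1 this computes n = 2147483648.0, which does
** not survive the int32_t round-trip, so the guarded op is known to
** overflow and the rule returns FAILFOLD.
*/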

LJFOLD(BNOT KINT)
LJFOLDF(kfold_bnot)
{
  return INTFOLD(~fleft->i);
}

LJFOLD(BSWAP KINT)
LJFOLDF(kfold_bswap)
{
  return INTFOLD((int32_t)lj_bswap((uint32_t)fleft->i));
}

LJFOLD(LT KINT KINT)
LJFOLD(GE KINT KINT)
LJFOLD(LE KINT KINT)
LJFOLD(GT KINT KINT)
LJFOLD(ULT KINT KINT)
LJFOLD(UGE KINT KINT)
LJFOLD(ULE KINT KINT)
LJFOLD(UGT KINT KINT)
LJFOLD(ABC KINT KINT)
LJFOLDF(kfold_intcomp)
{
  int32_t a = fleft->i, b = fright->i;
  switch ((IROp)fins->o) {
  case IR_LT: return CONDFOLD(a < b);
  case IR_GE: return CONDFOLD(a >= b);
  case IR_LE: return CONDFOLD(a <= b);
  case IR_GT: return CONDFOLD(a > b);
  case IR_ULT: return CONDFOLD((uint32_t)a < (uint32_t)b);
  case IR_UGE: return CONDFOLD((uint32_t)a >= (uint32_t)b);
  case IR_ULE: return CONDFOLD((uint32_t)a <= (uint32_t)b);
  case IR_ABC:
  case IR_UGT: return CONDFOLD((uint32_t)a > (uint32_t)b);
  default: lua_assert(0); return FAILFOLD;
  }
}

LJFOLD(UGE any KINT)
LJFOLDF(kfold_intcomp0)
{
  if (fright->i == 0)
    return DROPFOLD;
  return NEXTFOLD;
}

/* -- Constant folding for 64 bit integers -------------------------------- */

static uint64_t kfold_int64arith(uint64_t k1, uint64_t k2, IROp op)
{
  switch (op) {
#if LJ_64 || LJ_HASFFI
  case IR_ADD: k1 += k2; break;
  case IR_SUB: k1 -= k2; break;
#endif
#if LJ_HASFFI
  case IR_MUL: k1 *= k2; break;
  case IR_BAND: k1 &= k2; break;
  case IR_BOR: k1 |= k2; break;
  case IR_BXOR: k1 ^= k2; break;
#endif
  default: UNUSED(k2); lua_assert(0); break;
  }
  return k1;
}

LJFOLD(ADD KINT64 KINT64)
LJFOLD(SUB KINT64 KINT64)
LJFOLD(MUL KINT64 KINT64)
LJFOLD(BAND KINT64 KINT64)
LJFOLD(BOR KINT64 KINT64)
LJFOLD(BXOR KINT64 KINT64)
LJFOLDF(kfold_int64arith)
{
  return INT64FOLD(kfold_int64arith(ir_k64(fleft)->u64,
                                    ir_k64(fright)->u64, (IROp)fins->o));
}

LJFOLD(DIV KINT64 KINT64)
LJFOLD(MOD KINT64 KINT64)
LJFOLD(POW KINT64 KINT64)
LJFOLDF(kfold_int64arith2)
{
#if LJ_HASFFI
  uint64_t k1 = ir_k64(fleft)->u64, k2 = ir_k64(fright)->u64;
  if (irt_isi64(fins->t)) {
    k1 = fins->o == IR_DIV ? lj_carith_divi64((int64_t)k1, (int64_t)k2) :
         fins->o == IR_MOD ? lj_carith_modi64((int64_t)k1, (int64_t)k2) :
                             lj_carith_powi64((int64_t)k1, (int64_t)k2);
  } else {
    k1 = fins->o == IR_DIV ? lj_carith_divu64(k1, k2) :
         fins->o == IR_MOD ? lj_carith_modu64(k1, k2) :
                             lj_carith_powu64(k1, k2);
  }
  return INT64FOLD(k1);
#else
  UNUSED(J); lua_assert(0); return FAILFOLD;
#endif
}

LJFOLD(BSHL KINT64 KINT)
LJFOLD(BSHR KINT64 KINT)
LJFOLD(BSAR KINT64 KINT)
LJFOLD(BROL KINT64 KINT)
LJFOLD(BROR KINT64 KINT)
LJFOLDF(kfold_int64shift)
{
#if LJ_HASFFI || LJ_64
  uint64_t k = ir_k64(fleft)->u64;
  int32_t sh = (fright->i & 63);
  switch ((IROp)fins->o) {
  case IR_BSHL: k <<= sh; break;
#if LJ_HASFFI
  case IR_BSHR: k >>= sh; break;
  case IR_BSAR: k = (uint64_t)((int64_t)k >> sh); break;
  case IR_BROL: k = lj_rol(k, sh); break;
  case IR_BROR: k = lj_ror(k, sh); break;
#endif
  default: lua_assert(0); break;
  }
  return INT64FOLD(k);
#else
  UNUSED(J); lua_assert(0); return FAILFOLD;
#endif
}

LJFOLD(BNOT KINT64)
LJFOLDF(kfold_bnot64)
{
#if LJ_HASFFI
  return INT64FOLD(~ir_k64(fleft)->u64);
#else
  UNUSED(J); lua_assert(0); return FAILFOLD;
#endif
}

LJFOLD(BSWAP KINT64)
LJFOLDF(kfold_bswap64)
{
#if LJ_HASFFI
  return INT64FOLD(lj_bswap64(ir_k64(fleft)->u64));
#else
  UNUSED(J); lua_assert(0); return FAILFOLD;
#endif
}

LJFOLD(LT KINT64 KINT64)
LJFOLD(GE KINT64 KINT64)
LJFOLD(LE KINT64 KINT64)
LJFOLD(GT KINT64 KINT64)
LJFOLD(ULT KINT64 KINT64)
LJFOLD(UGE KINT64 KINT64)
LJFOLD(ULE KINT64 KINT64)
LJFOLD(UGT KINT64 KINT64)
LJFOLDF(kfold_int64comp)
{
#if LJ_HASFFI
  /* Signed comparisons need signed operands; the U variants cast back. */
  int64_t a = (int64_t)ir_k64(fleft)->u64, b = (int64_t)ir_k64(fright)->u64;
  switch ((IROp)fins->o) {
  case IR_LT: return CONDFOLD(a < b);
  case IR_GE: return CONDFOLD(a >= b);
  case IR_LE: return CONDFOLD(a <= b);
  case IR_GT: return CONDFOLD(a > b);
  case IR_ULT: return CONDFOLD((uint64_t)a < (uint64_t)b);
  case IR_UGE: return CONDFOLD((uint64_t)a >= (uint64_t)b);
  case IR_ULE: return CONDFOLD((uint64_t)a <= (uint64_t)b);
  case IR_UGT: return CONDFOLD((uint64_t)a > (uint64_t)b);
  default: lua_assert(0); return FAILFOLD;
  }
#else
  UNUSED(J); lua_assert(0); return FAILFOLD;
#endif
}

LJFOLD(UGE any KINT64)
LJFOLDF(kfold_int64comp0)
{
#if LJ_HASFFI
  if (ir_k64(fright)->u64 == 0)
    return DROPFOLD;
  return NEXTFOLD;
#else
  UNUSED(J); lua_assert(0); return FAILFOLD;
#endif
}

/* -- Constant folding for strings ---------------------------------------- */

LJFOLD(SNEW KKPTR KINT)
LJFOLDF(kfold_snew_kptr)
{
  GCstr *s = lj_str_new(J->L, (const char *)ir_kptr(fleft), (size_t)fright->i);
  return lj_ir_kstr(J, s);
}

LJFOLD(SNEW any KINT)
LJFOLDF(kfold_snew_empty)
{
  if (fright->i == 0)
    return lj_ir_kstr(J, &J2G(J)->strempty);
  return NEXTFOLD;
}

LJFOLD(STRREF KGC KINT)
LJFOLDF(kfold_strref)
{
  GCstr *str = ir_kstr(fleft);
  lua_assert((MSize)fright->i <= str->len);
  return lj_ir_kkptr(J, (char *)strdata(str) + fright->i);
}

LJFOLD(STRREF SNEW any)
LJFOLDF(kfold_strref_snew)
{
  PHIBARRIER(fleft);
  if (irref_isk(fins->op2) && fright->i == 0) {
    return fleft->op1;  /* strref(snew(ptr, len), 0) ==> ptr */
  } else {
    /* Reassociate: strref(snew(strref(str, a), len), b) ==> strref(str, a+b) */
    IRIns *ir = IR(fleft->op1);
    IRRef1 str = ir->op1;  /* IRIns * is not valid across emitir. */
    lua_assert(ir->o == IR_STRREF);
    PHIBARRIER(ir);
    fins->op2 = emitir(IRTI(IR_ADD), ir->op2, fins->op2);  /* Clobbers fins! */
    fins->op1 = str;
    fins->ot = IRT(IR_STRREF, IRT_P32);
    return RETRYFOLD;
  }
  return NEXTFOLD;
}

LJFOLD(CALLN CARG IRCALL_lj_str_cmp)
LJFOLDF(kfold_strcmp)
{
  if (irref_isk(fleft->op1) && irref_isk(fleft->op2)) {
    GCstr *a = ir_kstr(IR(fleft->op1));
    GCstr *b = ir_kstr(IR(fleft->op2));
    return INTFOLD(lj_str_cmp(a, b));
  }
  return NEXTFOLD;
}

/* -- Constant folding of pointer arithmetic ------------------------------ */

LJFOLD(ADD KGC KINT)
LJFOLD(ADD KGC KINT64)
LJFOLDF(kfold_add_kgc)
{
  GCobj *o = ir_kgc(fleft);
#if LJ_64
  ptrdiff_t ofs = (ptrdiff_t)ir_kint64(fright)->u64;
#else
  ptrdiff_t ofs = fright->i;
#endif
#if LJ_HASFFI
  if (irt_iscdata(fleft->t)) {
    CType *ct = ctype_raw(ctype_ctsG(J2G(J)), gco2cd(o)->ctypeid);
    if (ctype_isnum(ct->info) || ctype_isenum(ct->info) ||
        ctype_isptr(ct->info) || ctype_isfunc(ct->info) ||
        ctype_iscomplex(ct->info) || ctype_isvector(ct->info))
      return lj_ir_kkptr(J, (char *)o + ofs);
  }
#endif
  return lj_ir_kptr(J, (char *)o + ofs);
}

LJFOLD(ADD KPTR KINT)
LJFOLD(ADD KPTR KINT64)
LJFOLD(ADD KKPTR KINT)
LJFOLD(ADD KKPTR KINT64)
LJFOLDF(kfold_add_kptr)
{
  void *p = ir_kptr(fleft);
#if LJ_64
  ptrdiff_t ofs = (ptrdiff_t)ir_kint64(fright)->u64;
#else
  ptrdiff_t ofs = fright->i;
#endif
  return lj_ir_kptr_(J, fleft->o, (char *)p + ofs);
}

LJFOLD(ADD any KGC)
LJFOLD(ADD any KPTR)
LJFOLD(ADD any KKPTR)
LJFOLDF(kfold_add_kright)
{
  if (fleft->o == IR_KINT || fleft->o == IR_KINT64) {
    IRRef1 tmp = fins->op1; fins->op1 = fins->op2; fins->op2 = tmp;
    return RETRYFOLD;
  }
  return NEXTFOLD;
}

/* -- Constant folding of conversions ------------------------------------- */

LJFOLD(TOBIT KNUM KNUM)
LJFOLDF(kfold_tobit)
{
  return INTFOLD(lj_num2bit(knumleft));
}

LJFOLD(CONV KINT IRCONV_NUM_INT)
LJFOLDF(kfold_conv_kint_num)
{
  return lj_ir_knum(J, (lua_Number)fleft->i);
}

LJFOLD(CONV KINT IRCONV_NUM_U32)
LJFOLDF(kfold_conv_kintu32_num)
{
  return lj_ir_knum(J, (lua_Number)(uint32_t)fleft->i);
}

LJFOLD(CONV KINT IRCONV_INT_I8)
LJFOLD(CONV KINT IRCONV_INT_U8)
LJFOLD(CONV KINT IRCONV_INT_I16)
LJFOLD(CONV KINT IRCONV_INT_U16)
LJFOLDF(kfold_conv_kint_ext)
{
  int32_t k = fleft->i;
  if ((fins->op2 & IRCONV_SRCMASK) == IRT_I8) k = (int8_t)k;
  else if ((fins->op2 & IRCONV_SRCMASK) == IRT_U8) k = (uint8_t)k;
  else if ((fins->op2 & IRCONV_SRCMASK) == IRT_I16) k = (int16_t)k;
  else k = (uint16_t)k;
  return INTFOLD(k);
}

LJFOLD(CONV KINT IRCONV_I64_INT)
LJFOLD(CONV KINT IRCONV_U64_INT)
LJFOLD(CONV KINT IRCONV_I64_U32)
LJFOLD(CONV KINT IRCONV_U64_U32)
LJFOLDF(kfold_conv_kint_i64)
{
  if ((fins->op2 & IRCONV_SEXT))
    return INT64FOLD((uint64_t)(int64_t)fleft->i);
  else
    return INT64FOLD((uint64_t)(int64_t)(uint32_t)fleft->i);
}

LJFOLD(CONV KINT64 IRCONV_NUM_I64)
LJFOLDF(kfold_conv_kint64_num_i64)
{
  return lj_ir_knum(J, (lua_Number)(int64_t)ir_kint64(fleft)->u64);
}

LJFOLD(CONV KINT64 IRCONV_NUM_U64)
LJFOLDF(kfold_conv_kint64_num_u64)
{
  return lj_ir_knum(J, (lua_Number)ir_kint64(fleft)->u64);
}

LJFOLD(CONV KINT64 IRCONV_INT_I64)
LJFOLD(CONV KINT64 IRCONV_U32_I64)
LJFOLDF(kfold_conv_kint64_int_i64)
{
  return INTFOLD((int32_t)ir_kint64(fleft)->u64);
}

LJFOLD(CONV KNUM IRCONV_INT_NUM)
LJFOLDF(kfold_conv_knum_int_num)
{
  lua_Number n = knumleft;
  if (!(fins->op2 & IRCONV_TRUNC)) {
    int32_t k = lj_num2int(n);
    if (irt_isguard(fins->t) && n != (lua_Number)k) {
      /* We're about to create a guard which always fails, like CONV +1.5.
      ** Some pathological loops cause this during LICM, e.g.:
      **   local x,k,t = 0,1.5,{1,[1.5]=2}
      **   for i=1,200 do x = x + t[k]; k = k == 1 and 1.5 or 1 end
      **   assert(x == 300)
      */
      return FAILFOLD;
    }
    return INTFOLD(k);
  } else {
    return INTFOLD((int32_t)n);
  }
}

LJFOLD(CONV KNUM IRCONV_U32_NUM)
LJFOLDF(kfold_conv_knum_u32_num)
{
  lua_assert((fins->op2 & IRCONV_TRUNC));
#ifdef _MSC_VER
  {  /* Workaround for MSVC bug. */
    volatile uint32_t u = (uint32_t)knumleft;
    return INTFOLD((int32_t)u);
  }
#else
  return INTFOLD((int32_t)(uint32_t)knumleft);
#endif
}

LJFOLD(CONV KNUM IRCONV_I64_NUM)
LJFOLDF(kfold_conv_knum_i64_num)
{
  lua_assert((fins->op2 & IRCONV_TRUNC));
  return INT64FOLD((uint64_t)(int64_t)knumleft);
}

LJFOLD(CONV KNUM IRCONV_U64_NUM)
LJFOLDF(kfold_conv_knum_u64_num)
{
  lua_assert((fins->op2 & IRCONV_TRUNC));
  return INT64FOLD(lj_num2u64(knumleft));
}

LJFOLD(TOSTR KNUM)
LJFOLDF(kfold_tostr_knum)
{
  return lj_ir_kstr(J, lj_str_fromnum(J->L, &knumleft));
}

LJFOLD(TOSTR KINT)
LJFOLDF(kfold_tostr_kint)
{
  return lj_ir_kstr(J, lj_str_fromint(J->L, fleft->i));
}

LJFOLD(STRTO KGC)
LJFOLDF(kfold_strto)
{
  TValue n;
  if (lj_strscan_num(ir_kstr(fleft), &n))
    return lj_ir_knum(J, numV(&n));
  return FAILFOLD;
}

/* -- Constant folding of equality checks --------------------------------- */

/* Don't constant-fold away FLOAD checks against KNULL. */
LJFOLD(EQ FLOAD KNULL)
LJFOLD(NE FLOAD KNULL)
LJFOLDX(lj_opt_cse)

/* But fold all other KNULL compares, since only KNULL is equal to KNULL. */
LJFOLD(EQ any KNULL)
LJFOLD(NE any KNULL)
LJFOLD(EQ KNULL any)
LJFOLD(NE KNULL any)
LJFOLD(EQ KINT KINT)  /* Constants are unique, so same refs <==> same value. */
LJFOLD(NE KINT KINT)
LJFOLD(EQ KINT64 KINT64)
LJFOLD(NE KINT64 KINT64)
LJFOLD(EQ KGC KGC)
LJFOLD(NE KGC KGC)
LJFOLDF(kfold_kref)
{
  return CONDFOLD((fins->op1 == fins->op2) ^ (fins->o == IR_NE));
}

/* -- Algebraic shortcuts ------------------------------------------------- */

LJFOLD(FPMATH FPMATH IRFPM_FLOOR)
LJFOLD(FPMATH FPMATH IRFPM_CEIL)
LJFOLD(FPMATH FPMATH IRFPM_TRUNC)
LJFOLDF(shortcut_round)
{
  IRFPMathOp op = (IRFPMathOp)fleft->op2;
  if (op == IRFPM_FLOOR || op == IRFPM_CEIL || op == IRFPM_TRUNC)
    return LEFTFOLD;  /* round(round_left(x)) = round_left(x) */
  return NEXTFOLD;
}

LJFOLD(ABS ABS KNUM)
LJFOLDF(shortcut_left)
{
  return LEFTFOLD;  /* f(g(x)) ==> g(x) */
}

LJFOLD(ABS NEG KNUM)
LJFOLDF(shortcut_dropleft)
{
  PHIBARRIER(fleft);
  fins->op1 = fleft->op1;  /* abs(neg(x)) ==> abs(x) */
  return RETRYFOLD;
}

/* Note: no safe shortcuts with STRTO and TOSTR ("1e2" ==> +100 ==> "100"). */
LJFOLD(NEG NEG any)
LJFOLD(BNOT BNOT)
LJFOLD(BSWAP BSWAP)
LJFOLDF(shortcut_leftleft)
{
  PHIBARRIER(fleft);  /* See above. Fold would be ok, but not beneficial. */
  return fleft->op1;  /* f(g(x)) ==> x */
}

/* -- FP algebraic simplifications ---------------------------------------- */

/* FP arithmetic is tricky -- there's not much to simplify.
** Please note the following common pitfalls before sending "improvements":
**   x+0 ==> x  is INVALID for x=-0
**   0-x ==> -x is INVALID for x=+0
**   x*0 ==> 0  is INVALID for x=-0, x=+-Inf or x=NaN
*/

LJFOLD(ADD NEG any)
LJFOLDF(simplify_numadd_negx)
{
  PHIBARRIER(fleft);
  fins->o = IR_SUB;  /* (-a) + b ==> b - a */
  fins->op1 = fins->op2;
  fins->op2 = fleft->op1;
  return RETRYFOLD;
}

LJFOLD(ADD any NEG)
LJFOLDF(simplify_numadd_xneg)
{
  PHIBARRIER(fright);
  fins->o = IR_SUB;  /* a + (-b) ==> a - b */
  fins->op2 = fright->op1;
  return RETRYFOLD;
}

LJFOLD(SUB any KNUM)
LJFOLDF(simplify_numsub_k)
{
  lua_Number n = knumright;
  if (n == 0.0)  /* x - (+-0) ==> x */
    return LEFTFOLD;
  return NEXTFOLD;
}

LJFOLD(SUB NEG KNUM)
LJFOLDF(simplify_numsub_negk)
{
  PHIBARRIER(fleft);
  fins->op2 = fleft->op1;  /* (-x) - k ==> (-k) - x */
  fins->op1 = (IRRef1)lj_ir_knum(J, -knumright);
  return RETRYFOLD;
}

LJFOLD(SUB any NEG)
LJFOLDF(simplify_numsub_xneg)
{
  PHIBARRIER(fright);
  fins->o = IR_ADD;  /* a - (-b) ==> a + b */
  fins->op2 = fright->op1;
  return RETRYFOLD;
}

LJFOLD(MUL any KNUM)
LJFOLD(DIV any KNUM)
LJFOLDF(simplify_nummuldiv_k)
{
  lua_Number n = knumright;
  if (n == 1.0) {  /* x o 1 ==> x */
    return LEFTFOLD;
  } else if (n == -1.0) {  /* x o -1 ==> -x */
    fins->o = IR_NEG;
    fins->op2 = (IRRef1)lj_ir_knum_neg(J);
    return RETRYFOLD;
  } else if (fins->o == IR_MUL && n == 2.0) {  /* x * 2 ==> x + x */
    fins->o = IR_ADD;
    fins->op2 = fins->op1;
    return RETRYFOLD;
  } else if (fins->o == IR_DIV) {  /* x / 2^k ==> x * 2^-k */
    uint64_t u = ir_knum(fright)->u64;
    uint32_t ex = ((uint32_t)(u >> 52) & 0x7ff);
    if ((u & U64x(000fffff,ffffffff)) == 0 && ex - 1 < 0x7fd) {
      u = (u & ((uint64_t)1 << 63)) | ((uint64_t)(0x7fe - ex) << 52);
      fins->o = IR_MUL;  /* Multiply by exact reciprocal. */
      fins->op2 = lj_ir_knum_u64(J, u);
      return RETRYFOLD;
    }
  }
  return NEXTFOLD;
}
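
/* Worked example for the reciprocal trick above (illustrative): x / 4.0
** has u = 0x4010000000000000, i.e. a zero mantissa and biased exponent
** ex = 0x401. The check ex - 1 < 0x7fd excludes zeros, denormals,
** infinities and NaNs; mirroring the exponent around the bias gives
** 0x7fe - 0x401 == 0x3fd, which encodes exactly 0.25, so the division
** becomes an exact multiply.
*/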

LJFOLD(MUL NEG KNUM)
LJFOLD(DIV NEG KNUM)
LJFOLDF(simplify_nummuldiv_negk)
{
  PHIBARRIER(fleft);
  fins->op1 = fleft->op1;  /* (-a) o k ==> a o (-k) */
  fins->op2 = (IRRef1)lj_ir_knum(J, -knumright);
  return RETRYFOLD;
}

LJFOLD(MUL NEG NEG)
LJFOLD(DIV NEG NEG)
LJFOLDF(simplify_nummuldiv_negneg)
{
  PHIBARRIER(fleft);
  PHIBARRIER(fright);
  fins->op1 = fleft->op1;  /* (-a) o (-b) ==> a o b */
  fins->op2 = fright->op1;
  return RETRYFOLD;
}

LJFOLD(POW any KINT)
LJFOLDF(simplify_numpow_xk)
{
  int32_t k = fright->i;
  TRef ref = fins->op1;
  if (k == 0)  /* x ^ 0 ==> 1 */
    return lj_ir_knum_one(J);  /* Result must be a number, not an int. */
  if (k == 1)  /* x ^ 1 ==> x */
    return LEFTFOLD;
  if ((uint32_t)(k+65536) > 2*65536u)  /* Limit code explosion. */
    return NEXTFOLD;
  if (k < 0) {  /* x ^ (-k) ==> (1/x) ^ k. */
    ref = emitir(IRTN(IR_DIV), lj_ir_knum_one(J), ref);
    k = -k;
  }
  /* Unroll x^k for 1 <= k <= 65536. */
  for (; (k & 1) == 0; k >>= 1)  /* Square away low-order zero bits of k. */
    ref = emitir(IRTN(IR_MUL), ref, ref);
  if ((k >>= 1) != 0) {  /* Handle the remaining bits. */
    TRef tmp = emitir(IRTN(IR_MUL), ref, ref);
    for (; k != 1; k >>= 1) {
      if (k & 1)
        ref = emitir(IRTN(IR_MUL), ref, tmp);
      tmp = emitir(IRTN(IR_MUL), tmp, tmp);
    }
    ref = emitir(IRTN(IR_MUL), ref, tmp);
  }
  return ref;
}
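
/* Illustrative trace of the unrolling above for k = 5 (binary 101):
** the first loop is skipped (k is odd); after k >>= 1, k == 2 and
** tmp = x^2. The inner loop leaves ref untouched (bit 0 of k is clear),
** squares tmp to x^4 and shifts k down to 1; the final multiply yields
** ref = x * x^4 = x^5 -- square-and-multiply, LSB first.
*/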

LJFOLD(POW KNUM any)
LJFOLDF(simplify_numpow_kx)
{
  lua_Number n = knumleft;
  if (n == 2.0) {  /* 2.0 ^ i ==> ldexp(1.0, tonum(i)) */
    fins->o = IR_CONV;
#if LJ_TARGET_X86ORX64
    fins->op1 = fins->op2;
    fins->op2 = IRCONV_NUM_INT;
    fins->op2 = (IRRef1)lj_opt_fold(J);
#endif
    fins->op1 = (IRRef1)lj_ir_knum_one(J);
    fins->o = IR_LDEXP;
    return RETRYFOLD;
  }
  return NEXTFOLD;
}

/* -- Simplify conversions ------------------------------------------------ */

LJFOLD(CONV CONV IRCONV_NUM_INT)  /* _NUM */
LJFOLDF(shortcut_conv_num_int)
{
  PHIBARRIER(fleft);
  /* Only safe with a guarded conversion to int. */
  if ((fleft->op2 & IRCONV_SRCMASK) == IRT_NUM && irt_isguard(fleft->t))
    return fleft->op1;  /* f(g(x)) ==> x */
  return NEXTFOLD;
}

LJFOLD(CONV CONV IRCONV_INT_NUM)  /* _INT */
LJFOLD(CONV CONV IRCONV_U32_NUM)  /* _U32 */
LJFOLDF(simplify_conv_int_num)
{
  /* Fold even across PHI to avoid expensive num->int conversions in loop. */
  if ((fleft->op2 & IRCONV_SRCMASK) ==
      ((fins->op2 & IRCONV_DSTMASK) >> IRCONV_DSH))
    return fleft->op1;
  return NEXTFOLD;
}

LJFOLD(CONV CONV IRCONV_I64_NUM)  /* _INT or _U32 */
LJFOLD(CONV CONV IRCONV_U64_NUM)  /* _INT or _U32 */
LJFOLDF(simplify_conv_i64_num)
{
  PHIBARRIER(fleft);
  if ((fleft->op2 & IRCONV_SRCMASK) == IRT_INT) {
    /* Reduce to a sign-extension. */
    fins->op1 = fleft->op1;
    fins->op2 = ((IRT_I64<<5)|IRT_INT|IRCONV_SEXT);
    return RETRYFOLD;
  } else if ((fleft->op2 & IRCONV_SRCMASK) == IRT_U32) {
#if LJ_TARGET_X64
    return fleft->op1;
#else
    /* Reduce to a zero-extension. */
    fins->op1 = fleft->op1;
    fins->op2 = (IRT_I64<<5)|IRT_U32;
    return RETRYFOLD;
#endif
  }
  return NEXTFOLD;
}

LJFOLD(CONV CONV IRCONV_INT_I64)  /* _INT or _U32 */
LJFOLD(CONV CONV IRCONV_INT_U64)  /* _INT or _U32 */
LJFOLD(CONV CONV IRCONV_U32_I64)  /* _INT or _U32 */
LJFOLD(CONV CONV IRCONV_U32_U64)  /* _INT or _U32 */
LJFOLDF(simplify_conv_int_i64)
{
  int src;
  PHIBARRIER(fleft);
  src = (fleft->op2 & IRCONV_SRCMASK);
  if (src == IRT_INT || src == IRT_U32) {
    if (src == ((fins->op2 & IRCONV_DSTMASK) >> IRCONV_DSH)) {
      return fleft->op1;
    } else {
      fins->op2 = ((fins->op2 & IRCONV_DSTMASK) | src);
      fins->op1 = fleft->op1;
      return RETRYFOLD;
    }
  }
  return NEXTFOLD;
}

LJFOLD(CONV CONV IRCONV_FLOAT_NUM)  /* _FLOAT */
LJFOLDF(simplify_conv_flt_num)
{
  PHIBARRIER(fleft);
  if ((fleft->op2 & IRCONV_SRCMASK) == IRT_FLOAT)
    return fleft->op1;
  return NEXTFOLD;
}

/* Shortcut TOBIT + IRT_NUM <- IRT_INT/IRT_U32 conversion. */
LJFOLD(TOBIT CONV KNUM)
LJFOLDF(simplify_tobit_conv)
{
  if ((fleft->op2 & IRCONV_SRCMASK) == IRT_INT ||
      (fleft->op2 & IRCONV_SRCMASK) == IRT_U32) {
    /* Fold even across PHI to avoid expensive num->int conversions in loop. */
    lua_assert(irt_isnum(fleft->t));
    return fleft->op1;
  }
  return NEXTFOLD;
}

/* Shortcut floor/ceil/round + IRT_NUM <- IRT_INT/IRT_U32 conversion. */
LJFOLD(FPMATH CONV IRFPM_FLOOR)
LJFOLD(FPMATH CONV IRFPM_CEIL)
LJFOLD(FPMATH CONV IRFPM_TRUNC)
LJFOLDF(simplify_floor_conv)
{
  if ((fleft->op2 & IRCONV_SRCMASK) == IRT_INT ||
      (fleft->op2 & IRCONV_SRCMASK) == IRT_U32)
    return LEFTFOLD;
  return NEXTFOLD;
}

/* Strength reduction of widening. */
LJFOLD(CONV any IRCONV_I64_INT)
LJFOLD(CONV any IRCONV_U64_INT)
LJFOLDF(simplify_conv_sext)
{
  IRRef ref = fins->op1;
  int64_t ofs = 0;
  if (!(fins->op2 & IRCONV_SEXT))
    return NEXTFOLD;
  PHIBARRIER(fleft);
  if (fleft->o == IR_XLOAD && (irt_isu8(fleft->t) || irt_isu16(fleft->t)))
    goto ok_reduce;
  if (fleft->o == IR_ADD && irref_isk(fleft->op2)) {
    ofs = (int64_t)IR(fleft->op2)->i;
    ref = fleft->op1;
  }
  /* Use scalar evolution analysis results to strength-reduce sign-extension. */
  if (ref == J->scev.idx) {
    IRRef lo = J->scev.dir ? J->scev.start : J->scev.stop;
    lua_assert(irt_isint(J->scev.t));
    if (lo && IR(lo)->i + ofs >= 0) {
    ok_reduce:
#if LJ_TARGET_X64
      /* Eliminate widening. All 32 bit ops do an implicit zero-extension. */
      return LEFTFOLD;
#else
      /* Reduce to a (cheaper) zero-extension. */
      fins->op2 &= ~IRCONV_SEXT;
      return RETRYFOLD;
#endif
    }
  }
  return NEXTFOLD;
}

/* Strength reduction of narrowing. */
LJFOLD(CONV ADD IRCONV_INT_I64)
LJFOLD(CONV SUB IRCONV_INT_I64)
LJFOLD(CONV MUL IRCONV_INT_I64)
LJFOLD(CONV ADD IRCONV_INT_U64)
LJFOLD(CONV SUB IRCONV_INT_U64)
LJFOLD(CONV MUL IRCONV_INT_U64)
LJFOLD(CONV ADD IRCONV_U32_I64)
LJFOLD(CONV SUB IRCONV_U32_I64)
LJFOLD(CONV MUL IRCONV_U32_I64)
LJFOLD(CONV ADD IRCONV_U32_U64)
LJFOLD(CONV SUB IRCONV_U32_U64)
LJFOLD(CONV MUL IRCONV_U32_U64)
LJFOLDF(simplify_conv_narrow)
{
  IROp op = (IROp)fleft->o;
  IRType t = irt_type(fins->t);
  IRRef op1 = fleft->op1, op2 = fleft->op2, mode = fins->op2;
  PHIBARRIER(fleft);
  op1 = emitir(IRTI(IR_CONV), op1, mode);
  op2 = emitir(IRTI(IR_CONV), op2, mode);
  fins->ot = IRT(op, t);
  fins->op1 = op1;
  fins->op2 = op2;
  return RETRYFOLD;
}

/* Special CSE rule for CONV. */
LJFOLD(CONV any any)
LJFOLDF(cse_conv)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_CSE)) {
    IRRef op1 = fins->op1, op2 = (fins->op2 & IRCONV_MODEMASK);
    uint8_t guard = irt_isguard(fins->t);
    IRRef ref = J->chain[IR_CONV];
    while (ref > op1) {
      IRIns *ir = IR(ref);
      /* Commoning with stronger checks is ok. */
      if (ir->op1 == op1 && (ir->op2 & IRCONV_MODEMASK) == op2 &&
          irt_isguard(ir->t) >= guard)
        return ref;
      ref = ir->prev;
    }
  }
  return EMITFOLD;  /* No fallthrough to regular CSE. */
}

/* FP conversion narrowing. */
LJFOLD(TOBIT ADD KNUM)
LJFOLD(TOBIT SUB KNUM)
LJFOLD(CONV ADD IRCONV_INT_NUM)
LJFOLD(CONV SUB IRCONV_INT_NUM)
LJFOLD(CONV ADD IRCONV_I64_NUM)
LJFOLD(CONV SUB IRCONV_I64_NUM)
LJFOLDF(narrow_convert)
{
  PHIBARRIER(fleft);
  /* Narrowing ignores PHIs and repeating it inside the loop is not useful. */
  if (J->chain[IR_LOOP])
    return NEXTFOLD;
  lua_assert(fins->o != IR_CONV || (fins->op2&IRCONV_CONVMASK) != IRCONV_TOBIT);
  return lj_opt_narrow_convert(J);
}

/* -- Integer algebraic simplifications ----------------------------------- */

LJFOLD(ADD any KINT)
LJFOLD(ADDOV any KINT)
LJFOLD(SUBOV any KINT)
LJFOLDF(simplify_intadd_k)
{
  if (fright->i == 0)  /* i o 0 ==> i */
    return LEFTFOLD;
  return NEXTFOLD;
}

LJFOLD(MULOV any KINT)
LJFOLDF(simplify_intmul_k)
{
  if (fright->i == 0)  /* i * 0 ==> 0 */
    return RIGHTFOLD;
  if (fright->i == 1)  /* i * 1 ==> i */
    return LEFTFOLD;
  if (fright->i == 2) {  /* i * 2 ==> i + i */
    fins->o = IR_ADDOV;
    fins->op2 = fins->op1;
    return RETRYFOLD;
  }
  return NEXTFOLD;
}

LJFOLD(SUB any KINT)
LJFOLDF(simplify_intsub_k)
{
  if (fright->i == 0)  /* i - 0 ==> i */
    return LEFTFOLD;
  fins->o = IR_ADD;  /* i - k ==> i + (-k) */
  fins->op2 = (IRRef1)lj_ir_kint(J, -fright->i);  /* Overflow for -2^31 ok. */
  return RETRYFOLD;
}

LJFOLD(SUB KINT any)
LJFOLD(SUB KINT64 any)
LJFOLDF(simplify_intsub_kleft)
{
  if (fleft->o == IR_KINT ? (fleft->i == 0) : (ir_kint64(fleft)->u64 == 0)) {
    fins->o = IR_NEG;  /* 0 - i ==> -i */
    fins->op1 = fins->op2;
    return RETRYFOLD;
  }
  return NEXTFOLD;
}

LJFOLD(ADD any KINT64)
LJFOLDF(simplify_intadd_k64)
{
  if (ir_kint64(fright)->u64 == 0)  /* i + 0 ==> i */
    return LEFTFOLD;
  return NEXTFOLD;
}

LJFOLD(SUB any KINT64)
LJFOLDF(simplify_intsub_k64)
{
  uint64_t k = ir_kint64(fright)->u64;
  if (k == 0)  /* i - 0 ==> i */
    return LEFTFOLD;
  fins->o = IR_ADD;  /* i - k ==> i + (-k) */
  fins->op2 = (IRRef1)lj_ir_kint64(J, (uint64_t)-(int64_t)k);
  return RETRYFOLD;
}

static TRef simplify_intmul_k(jit_State *J, int32_t k)
{
  /* Note: many more simplifications are possible, e.g. 2^k1 +- 2^k2.
  ** But this is mainly intended for simple address arithmetic.
  ** Also it's easier for the backend to optimize the original multiplies.
  */
  if (k == 1) {  /* i * 1 ==> i */
    return LEFTFOLD;
  } else if ((k & (k-1)) == 0) {  /* i * 2^k ==> i << k */
    fins->o = IR_BSHL;
    fins->op2 = lj_ir_kint(J, lj_fls((uint32_t)k));
    return RETRYFOLD;
  }
  return NEXTFOLD;
}
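
/* Example for the helper above (illustrative): i * 8 has k = 8 = 2^3,
** so (k & (k-1)) == 0 holds, lj_fls(8) == 3 and the multiply becomes
** i << 3. Other constants are left to the backend, which may fold them
** into addressing modes.
*/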

LJFOLD(MUL any KINT)
LJFOLDF(simplify_intmul_k32)
{
  if (fright->i == 0)  /* i * 0 ==> 0 */
    return INTFOLD(0);
  else if (fright->i > 0)
    return simplify_intmul_k(J, fright->i);
  return NEXTFOLD;
}

LJFOLD(MUL any KINT64)
LJFOLDF(simplify_intmul_k64)
{
  if (ir_kint64(fright)->u64 == 0)  /* i * 0 ==> 0 */
    return INT64FOLD(0);
#if LJ_64
  /* NYI: SPLIT for BSHL and 32 bit backend support. */
  else if (ir_kint64(fright)->u64 < 0x80000000u)
    return simplify_intmul_k(J, (int32_t)ir_kint64(fright)->u64);
#endif
  return NEXTFOLD;
}

LJFOLD(MOD any KINT)
LJFOLDF(simplify_intmod_k)
{
  int32_t k = fright->i;
  lua_assert(k != 0);
  if (k > 0 && (k & (k-1)) == 0) {  /* i % (2^k) ==> i & (2^k-1) */
    fins->o = IR_BAND;
    fins->op2 = lj_ir_kint(J, k-1);
    return RETRYFOLD;
  }
  return NEXTFOLD;
}
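
/* Note on the rule above: the masking is valid because LuaJIT's MOD is a
** floored modulo (the result takes the sign of the divisor), so for
** k = 2^n > 0 the identity i % k == i & (k-1) holds for negative i, too,
** e.g. -1 % 8 == 7 == (-1 & 7) in two's complement.
*/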

LJFOLD(MOD KINT any)
LJFOLDF(simplify_intmod_kleft)
{
  if (fleft->i == 0)
    return INTFOLD(0);
  return NEXTFOLD;
}

LJFOLD(SUB any any)
LJFOLD(SUBOV any any)
LJFOLDF(simplify_intsub)
{
  if (fins->op1 == fins->op2 && !irt_isnum(fins->t))  /* i - i ==> 0 */
    return irt_is64(fins->t) ? INT64FOLD(0) : INTFOLD(0);
  return NEXTFOLD;
}

LJFOLD(SUB ADD any)
LJFOLDF(simplify_intsubadd_leftcancel)
{
  if (!irt_isnum(fins->t)) {
    PHIBARRIER(fleft);
    if (fins->op2 == fleft->op1)  /* (i + j) - i ==> j */
      return fleft->op2;
    if (fins->op2 == fleft->op2)  /* (i + j) - j ==> i */
      return fleft->op1;
  }
  return NEXTFOLD;
}

LJFOLD(SUB SUB any)
LJFOLDF(simplify_intsubsub_leftcancel)
{
  if (!irt_isnum(fins->t)) {
    PHIBARRIER(fleft);
    if (fins->op2 == fleft->op1) {  /* (i - j) - i ==> 0 - j */
      fins->op1 = (IRRef1)lj_ir_kint(J, 0);
      fins->op2 = fleft->op2;
      return RETRYFOLD;
    }
  }
  return NEXTFOLD;
}

LJFOLD(SUB any SUB)
LJFOLDF(simplify_intsubsub_rightcancel)
{
  if (!irt_isnum(fins->t)) {
    PHIBARRIER(fright);
    if (fins->op1 == fright->op1)  /* i - (i - j) ==> j */
      return fright->op2;
  }
  return NEXTFOLD;
}

LJFOLD(SUB any ADD)
LJFOLDF(simplify_intsubadd_rightcancel)
{
  if (!irt_isnum(fins->t)) {
    PHIBARRIER(fright);
    if (fins->op1 == fright->op1) {  /* i - (i + j) ==> 0 - j */
      fins->op2 = fright->op2;
      fins->op1 = (IRRef1)lj_ir_kint(J, 0);
      return RETRYFOLD;
    }
    if (fins->op1 == fright->op2) {  /* i - (j + i) ==> 0 - j */
      fins->op2 = fright->op1;
      fins->op1 = (IRRef1)lj_ir_kint(J, 0);
      return RETRYFOLD;
    }
  }
  return NEXTFOLD;
}

LJFOLD(SUB ADD ADD)
LJFOLDF(simplify_intsubaddadd_cancel)
{
  if (!irt_isnum(fins->t)) {
    PHIBARRIER(fleft);
    PHIBARRIER(fright);
    if (fleft->op1 == fright->op1) {  /* (i + j1) - (i + j2) ==> j1 - j2 */
      fins->op1 = fleft->op2;
      fins->op2 = fright->op2;
      return RETRYFOLD;
    }
    if (fleft->op1 == fright->op2) {  /* (i + j1) - (j2 + i) ==> j1 - j2 */
      fins->op1 = fleft->op2;
      fins->op2 = fright->op1;
      return RETRYFOLD;
    }
    if (fleft->op2 == fright->op1) {  /* (j1 + i) - (i + j2) ==> j1 - j2 */
      fins->op1 = fleft->op1;
      fins->op2 = fright->op2;
      return RETRYFOLD;
    }
    if (fleft->op2 == fright->op2) {  /* (j1 + i) - (j2 + i) ==> j1 - j2 */
      fins->op1 = fleft->op1;
      fins->op2 = fright->op1;
      return RETRYFOLD;
    }
  }
  return NEXTFOLD;
}

LJFOLD(BAND any KINT)
LJFOLD(BAND any KINT64)
LJFOLDF(simplify_band_k)
{
  int64_t k = fright->o == IR_KINT ? (int64_t)fright->i :
                                     (int64_t)ir_k64(fright)->u64;
  if (k == 0)  /* i & 0 ==> 0 */
    return RIGHTFOLD;
  if (k == -1)  /* i & -1 ==> i */
    return LEFTFOLD;
  return NEXTFOLD;
}

LJFOLD(BOR any KINT)
LJFOLD(BOR any KINT64)
LJFOLDF(simplify_bor_k)
{
  int64_t k = fright->o == IR_KINT ? (int64_t)fright->i :
                                     (int64_t)ir_k64(fright)->u64;
  if (k == 0)  /* i | 0 ==> i */
    return LEFTFOLD;
  if (k == -1)  /* i | -1 ==> -1 */
    return RIGHTFOLD;
  return NEXTFOLD;
}

LJFOLD(BXOR any KINT)
LJFOLD(BXOR any KINT64)
LJFOLDF(simplify_bxor_k)
{
  int64_t k = fright->o == IR_KINT ? (int64_t)fright->i :
                                     (int64_t)ir_k64(fright)->u64;
  if (k == 0)  /* i xor 0 ==> i */
    return LEFTFOLD;
  if (k == -1) {  /* i xor -1 ==> ~i */
    fins->o = IR_BNOT;
    fins->op2 = 0;
    return RETRYFOLD;
  }
  return NEXTFOLD;
}

LJFOLD(BSHL any KINT)
LJFOLD(BSHR any KINT)
LJFOLD(BSAR any KINT)
LJFOLD(BROL any KINT)
LJFOLD(BROR any KINT)
LJFOLDF(simplify_shift_ik)
{
  int32_t mask = irt_is64(fins->t) ? 63 : 31;
  int32_t k = (fright->i & mask);
  if (k == 0)  /* i o 0 ==> i */
    return LEFTFOLD;
  if (k == 1 && fins->o == IR_BSHL) {  /* i << 1 ==> i + i */
    fins->o = IR_ADD;
    fins->op2 = fins->op1;
    return RETRYFOLD;
  }
  if (k != fright->i) {  /* i o k ==> i o (k & mask) */
    fins->op2 = (IRRef1)lj_ir_kint(J, k);
    return RETRYFOLD;
  }
#ifndef LJ_TARGET_UNIFYROT
  if (fins->o == IR_BROR) {  /* bror(i, k) ==> brol(i, (-k)&mask) */
    fins->o = IR_BROL;
    fins->op2 = (IRRef1)lj_ir_kint(J, (-k)&mask);
    return RETRYFOLD;
  }
#endif
  return NEXTFOLD;
}

LJFOLD(BSHL any BAND)
LJFOLD(BSHR any BAND)
LJFOLD(BSAR any BAND)
LJFOLD(BROL any BAND)
LJFOLD(BROR any BAND)
LJFOLDF(simplify_shift_andk)
{
  IRIns *irk = IR(fright->op2);
  PHIBARRIER(fright);
  if ((fins->o < IR_BROL ? LJ_TARGET_MASKSHIFT : LJ_TARGET_MASKROT) &&
      irk->o == IR_KINT) {  /* i o (j & mask) ==> i o j */
    int32_t mask = irt_is64(fins->t) ? 63 : 31;
    int32_t k = irk->i & mask;
    if (k == mask) {
      fins->op2 = fright->op1;
      return RETRYFOLD;
    }
  }
  return NEXTFOLD;
}

LJFOLD(BSHL KINT any)
LJFOLD(BSHR KINT any)
LJFOLD(BSHL KINT64 any)
LJFOLD(BSHR KINT64 any)
LJFOLDF(simplify_shift1_ki)
{
  int64_t k = fleft->o == IR_KINT ? (int64_t)fleft->i :
                                    (int64_t)ir_k64(fleft)->u64;
  if (k == 0)  /* 0 o i ==> 0 */
    return LEFTFOLD;
  return NEXTFOLD;
}

LJFOLD(BSAR KINT any)
LJFOLD(BROL KINT any)
LJFOLD(BROR KINT any)
LJFOLD(BSAR KINT64 any)
LJFOLD(BROL KINT64 any)
LJFOLD(BROR KINT64 any)
LJFOLDF(simplify_shift2_ki)
{
  int64_t k = fleft->o == IR_KINT ? (int64_t)fleft->i :
                                    (int64_t)ir_k64(fleft)->u64;
  if (k == 0 || k == -1)  /* 0 o i ==> 0; -1 o i ==> -1 */
    return LEFTFOLD;
  return NEXTFOLD;
}

LJFOLD(BSHL BAND KINT)
LJFOLD(BSHR BAND KINT)
LJFOLD(BROL BAND KINT)
LJFOLD(BROR BAND KINT)
LJFOLDF(simplify_shiftk_andk)
{
  IRIns *irk = IR(fleft->op2);
  PHIBARRIER(fleft);
  if (irk->o == IR_KINT) {  /* (i & k1) o k2 ==> (i o k2) & (k1 o k2) */
    int32_t k = kfold_intop(irk->i, fright->i, (IROp)fins->o);
    fins->op1 = fleft->op1;
    fins->op1 = (IRRef1)lj_opt_fold(J);
    fins->op2 = (IRRef1)lj_ir_kint(J, k);
    fins->ot = IRTI(IR_BAND);
    return RETRYFOLD;
  }
  return NEXTFOLD;
}

LJFOLD(BAND BSHL KINT)
LJFOLD(BAND BSHR KINT)
LJFOLDF(simplify_andk_shiftk)
{
  IRIns *irk = IR(fleft->op2);
  if (irk->o == IR_KINT &&
      kfold_intop(-1, irk->i, (IROp)fleft->o) == fright->i)
    return LEFTFOLD;  /* (i o k1) & k2 ==> i, if (-1 o k1) == k2 */
  return NEXTFOLD;
}
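
/* Example for the rule above (illustrative): (i << 8) & 0xffffff00 keeps
** exactly the bits an 8 bit left shift can produce, since
** kfold_intop(-1, 8, IR_BSHL) == 0xffffff00 == k2, so the BAND is
** redundant and the shift alone is returned.
*/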

/* -- Reassociation ------------------------------------------------------- */

LJFOLD(ADD ADD KINT)
LJFOLD(MUL MUL KINT)
LJFOLD(BAND BAND KINT)
LJFOLD(BOR BOR KINT)
LJFOLD(BXOR BXOR KINT)
LJFOLDF(reassoc_intarith_k)
{
  IRIns *irk = IR(fleft->op2);
  if (irk->o == IR_KINT) {
    int32_t k = kfold_intop(irk->i, fright->i, (IROp)fins->o);
    if (k == irk->i)  /* (i o k1) o k2 ==> i o k1, if (k1 o k2) == k1. */
      return LEFTFOLD;
    PHIBARRIER(fleft);
    fins->op1 = fleft->op1;
    fins->op2 = (IRRef1)lj_ir_kint(J, k);
    return RETRYFOLD;  /* (i o k1) o k2 ==> i o (k1 o k2) */
  }
  return NEXTFOLD;
}

LJFOLD(ADD ADD KINT64)
LJFOLD(MUL MUL KINT64)
LJFOLD(BAND BAND KINT64)
LJFOLD(BOR BOR KINT64)
LJFOLD(BXOR BXOR KINT64)
LJFOLDF(reassoc_intarith_k64)
{
#if LJ_HASFFI || LJ_64
  IRIns *irk = IR(fleft->op2);
  if (irk->o == IR_KINT64) {
    uint64_t k = kfold_int64arith(ir_k64(irk)->u64,
                                  ir_k64(fright)->u64, (IROp)fins->o);
    PHIBARRIER(fleft);
    fins->op1 = fleft->op1;
    fins->op2 = (IRRef1)lj_ir_kint64(J, k);
    return RETRYFOLD;  /* (i o k1) o k2 ==> i o (k1 o k2) */
  }
  return NEXTFOLD;
#else
  UNUSED(J); lua_assert(0); return FAILFOLD;
#endif
}

LJFOLD(MIN MIN any)
LJFOLD(MAX MAX any)
LJFOLD(BAND BAND any)
LJFOLD(BOR BOR any)
LJFOLDF(reassoc_dup)
{
  if (fins->op2 == fleft->op1 || fins->op2 == fleft->op2)
    return LEFTFOLD;  /* (a o b) o a ==> a o b; (a o b) o b ==> a o b */
  return NEXTFOLD;
}

LJFOLD(BXOR BXOR any)
LJFOLDF(reassoc_bxor)
{
  PHIBARRIER(fleft);
  if (fins->op2 == fleft->op1)  /* (a xor b) xor a ==> b */
    return fleft->op2;
  if (fins->op2 == fleft->op2)  /* (a xor b) xor b ==> a */
    return fleft->op1;
  return NEXTFOLD;
}

LJFOLD(BSHL BSHL KINT)
LJFOLD(BSHR BSHR KINT)
LJFOLD(BSAR BSAR KINT)
LJFOLD(BROL BROL KINT)
LJFOLD(BROR BROR KINT)
LJFOLDF(reassoc_shift)
{
  IRIns *irk = IR(fleft->op2);
  PHIBARRIER(fleft);  /* The (shift any KINT) rule covers k2 == 0 and more. */
  if (irk->o == IR_KINT) {  /* (i o k1) o k2 ==> i o (k1 + k2) */
    int32_t mask = irt_is64(fins->t) ? 63 : 31;
    int32_t k = (irk->i & mask) + (fright->i & mask);
    if (k > mask) {  /* Combined shift too wide? */
      if (fins->o == IR_BSHL || fins->o == IR_BSHR)
        return mask == 31 ? INTFOLD(0) : INT64FOLD(0);
      else if (fins->o == IR_BSAR)
        k = mask;
      else
        k &= mask;
    }
    fins->op1 = fleft->op1;
    fins->op2 = (IRRef1)lj_ir_kint(J, k);
    return RETRYFOLD;
  }
  return NEXTFOLD;
}
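
/* Example for the width check above (illustrative): (i >> 20) >> 20 would
** combine to a 40 bit logical shift of a 32 bit value, so the result is
** known to be 0. BSAR instead saturates at the mask (every bit becomes a
** copy of the sign bit), while the rotates simply wrap around.
*/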

LJFOLD(MIN MIN KNUM)
LJFOLD(MAX MAX KNUM)
LJFOLD(MIN MIN KINT)
LJFOLD(MAX MAX KINT)
LJFOLDF(reassoc_minmax_k)
{
  IRIns *irk = IR(fleft->op2);
  if (irk->o == IR_KNUM) {
    lua_Number a = ir_knum(irk)->n;
    lua_Number y = lj_vm_foldarith(a, knumright, fins->o - IR_ADD);
    if (a == y)  /* (x o k1) o k2 ==> x o k1, if (k1 o k2) == k1. */
      return LEFTFOLD;
    PHIBARRIER(fleft);
    fins->op1 = fleft->op1;
    fins->op2 = (IRRef1)lj_ir_knum(J, y);
    return RETRYFOLD;  /* (x o k1) o k2 ==> x o (k1 o k2) */
  } else if (irk->o == IR_KINT) {
    int32_t a = irk->i;
    int32_t y = kfold_intop(a, fright->i, fins->o);
    if (a == y)  /* (x o k1) o k2 ==> x o k1, if (k1 o k2) == k1. */
      return LEFTFOLD;
    PHIBARRIER(fleft);
    fins->op1 = fleft->op1;
    fins->op2 = (IRRef1)lj_ir_kint(J, y);
    return RETRYFOLD;  /* (x o k1) o k2 ==> x o (k1 o k2) */
  }
  return NEXTFOLD;
}

LJFOLD(MIN MAX any)
LJFOLD(MAX MIN any)
LJFOLDF(reassoc_minmax_left)
{
  if (fins->op2 == fleft->op1 || fins->op2 == fleft->op2)
    return RIGHTFOLD;  /* (b o1 a) o2 b ==> b; (a o1 b) o2 b ==> b */
  return NEXTFOLD;
}

LJFOLD(MIN any MAX)
LJFOLD(MAX any MIN)
LJFOLDF(reassoc_minmax_right)
{
  if (fins->op1 == fright->op1 || fins->op1 == fright->op2)
    return LEFTFOLD;  /* a o2 (a o1 b) ==> a; a o2 (b o1 a) ==> a */
  return NEXTFOLD;
}

/* -- Array bounds check elimination -------------------------------------- */

/* Eliminate ABC across PHIs to handle t[i-1] forwarding case.
** ABC(asize, (i+k)+(-k)) ==> ABC(asize, i), but only if it already exists.
** Could be generalized to (i+k1)+k2 ==> i+(k1+k2), but needs better disambig.
*/
LJFOLD(ABC any ADD)
LJFOLDF(abc_fwd)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_ABC)) {
    if (irref_isk(fright->op2)) {
      IRIns *add2 = IR(fright->op1);
      if (add2->o == IR_ADD && irref_isk(add2->op2) &&
          IR(fright->op2)->i == -IR(add2->op2)->i) {
        IRRef ref = J->chain[IR_ABC];
        IRRef lim = add2->op1;
        if (fins->op1 > lim) lim = fins->op1;
        while (ref > lim) {
          IRIns *ir = IR(ref);
          if (ir->op1 == fins->op1 && ir->op2 == add2->op1)
            return DROPFOLD;
          ref = ir->prev;
        }
      }
    }
  }
  return NEXTFOLD;
}

/* Eliminate ABC for constants.
** ABC(asize, k1), ABC(asize k2) ==> ABC(asize, max(k1, k2))
** Drop second ABC if k2 is lower. Otherwise patch first ABC with k2.
*/
LJFOLD(ABC any KINT)
LJFOLDF(abc_k)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_ABC)) {
    IRRef ref = J->chain[IR_ABC];
    IRRef asize = fins->op1;
    while (ref > asize) {
      IRIns *ir = IR(ref);
      if (ir->op1 == asize && irref_isk(ir->op2)) {
        int32_t k = IR(ir->op2)->i;
        if (fright->i > k)
          ir->op2 = fins->op2;
        return DROPFOLD;
      }
      ref = ir->prev;
    }
    return EMITFOLD;  /* Already performed CSE. */
  }
  return NEXTFOLD;
}

/* Eliminate invariant ABC inside loop. */
LJFOLD(ABC any any)
LJFOLDF(abc_invar)
{
  /* Invariant ABC marked as PTR. Drop if op1 is invariant, too. */
  if (!irt_isint(fins->t) && fins->op1 < J->chain[IR_LOOP] &&
      !irt_isphi(IR(fins->op1)->t))
    return DROPFOLD;
  return NEXTFOLD;
}

/* -- Commutativity ------------------------------------------------------- */

/* The refs of commutative ops are canonicalized. Lower refs go to the right.
** Rationale behind this:
** - It (also) moves constants to the right.
** - It reduces the number of FOLD rules (e.g. (BOR any KINT) suffices).
** - It helps CSE to find more matches.
** - The assembler generates better code with constants at the right.
*/

LJFOLD(ADD any any)
LJFOLD(MUL any any)
LJFOLD(ADDOV any any)
LJFOLD(MULOV any any)
LJFOLDF(comm_swap)
{
  if (fins->op1 < fins->op2) {  /* Move lower ref to the right. */
    IRRef1 tmp = fins->op1;
    fins->op1 = fins->op2;
    fins->op2 = tmp;
    return RETRYFOLD;
  }
  return NEXTFOLD;
}

LJFOLD(EQ any any)
LJFOLD(NE any any)
LJFOLDF(comm_equal)
{
  /* For non-numbers only: x == x ==> drop; x ~= x ==> fail */
  if (fins->op1 == fins->op2 && !irt_isnum(fins->t))
    return CONDFOLD(fins->o == IR_EQ);
  return fold_comm_swap(J);
}

LJFOLD(LT any any)
LJFOLD(GE any any)
LJFOLD(LE any any)
LJFOLD(GT any any)
LJFOLD(ULT any any)
LJFOLD(UGE any any)
LJFOLD(ULE any any)
LJFOLD(UGT any any)
LJFOLDF(comm_comp)
{
  /* For non-numbers only: x <=> x ==> drop; x <> x ==> fail */
  if (fins->op1 == fins->op2 && !irt_isnum(fins->t))
    return CONDFOLD((fins->o ^ (fins->o >> 1)) & 1);
  if (fins->op1 < fins->op2) {  /* Move lower ref to the right. */
    IRRef1 tmp = fins->op1;
    fins->op1 = fins->op2;
    fins->op2 = tmp;
    fins->o ^= 3;  /* GT <-> LT, GE <-> LE, does not affect U */
    return RETRYFOLD;
  }
  return NEXTFOLD;
}
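
/* Editor's note on the two tricks above (they rely on the IR opcode
** order LT, GE, LE, GT, ULT, UGE, ULE, UGT): (o ^ (o >> 1)) & 1 yields
** the pattern 0,1,1,0 per group of four, i.e. exactly "x >= x and x <= x
** hold, x < x and x > x don't". And o ^= 3 swaps LT <-> GT and GE <-> LE
** (likewise for the U variants), mirroring the comparison after the
** operand swap.
*/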
1766
1767LJFOLD(BAND any any)
1768LJFOLD(BOR any any)
1769LJFOLD(MIN any any)
1770LJFOLD(MAX any any)
1771LJFOLDF(comm_dup)
1772{
1773 if (fins->op1 == fins->op2) /* x o x ==> x */
1774 return LEFTFOLD;
1775 return fold_comm_swap(J);
1776}
1777
1778LJFOLD(BXOR any any)
1779LJFOLDF(comm_bxor)
1780{
1781 if (fins->op1 == fins->op2) /* i xor i ==> 0 */
1782 return irt_is64(fins->t) ? INT64FOLD(0) : INTFOLD(0);
1783 return fold_comm_swap(J);
1784}
1785
1786/* -- Simplification of compound expressions ------------------------------ */
1787
1788static TRef kfold_xload(jit_State *J, IRIns *ir, const void *p)
1789{
1790 int32_t k;
1791 switch (irt_type(ir->t)) {
1792 case IRT_NUM: return lj_ir_knum_u64(J, *(uint64_t *)p);
1793 case IRT_I8: k = (int32_t)*(int8_t *)p; break;
1794 case IRT_U8: k = (int32_t)*(uint8_t *)p; break;
1795 case IRT_I16: k = (int32_t)(int16_t)lj_getu16(p); break;
1796 case IRT_U16: k = (int32_t)(uint16_t)lj_getu16(p); break;
1797 case IRT_INT: case IRT_U32: k = (int32_t)lj_getu32(p); break;
1798 case IRT_I64: case IRT_U64: return lj_ir_kint64(J, *(uint64_t *)p);
1799 default: return 0;
1800 }
1801 return lj_ir_kint(J, k);
1802}
1803
1804/* Turn: string.sub(str, a, b) == kstr
1805** into: string.byte(str, a) == string.byte(kstr, 1) etc.
1806** Note: this creates unaligned XLOADs on x86/x64.
1807*/
1808LJFOLD(EQ SNEW KGC)
1809LJFOLD(NE SNEW KGC)
1810LJFOLDF(merge_eqne_snew_kgc)
1811{
1812 GCstr *kstr = ir_kstr(fright);
1813 int32_t len = (int32_t)kstr->len;
1814 lua_assert(irt_isstr(fins->t));
1815
1816#if LJ_TARGET_UNALIGNED
1817#define FOLD_SNEW_MAX_LEN 4 /* Handle string lengths 0, 1, 2, 3, 4. */
1818#define FOLD_SNEW_TYPE8 IRT_I8 /* Creates shorter immediates. */
1819#else
1820#define FOLD_SNEW_MAX_LEN 1 /* Handle string lengths 0 or 1. */
1821#define FOLD_SNEW_TYPE8 IRT_U8 /* Prefer unsigned loads. */
1822#endif
1823
1824 PHIBARRIER(fleft);
1825 if (len <= FOLD_SNEW_MAX_LEN) {
1826 IROp op = (IROp)fins->o;
1827 IRRef strref = fleft->op1;
1828 lua_assert(IR(strref)->o == IR_STRREF);
1829 if (op == IR_EQ) {
1830 emitir(IRTGI(IR_EQ), fleft->op2, lj_ir_kint(J, len));
1831 /* Caveat: fins/fleft/fright is no longer valid after emitir. */
1832 } else {
1833 /* NE is not expanded since this would need an OR of two conds. */
1834 if (!irref_isk(fleft->op2)) /* Only handle the constant length case. */
1835 return NEXTFOLD;
1836 if (IR(fleft->op2)->i != len)
1837 return DROPFOLD;
1838 }
1839 if (len > 0) {
1840 /* A 4 byte load for length 3 is ok -- all strings have an extra NUL. */
1841 uint16_t ot = (uint16_t)(len == 1 ? IRT(IR_XLOAD, FOLD_SNEW_TYPE8) :
1842 len == 2 ? IRT(IR_XLOAD, IRT_U16) :
1843 IRTI(IR_XLOAD));
1844 TRef tmp = emitir(ot, strref,
1845 IRXLOAD_READONLY | (len > 1 ? IRXLOAD_UNALIGNED : 0));
1846 TRef val = kfold_xload(J, IR(tref_ref(tmp)), strdata(kstr));
1847 if (len == 3)
1848 tmp = emitir(IRTI(IR_BAND), tmp,
1849 lj_ir_kint(J, LJ_ENDIAN_SELECT(0x00ffffff, 0xffffff00)));
1850 fins->op1 = (IRRef1)tmp;
1851 fins->op2 = (IRRef1)val;
1852 fins->ot = (IROpT)IRTGI(op);
1853 return RETRYFOLD;
1854 } else {
1855 return DROPFOLD;
1856 }
1857 }
1858 return NEXTFOLD;
1859}
1860
1861/* -- Loads --------------------------------------------------------------- */
1862
1863/* Loads cannot be folded or passed on to CSE in general.
1864** Alias analysis is needed to check for forwarding opportunities.
1865**
1866** Caveat: *all* loads must be listed here or they end up at CSE!
1867*/
1868
1869LJFOLD(ALOAD any)
1870LJFOLDX(lj_opt_fwd_aload)
1871
1872/* From HREF fwd (see below). Must eliminate, not supported by fwd/backend. */
1873LJFOLD(HLOAD KKPTR)
1874LJFOLDF(kfold_hload_kkptr)
1875{
1876 UNUSED(J);
1877 lua_assert(ir_kptr(fleft) == niltvg(J2G(J)));
1878 return TREF_NIL;
1879}
1880
1881LJFOLD(HLOAD any)
1882LJFOLDX(lj_opt_fwd_hload)
1883
1884LJFOLD(ULOAD any)
1885LJFOLDX(lj_opt_fwd_uload)
1886
1887LJFOLD(CALLL any IRCALL_lj_tab_len)
1888LJFOLDX(lj_opt_fwd_tab_len)
1889
1890/* Upvalue refs are really loads, but there are no corresponding stores.
1891** So CSE is ok for them, except for UREFO across a GC step (see below).
1892** If the referenced function is const, its upvalue addresses are const, too.
1893** This can be used to improve CSE by looking for the same address,
1894** even if the upvalues originate from a different function.
1895*/
1896LJFOLD(UREFO KGC any)
1897LJFOLD(UREFC KGC any)
1898LJFOLDF(cse_uref)
1899{
1900 if (LJ_LIKELY(J->flags & JIT_F_OPT_CSE)) {
1901 IRRef ref = J->chain[fins->o];
1902 GCfunc *fn = ir_kfunc(fleft);
1903 GCupval *uv = gco2uv(gcref(fn->l.uvptr[(fins->op2 >> 8)]));
1904 while (ref > 0) {
1905 IRIns *ir = IR(ref);
1906 if (irref_isk(ir->op1)) {
1907 GCfunc *fn2 = ir_kfunc(IR(ir->op1));
1908 if (gco2uv(gcref(fn2->l.uvptr[(ir->op2 >> 8)])) == uv) {
1909 if (fins->o == IR_UREFO && gcstep_barrier(J, ref))
1910 break;
1911 return ref;
1912 }
1913 }
1914 ref = ir->prev;
1915 }
1916 }
1917 return EMITFOLD;
1918}
1919
1920LJFOLD(HREFK any any)
1921LJFOLDX(lj_opt_fwd_hrefk)
1922
1923LJFOLD(HREF TNEW any)
1924LJFOLDF(fwd_href_tnew)
1925{
1926 if (lj_opt_fwd_href_nokey(J))
1927 return lj_ir_kkptr(J, niltvg(J2G(J)));
1928 return NEXTFOLD;
1929}
1930
1931LJFOLD(HREF TDUP KPRI)
1932LJFOLD(HREF TDUP KGC)
1933LJFOLD(HREF TDUP KNUM)
1934LJFOLDF(fwd_href_tdup)
1935{
1936 TValue keyv;
1937 lj_ir_kvalue(J->L, &keyv, fright);
1938 if (lj_tab_get(J->L, ir_ktab(IR(fleft->op1)), &keyv) == niltvg(J2G(J)) &&
1939 lj_opt_fwd_href_nokey(J))
1940 return lj_ir_kkptr(J, niltvg(J2G(J)));
1941 return NEXTFOLD;
1942}

/* We can safely FOLD/CSE array/hash refs and field loads, since there
** are no corresponding stores. But we need to check for any NEWREF with
** an aliased table, as it may invalidate all of the pointers and fields.
** Only HREF needs the NEWREF check -- AREF and HREFK already depend on
** FLOADs. And NEWREF itself is treated like a store (see below).
*/
LJFOLD(FLOAD TNEW IRFL_TAB_ASIZE)
LJFOLDF(fload_tab_tnew_asize)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD) && lj_opt_fwd_tptr(J, fins->op1))
    return INTFOLD(fleft->op1);
  return NEXTFOLD;
}

LJFOLD(FLOAD TNEW IRFL_TAB_HMASK)
LJFOLDF(fload_tab_tnew_hmask)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD) && lj_opt_fwd_tptr(J, fins->op1))
    return INTFOLD((1 << fleft->op2)-1);
  return NEXTFOLD;
}

LJFOLD(FLOAD TDUP IRFL_TAB_ASIZE)
LJFOLDF(fload_tab_tdup_asize)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD) && lj_opt_fwd_tptr(J, fins->op1))
    return INTFOLD((int32_t)ir_ktab(IR(fleft->op1))->asize);
  return NEXTFOLD;
}

LJFOLD(FLOAD TDUP IRFL_TAB_HMASK)
LJFOLDF(fload_tab_tdup_hmask)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD) && lj_opt_fwd_tptr(J, fins->op1))
    return INTFOLD((int32_t)ir_ktab(IR(fleft->op1))->hmask);
  return NEXTFOLD;
}
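
/* Summary of the four rules above: TNEW carries its sizes as literal
** operands, so the field loads fold to constants directly:
**   FLOAD (TNEW asize hbits) IRFL_TAB_ASIZE  ==>  asize
**   FLOAD (TNEW asize hbits) IRFL_TAB_HMASK  ==>  (1 << hbits) - 1
** For TDUP the sizes come from the template table constant instead.
*/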

LJFOLD(HREF any any)
LJFOLD(FLOAD any IRFL_TAB_ARRAY)
LJFOLD(FLOAD any IRFL_TAB_NODE)
LJFOLD(FLOAD any IRFL_TAB_ASIZE)
LJFOLD(FLOAD any IRFL_TAB_HMASK)
LJFOLDF(fload_tab_ah)
{
  TRef tr = lj_opt_cse(J);
  return lj_opt_fwd_tptr(J, tref_ref(tr)) ? tr : EMITFOLD;
}

/* Strings are immutable, so we can safely FOLD/CSE the related FLOAD. */
LJFOLD(FLOAD KGC IRFL_STR_LEN)
LJFOLDF(fload_str_len_kgc)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD))
    return INTFOLD((int32_t)ir_kstr(fleft)->len);
  return NEXTFOLD;
}

LJFOLD(FLOAD SNEW IRFL_STR_LEN)
LJFOLDF(fload_str_len_snew)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD)) {
    PHIBARRIER(fleft);
    return fleft->op2;
  }
  return NEXTFOLD;
}

/* The C type ID of cdata objects is immutable. */
LJFOLD(FLOAD KGC IRFL_CDATA_CTYPEID)
LJFOLDF(fload_cdata_typeid_kgc)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD))
    return INTFOLD((int32_t)ir_kcdata(fleft)->ctypeid);
  return NEXTFOLD;
}

/* Get the contents of immutable cdata objects. */
LJFOLD(FLOAD KGC IRFL_CDATA_PTR)
LJFOLD(FLOAD KGC IRFL_CDATA_INT)
LJFOLD(FLOAD KGC IRFL_CDATA_INT64)
LJFOLDF(fload_cdata_int64_kgc)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD)) {
    void *p = cdataptr(ir_kcdata(fleft));
    if (irt_is64(fins->t))
      return INT64FOLD(*(uint64_t *)p);
    else
      return INTFOLD(*(int32_t *)p);
  }
  return NEXTFOLD;
}

LJFOLD(FLOAD CNEW IRFL_CDATA_CTYPEID)
LJFOLD(FLOAD CNEWI IRFL_CDATA_CTYPEID)
LJFOLDF(fload_cdata_typeid_cnew)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD))
    return fleft->op1;  /* No PHI barrier needed. CNEW/CNEWI op1 is const. */
  return NEXTFOLD;
}

/* Pointer, int and int64 cdata objects are immutable. */
LJFOLD(FLOAD CNEWI IRFL_CDATA_PTR)
LJFOLD(FLOAD CNEWI IRFL_CDATA_INT)
LJFOLD(FLOAD CNEWI IRFL_CDATA_INT64)
LJFOLDF(fload_cdata_ptr_int64_cnew)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD))
    return fleft->op2;  /* Fold even across PHI to avoid allocations. */
  return NEXTFOLD;
}
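
/* Illustrative case for the rule above: boxing an int64 and reading it
** back, e.g. via ffi.new("int64_t", x), yields
**   CNEWI ctypeid x
**   FLOAD cnewi IRFL_CDATA_INT64
** The FLOAD folds straight to x. This may leave the CNEWI itself dead,
** so the allocation can be eliminated (hence folding even across PHIs).
*/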

LJFOLD(FLOAD any IRFL_STR_LEN)
LJFOLD(FLOAD any IRFL_CDATA_CTYPEID)
LJFOLD(FLOAD any IRFL_CDATA_PTR)
LJFOLD(FLOAD any IRFL_CDATA_INT)
LJFOLD(FLOAD any IRFL_CDATA_INT64)
LJFOLD(VLOAD any any)  /* Vararg loads have no corresponding stores. */
LJFOLDX(lj_opt_cse)

/* All other field loads need alias analysis. */
LJFOLD(FLOAD any any)
LJFOLDX(lj_opt_fwd_fload)

/* This is for LOOP only. Recording handles SLOADs internally. */
LJFOLD(SLOAD any any)
LJFOLDF(fwd_sload)
{
  if ((fins->op2 & IRSLOAD_FRAME)) {
    TRef tr = lj_opt_cse(J);
    return tref_ref(tr) < J->chain[IR_RETF] ? EMITFOLD : tr;
  } else {
    lua_assert(J->slot[fins->op1] != 0);
    return J->slot[fins->op1];
  }
}
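
/* Note on fwd_sload above: during LOOP unrolling the copy-substituted
** SLOAD for slot n simply resolves to whatever value the first
** iteration left in J->slot[n]. Frame SLOADs go through CSE instead,
** but are re-emitted if a later RETF has modified BASE in the meantime.
*/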

/* Only fold for KKPTR. The pointer _and_ the contents must be const. */
LJFOLD(XLOAD KKPTR any)
LJFOLDF(xload_kptr)
{
  TRef tr = kfold_xload(J, fins, ir_kptr(fleft));
  return tr ? tr : NEXTFOLD;
}

LJFOLD(XLOAD any any)
LJFOLDX(lj_opt_fwd_xload)

/* -- Write barriers ------------------------------------------------------ */

/* Write barriers are amenable to CSE, but not across any incremental
** GC steps.
**
** The same logic applies to open upvalue references, because a stack
** may be resized during a GC step (not the current stack, but maybe that
** of a coroutine).
*/
LJFOLD(TBAR any)
LJFOLD(OBAR any any)
LJFOLD(UREFO any any)
LJFOLDF(barrier_tab)
{
  TRef tr = lj_opt_cse(J);
  if (gcstep_barrier(J, tref_ref(tr)))  /* CSE across GC step? */
    return EMITFOLD;  /* Raw emit. Assumes fins is left intact by CSE. */
  return tr;
}

LJFOLD(TBAR TNEW)
LJFOLD(TBAR TDUP)
LJFOLDF(barrier_tnew_tdup)
{
  /* New tables are always white and never need a barrier. */
  if (fins->op1 < J->chain[IR_LOOP])  /* Except across a GC step. */
    return NEXTFOLD;
  return DROPFOLD;
}

/* -- Stores and allocations ---------------------------------------------- */

/* Stores and allocations cannot be folded or passed on to CSE in general.
** But some stores can be eliminated with dead-store elimination (DSE).
**
** Caveat: *all* stores and allocs must be listed here or they end up at CSE!
*/

LJFOLD(ASTORE any any)
LJFOLD(HSTORE any any)
LJFOLDX(lj_opt_dse_ahstore)

LJFOLD(USTORE any any)
LJFOLDX(lj_opt_dse_ustore)

LJFOLD(FSTORE any any)
LJFOLDX(lj_opt_dse_fstore)

LJFOLD(XSTORE any any)
LJFOLDX(lj_opt_dse_xstore)

LJFOLD(NEWREF any any)  /* Treated like a store. */
LJFOLD(CALLS any any)
LJFOLD(CALLL any any)  /* Safeguard fallback. */
LJFOLD(CALLXS any any)
LJFOLD(XBAR)
LJFOLD(RETF any any)  /* Modifies BASE. */
LJFOLD(TNEW any any)
LJFOLD(TDUP any)
LJFOLD(CNEW any any)
LJFOLD(XSNEW any any)
LJFOLDX(lj_ir_emit)

/* ------------------------------------------------------------------------ */

/* Every entry in the generated hash table is a 32 bit pattern:
**
** xxxxxxxx iiiiiii lllllll rrrrrrrrrr
**
**   xxxxxxxx = 8 bit index into fold function table
**    iiiiiii = 7 bit folded instruction opcode
**    lllllll = 7 bit left instruction opcode
** rrrrrrrrrr = 8 bit right instruction opcode or 10 bits from literal field
*/
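
/* Illustrative decoding of an entry fh (cf. lj_opt_fold() below):
**   fh >> 24        index into fold_func[]
**   fh & 0xffffff   24 bit key: (ins << 17) | (left << 10) | right
** A rule matches iff the masked lookup key equals this 24 bit field.
*/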

#include "lj_folddef.h"

/* ------------------------------------------------------------------------ */

/* Fold IR instruction. */
TRef LJ_FASTCALL lj_opt_fold(jit_State *J)
{
  uint32_t key, any;
  IRRef ref;

  if (LJ_UNLIKELY((J->flags & JIT_F_OPT_MASK) != JIT_F_OPT_DEFAULT)) {
    lua_assert(((JIT_F_OPT_FOLD|JIT_F_OPT_FWD|JIT_F_OPT_CSE|JIT_F_OPT_DSE) |
                JIT_F_OPT_DEFAULT) == JIT_F_OPT_DEFAULT);
    /* Folding disabled? Chain to CSE, but not for loads/stores/allocs. */
    if (!(J->flags & JIT_F_OPT_FOLD) && irm_kind(lj_ir_mode[fins->o]) == IRM_N)
      return lj_opt_cse(J);

    /* No FOLD, forwarding or CSE? Emit raw IR for loads, except for SLOAD. */
    if ((J->flags & (JIT_F_OPT_FOLD|JIT_F_OPT_FWD|JIT_F_OPT_CSE)) !=
        (JIT_F_OPT_FOLD|JIT_F_OPT_FWD|JIT_F_OPT_CSE) &&
        irm_kind(lj_ir_mode[fins->o]) == IRM_L && fins->o != IR_SLOAD)
      return lj_ir_emit(J);

    /* No FOLD or DSE? Emit raw IR for stores. */
    if ((J->flags & (JIT_F_OPT_FOLD|JIT_F_OPT_DSE)) !=
        (JIT_F_OPT_FOLD|JIT_F_OPT_DSE) &&
        irm_kind(lj_ir_mode[fins->o]) == IRM_S)
      return lj_ir_emit(J);
  }

  /* Fold engine start/retry point. */
retry:
  /* Construct key from opcode and operand opcodes (unless literal/none). */
  key = ((uint32_t)fins->o << 17);
  if (fins->op1 >= J->cur.nk) {
    key += (uint32_t)IR(fins->op1)->o << 10;
    *fleft = *IR(fins->op1);
  }
  if (fins->op2 >= J->cur.nk) {
    key += (uint32_t)IR(fins->op2)->o;
    *fright = *IR(fins->op2);
  } else {
    key += (fins->op2 & 0x3ffu);  /* Literal mask. Must include IRCONV_*MASK. */
  }

  /* Check for a match in order from most specific to least specific. */
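  /* The wildcard progression below visits, in this order:
  **   any = 0x00000  ins left right  (exact match)
  **   any = 0xffc00  ins any  right  (left operand blanked)
  **   any = 0x003ff  ins left any    (right operand blanked)
  **   any = 0xfffff  ins any  any    (fully generic rule)
  ** Only the low 17 bits of any are ORed into the key.
  */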
  any = 0;
  for (;;) {
    uint32_t k = key | (any & 0x1ffff);
    uint32_t h = fold_hashkey(k);
    uint32_t fh = fold_hash[h];  /* Lookup key in semi-perfect hash table. */
    if ((fh & 0xffffff) == k || (fh = fold_hash[h+1], (fh & 0xffffff) == k)) {
      ref = (IRRef)tref_ref(fold_func[fh >> 24](J));
      if (ref != NEXTFOLD)
        break;
    }
    if (any == 0xfffff)  /* Exhausted folding. Pass on to CSE. */
      return lj_opt_cse(J);
    any = (any | (any >> 10)) ^ 0xffc00;
  }

  /* Return value processing, ordered by frequency. */
  if (LJ_LIKELY(ref >= MAX_FOLD))
    return TREF(ref, irt_t(IR(ref)->t));
  if (ref == RETRYFOLD)
    goto retry;
  if (ref == KINTFOLD)
    return lj_ir_kint(J, fins->i);
  if (ref == FAILFOLD)
    lj_trace_err(J, LJ_TRERR_GFAIL);
  lua_assert(ref == DROPFOLD);
  return REF_DROP;
}

/* -- Common-Subexpression Elimination ------------------------------------ */

/* CSE an IR instruction. This is very fast due to the skip-list chains. */
TRef LJ_FASTCALL lj_opt_cse(jit_State *J)
{
  /* Avoid narrow-to-wide store-to-load forwarding stall. */
  IRRef2 op12 = (IRRef2)fins->op1 + ((IRRef2)fins->op2 << 16);
  IROp op = fins->o;
  if (LJ_LIKELY(J->flags & JIT_F_OPT_CSE)) {
    /* Limited search for same operands in per-opcode chain. */
    IRRef ref = J->chain[op];
    IRRef lim = fins->op1;
    if (fins->op2 > lim) lim = fins->op2;  /* Relies on lit < REF_BIAS. */
    while (ref > lim) {
      if (IR(ref)->op12 == op12)
        return TREF(ref, irt_t(IR(ref)->t));  /* Common subexpression found. */
      ref = IR(ref)->prev;
    }
  }
  /* Otherwise emit IR (inlined for speed). */
  {
    IRRef ref = lj_ir_nextins(J);
    IRIns *ir = IR(ref);
    ir->prev = J->chain[op];
    ir->op12 = op12;
    J->chain[op] = (IRRef1)ref;
    ir->o = fins->o;
    J->guardemit.irt |= fins->t.irt;
    return TREF(ref, irt_t((ir->t = fins->t)));
  }
}
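
/* Note on the search limit above: a duplicate must have been emitted
** after both of its operands, so the chain walk can stop at
** max(op1, op2). Literals are encoded below REF_BIAS and thus never
** raise the limit.
*/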

/* CSE with explicit search limit. */
TRef LJ_FASTCALL lj_opt_cselim(jit_State *J, IRRef lim)
{
  IRRef ref = J->chain[fins->o];
  IRRef2 op12 = (IRRef2)fins->op1 + ((IRRef2)fins->op2 << 16);
  while (ref > lim) {
    if (IR(ref)->op12 == op12)
      return ref;
    ref = IR(ref)->prev;
  }
  return lj_ir_emit(J);
}

/* ------------------------------------------------------------------------ */

#undef IR
#undef fins
#undef fleft
#undef fright
#undef knumleft
#undef knumright
#undef emitir

#endif