/*
** FOLD: Constant Folding, Algebraic Simplifications and Reassociation.
** ABCelim: Array Bounds Check Elimination.
** CSE: Common-Subexpression Elimination.
** Copyright (C) 2005-2021 Mike Pall. See Copyright Notice in luajit.h
*/

#define lj_opt_fold_c
#define LUA_CORE

#include <math.h>

#include "lj_obj.h"

#if LJ_HASJIT

#include "lj_buf.h"
#include "lj_str.h"
#include "lj_tab.h"
#include "lj_ir.h"
#include "lj_jit.h"
#include "lj_ircall.h"
#include "lj_iropt.h"
#include "lj_trace.h"
#if LJ_HASFFI
#include "lj_ctype.h"
#include "lj_carith.h"
#endif
#include "lj_vm.h"
#include "lj_strscan.h"
#include "lj_strfmt.h"

/* Here's a short description of how the FOLD engine processes instructions:
**
** The FOLD engine receives a single instruction stored in fins (J->fold.ins).
** The instruction and its operands are used to select matching fold rules.
** These are applied iteratively until a fixed point is reached.
**
** The 8 bit opcode of the instruction itself plus the opcodes of the
** two instructions referenced by its operands form a 24 bit key
** 'ins left right' (unused operands -> 0, literals -> lowest 8 bits).
**
** This key is used for partial matching against the fold rules. The
** left/right operand fields of the key are successively masked with
** the 'any' wildcard, from most specific to least specific:
**
**   ins left right
**   ins any right
**   ins left any
**   ins any any
**
** The masked key is used to lookup a matching fold rule in a semi-perfect
** hash table. If a matching rule is found, the related fold function is run.
** Multiple rules can share the same fold function. A fold rule may return
** one of several special values:
**
** - NEXTFOLD means no folding was applied, because an additional test
**   inside the fold function failed. Matching continues against less
**   specific fold rules. Finally the instruction is passed on to CSE.
**
** - RETRYFOLD means the instruction was modified in-place. Folding is
**   retried as if this instruction had just been received.
**
** All other return values are terminal actions -- no further folding is
** applied:
**
** - INTFOLD(i) returns a reference to the integer constant i.
**
** - LEFTFOLD and RIGHTFOLD return the left/right operand reference
**   without emitting an instruction.
**
** - CSEFOLD and EMITFOLD pass the instruction directly to CSE or emit
**   it without passing through any further optimizations.
**
** - FAILFOLD, DROPFOLD and CONDFOLD only apply to instructions which have
**   no result (e.g. guarded assertions): FAILFOLD means the guard would
**   always fail, i.e. the current trace is pointless. DROPFOLD means
**   the guard is always true and has been eliminated. CONDFOLD is a
**   shortcut for FAILFOLD + cond (i.e. drop if true, otherwise fail).
**
** - Any other return value is interpreted as an IRRef or TRef. This
**   can be a reference to an existing or a newly created instruction.
**   Only the least-significant 16 bits (IRRef1) are used to form a TRef
**   which is finally returned to the caller.
**
** The FOLD engine receives instructions both from the trace recorder and
** substituted instructions from LOOP unrolling. This means all types
** of instructions may end up here, even though the recorder bypasses
** FOLD in some cases. Thus all loads, stores and allocations must have
** an any/any rule to avoid being passed on to CSE.
**
** Carefully read the following requirements before adding or modifying
** any fold rules:
**
** Requirement #1: All fold rules must preserve their destination type.
**
** Consistently use INTFOLD() (KINT result) or lj_ir_knum() (KNUM result).
** Never use lj_ir_knumint() which can have either a KINT or KNUM result.
**
** Requirement #2: Fold rules should not create *new* instructions which
** reference operands *across* PHIs.
**
** E.g. a RETRYFOLD with 'fins->op1 = fleft->op1' is invalid if the
** left operand is a PHI. Then fleft->op1 would point across the PHI
** frontier to an invariant instruction. Adding a PHI for this instruction
** would be counterproductive. The solution is to add a barrier which
** prevents folding across PHIs, i.e. 'PHIBARRIER(fleft)' in this case.
** The only exception is for recurrences with high latencies like
** repeated int->num->int conversions.
**
** One could relax this condition a bit if the referenced instruction is
** a PHI, too. But this often leads to worse code due to excessive
** register shuffling.
**
** Note: returning *existing* instructions (e.g. LEFTFOLD) is ok, though.
** Even returning fleft->op1 would be ok, because a new PHI will be added,
** if needed. But again, this leads to excessive register shuffling and
** should be avoided.
**
** Requirement #3: The set of all fold rules must be monotonic to guarantee
** termination.
**
** The goal is optimization, so one primarily wants to add strength-reducing
** rules. This means eliminating an instruction or replacing an instruction
** with one or more simpler instructions. Don't add fold rules which point
** in the other direction.
**
** Some rules (like commutativity) do not directly reduce the strength of
** an instruction, but enable other fold rules (e.g. by moving constants
** to the right operand). These rules must be made unidirectional to avoid
** cycles.
**
** Rule of thumb: the trace recorder expands the IR and FOLD shrinks it.
*/
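
/* Illustrative sketch (conceptual only; the engine's actual key layout and
** hash differ): folding an ADD whose operands are both KINT constants would
** try the masked keys in this order, until a rule matches:
**
**   (ADD, KINT, KINT)   ins left right
**   (ADD, any,  KINT)   ins any  right
**   (ADD, KINT, any)    ins left any
**   (ADD, any,  any)    ins any  any
**
** Here the first probe already hits LJFOLD(ADD KINT KINT) defined below.
*/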

/* Some local macros to save typing. Undef'd at the end. */
#define IR(ref) (&J->cur.ir[(ref)])
#define fins (&J->fold.ins)
#define fleft (J->fold.left)
#define fright (J->fold.right)
#define knumleft (ir_knum(fleft)->n)
#define knumright (ir_knum(fright)->n)

/* Pass IR on to next optimization in chain (FOLD). */
#define emitir(ot, a, b) (lj_ir_set(J, (ot), (a), (b)), lj_opt_fold(J))

/* Fold function type. Fastcall on x86 significantly reduces their size. */
typedef IRRef (LJ_FASTCALL *FoldFunc)(jit_State *J);

/* Macros for the fold specs, so buildvm can recognize them. */
#define LJFOLD(x)
#define LJFOLDX(x)
#define LJFOLDF(name) static TRef LJ_FASTCALL fold_##name(jit_State *J)
/* Note: They must be at the start of a line or buildvm ignores them! */

/* Barrier to prevent using operands across PHIs. */
#define PHIBARRIER(ir) if (irt_isphi((ir)->t)) return NEXTFOLD

/* Barrier to prevent folding across a GC step.
** GC steps can only happen at the head of a trace and at LOOP.
** And the GC is only driven forward if there's at least one allocation.
*/
#define gcstep_barrier(J, ref) \
  ((ref) < J->chain[IR_LOOP] && \
   (J->chain[IR_SNEW] || J->chain[IR_XSNEW] || \
    J->chain[IR_TNEW] || J->chain[IR_TDUP] || \
    J->chain[IR_CNEW] || J->chain[IR_CNEWI] || \
    J->chain[IR_BUFSTR] || J->chain[IR_TOSTR] || J->chain[IR_CALLA]))
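
/* Hypothetical usage sketch (an assumption for illustration, not a rule from
** this section): a forwarding rule that wants to reuse a reference created
** before a potential GC step would bail out first, e.g.
**
**   if (gcstep_barrier(J, ref)) return NEXTFOLD;
*/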

/* -- Constant folding for FP numbers ------------------------------------- */

LJFOLD(ADD KNUM KNUM)
LJFOLD(SUB KNUM KNUM)
LJFOLD(MUL KNUM KNUM)
LJFOLD(DIV KNUM KNUM)
LJFOLD(LDEXP KNUM KNUM)
LJFOLD(MIN KNUM KNUM)
LJFOLD(MAX KNUM KNUM)
LJFOLDF(kfold_numarith)
{
  lua_Number a = knumleft;
  lua_Number b = knumright;
  lua_Number y = lj_vm_foldarith(a, b, fins->o - IR_ADD);
  return lj_ir_knum(J, y);
}

LJFOLD(NEG KNUM FLOAD)
LJFOLD(ABS KNUM FLOAD)
LJFOLDF(kfold_numabsneg)
{
  lua_Number a = knumleft;
  lua_Number y = lj_vm_foldarith(a, a, fins->o - IR_ADD);
  return lj_ir_knum(J, y);
}

LJFOLD(LDEXP KNUM KINT)
LJFOLDF(kfold_ldexp)
{
#if LJ_TARGET_X86ORX64
  UNUSED(J);
  return NEXTFOLD;
#else
  return lj_ir_knum(J, ldexp(knumleft, fright->i));
#endif
}

LJFOLD(FPMATH KNUM any)
LJFOLDF(kfold_fpmath)
{
  lua_Number a = knumleft;
  lua_Number y = lj_vm_foldfpm(a, fins->op2);
  return lj_ir_knum(J, y);
}

LJFOLD(CALLN KNUM any)
LJFOLDF(kfold_fpcall1)
{
  const CCallInfo *ci = &lj_ir_callinfo[fins->op2];
  if (CCI_TYPE(ci) == IRT_NUM) {
    double y = ((double (*)(double))ci->func)(knumleft);
    return lj_ir_knum(J, y);
  }
  return NEXTFOLD;
}

LJFOLD(CALLN CARG IRCALL_atan2)
LJFOLDF(kfold_fpcall2)
{
  if (irref_isk(fleft->op1) && irref_isk(fleft->op2)) {
    const CCallInfo *ci = &lj_ir_callinfo[fins->op2];
    double a = ir_knum(IR(fleft->op1))->n;
    double b = ir_knum(IR(fleft->op2))->n;
    double y = ((double (*)(double, double))ci->func)(a, b);
    return lj_ir_knum(J, y);
  }
  return NEXTFOLD;
}

LJFOLD(POW KNUM KINT)
LJFOLD(POW KNUM KNUM)
LJFOLDF(kfold_numpow)
{
  lua_Number a = knumleft;
  lua_Number b = fright->o == IR_KINT ? (lua_Number)fright->i : knumright;
  lua_Number y = lj_vm_foldarith(a, b, IR_POW - IR_ADD);
  return lj_ir_knum(J, y);
}

/* Must not use kfold_kref for numbers (could be NaN). */
LJFOLD(EQ KNUM KNUM)
LJFOLD(NE KNUM KNUM)
LJFOLD(LT KNUM KNUM)
LJFOLD(GE KNUM KNUM)
LJFOLD(LE KNUM KNUM)
LJFOLD(GT KNUM KNUM)
LJFOLD(ULT KNUM KNUM)
LJFOLD(UGE KNUM KNUM)
LJFOLD(ULE KNUM KNUM)
LJFOLD(UGT KNUM KNUM)
LJFOLDF(kfold_numcomp)
{
  return CONDFOLD(lj_ir_numcmp(knumleft, knumright, (IROp)fins->o));
}

/* -- Constant folding for 32 bit integers -------------------------------- */

static int32_t kfold_intop(int32_t k1, int32_t k2, IROp op)
{
  switch (op) {
  case IR_ADD: k1 += k2; break;
  case IR_SUB: k1 -= k2; break;
  case IR_MUL: k1 *= k2; break;
  case IR_MOD: k1 = lj_vm_modi(k1, k2); break;
  case IR_NEG: k1 = -k1; break;
  case IR_BAND: k1 &= k2; break;
  case IR_BOR: k1 |= k2; break;
  case IR_BXOR: k1 ^= k2; break;
  case IR_BSHL: k1 <<= (k2 & 31); break;
  case IR_BSHR: k1 = (int32_t)((uint32_t)k1 >> (k2 & 31)); break;
  case IR_BSAR: k1 >>= (k2 & 31); break;
  case IR_BROL: k1 = (int32_t)lj_rol((uint32_t)k1, (k2 & 31)); break;
  case IR_BROR: k1 = (int32_t)lj_ror((uint32_t)k1, (k2 & 31)); break;
  case IR_MIN: k1 = k1 < k2 ? k1 : k2; break;
  case IR_MAX: k1 = k1 > k2 ? k1 : k2; break;
  default: lj_assertX(0, "bad IR op %d", op); break;
  }
  return k1;
}

LJFOLD(ADD KINT KINT)
LJFOLD(SUB KINT KINT)
LJFOLD(MUL KINT KINT)
LJFOLD(MOD KINT KINT)
LJFOLD(NEG KINT KINT)
LJFOLD(BAND KINT KINT)
LJFOLD(BOR KINT KINT)
LJFOLD(BXOR KINT KINT)
LJFOLD(BSHL KINT KINT)
LJFOLD(BSHR KINT KINT)
LJFOLD(BSAR KINT KINT)
LJFOLD(BROL KINT KINT)
LJFOLD(BROR KINT KINT)
LJFOLD(MIN KINT KINT)
LJFOLD(MAX KINT KINT)
LJFOLDF(kfold_intarith)
{
  return INTFOLD(kfold_intop(fleft->i, fright->i, (IROp)fins->o));
}

LJFOLD(ADDOV KINT KINT)
LJFOLD(SUBOV KINT KINT)
LJFOLD(MULOV KINT KINT)
LJFOLDF(kfold_intovarith)
{
  lua_Number n = lj_vm_foldarith((lua_Number)fleft->i, (lua_Number)fright->i,
                                 fins->o - IR_ADDOV);
  int32_t k = lj_num2int(n);
  if (n != (lua_Number)k)
    return FAILFOLD;
  return INTFOLD(k);
}

LJFOLD(BNOT KINT)
LJFOLDF(kfold_bnot)
{
  return INTFOLD(~fleft->i);
}

LJFOLD(BSWAP KINT)
LJFOLDF(kfold_bswap)
{
  return INTFOLD((int32_t)lj_bswap((uint32_t)fleft->i));
}

LJFOLD(LT KINT KINT)
LJFOLD(GE KINT KINT)
LJFOLD(LE KINT KINT)
LJFOLD(GT KINT KINT)
LJFOLD(ULT KINT KINT)
LJFOLD(UGE KINT KINT)
LJFOLD(ULE KINT KINT)
LJFOLD(UGT KINT KINT)
LJFOLD(ABC KINT KINT)
LJFOLDF(kfold_intcomp)
{
  int32_t a = fleft->i, b = fright->i;
  switch ((IROp)fins->o) {
  case IR_LT: return CONDFOLD(a < b);
  case IR_GE: return CONDFOLD(a >= b);
  case IR_LE: return CONDFOLD(a <= b);
  case IR_GT: return CONDFOLD(a > b);
  case IR_ULT: return CONDFOLD((uint32_t)a < (uint32_t)b);
  case IR_UGE: return CONDFOLD((uint32_t)a >= (uint32_t)b);
  case IR_ULE: return CONDFOLD((uint32_t)a <= (uint32_t)b);
  case IR_ABC:
  case IR_UGT: return CONDFOLD((uint32_t)a > (uint32_t)b);
  default: lj_assertJ(0, "bad IR op %d", fins->o); return FAILFOLD;
  }
}

LJFOLD(UGE any KINT)
LJFOLDF(kfold_intcomp0)
{
  if (fright->i == 0)
    return DROPFOLD;
  return NEXTFOLD;
}

/* -- Constant folding for 64 bit integers -------------------------------- */

static uint64_t kfold_int64arith(jit_State *J, uint64_t k1, uint64_t k2,
                                 IROp op)
{
  UNUSED(J);
#if LJ_HASFFI
  switch (op) {
  case IR_ADD: k1 += k2; break;
  case IR_SUB: k1 -= k2; break;
  case IR_MUL: k1 *= k2; break;
  case IR_BAND: k1 &= k2; break;
  case IR_BOR: k1 |= k2; break;
  case IR_BXOR: k1 ^= k2; break;
  case IR_BSHL: k1 <<= (k2 & 63); break;
  case IR_BSHR: k1 >>= (k2 & 63); break;
  case IR_BSAR: k1 = (uint64_t)((int64_t)k1 >> (k2 & 63)); break;
  case IR_BROL: k1 = lj_rol(k1, (k2 & 63)); break;
  case IR_BROR: k1 = lj_ror(k1, (k2 & 63)); break;
  default: lj_assertJ(0, "bad IR op %d", op); break;
  }
#else
  UNUSED(k2); UNUSED(op);
  lj_assertJ(0, "FFI IR op without FFI");
#endif
  return k1;
}

LJFOLD(ADD KINT64 KINT64)
LJFOLD(SUB KINT64 KINT64)
LJFOLD(MUL KINT64 KINT64)
LJFOLD(BAND KINT64 KINT64)
LJFOLD(BOR KINT64 KINT64)
LJFOLD(BXOR KINT64 KINT64)
LJFOLDF(kfold_int64arith)
{
  return INT64FOLD(kfold_int64arith(J, ir_k64(fleft)->u64,
                                    ir_k64(fright)->u64, (IROp)fins->o));
}

LJFOLD(DIV KINT64 KINT64)
LJFOLD(MOD KINT64 KINT64)
LJFOLD(POW KINT64 KINT64)
LJFOLDF(kfold_int64arith2)
{
#if LJ_HASFFI
  uint64_t k1 = ir_k64(fleft)->u64, k2 = ir_k64(fright)->u64;
  if (irt_isi64(fins->t)) {
    k1 = fins->o == IR_DIV ? lj_carith_divi64((int64_t)k1, (int64_t)k2) :
         fins->o == IR_MOD ? lj_carith_modi64((int64_t)k1, (int64_t)k2) :
                             lj_carith_powi64((int64_t)k1, (int64_t)k2);
  } else {
    k1 = fins->o == IR_DIV ? lj_carith_divu64(k1, k2) :
         fins->o == IR_MOD ? lj_carith_modu64(k1, k2) :
                             lj_carith_powu64(k1, k2);
  }
  return INT64FOLD(k1);
#else
  UNUSED(J); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD;
#endif
}

LJFOLD(BSHL KINT64 KINT)
LJFOLD(BSHR KINT64 KINT)
LJFOLD(BSAR KINT64 KINT)
LJFOLD(BROL KINT64 KINT)
LJFOLD(BROR KINT64 KINT)
LJFOLDF(kfold_int64shift)
{
#if LJ_HASFFI
  uint64_t k = ir_k64(fleft)->u64;
  int32_t sh = (fright->i & 63);
  return INT64FOLD(lj_carith_shift64(k, sh, fins->o - IR_BSHL));
#else
  UNUSED(J); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD;
#endif
}

LJFOLD(BNOT KINT64)
LJFOLDF(kfold_bnot64)
{
#if LJ_HASFFI
  return INT64FOLD(~ir_k64(fleft)->u64);
#else
  UNUSED(J); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD;
#endif
}

LJFOLD(BSWAP KINT64)
LJFOLDF(kfold_bswap64)
{
#if LJ_HASFFI
  return INT64FOLD(lj_bswap64(ir_k64(fleft)->u64));
#else
  UNUSED(J); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD;
#endif
}

LJFOLD(LT KINT64 KINT64)
LJFOLD(GE KINT64 KINT64)
LJFOLD(LE KINT64 KINT64)
LJFOLD(GT KINT64 KINT64)
LJFOLD(ULT KINT64 KINT64)
LJFOLD(UGE KINT64 KINT64)
LJFOLD(ULE KINT64 KINT64)
LJFOLD(UGT KINT64 KINT64)
LJFOLDF(kfold_int64comp)
{
#if LJ_HASFFI
  uint64_t a = ir_k64(fleft)->u64, b = ir_k64(fright)->u64;
  switch ((IROp)fins->o) {
  case IR_LT: return CONDFOLD((int64_t)a < (int64_t)b);
  case IR_GE: return CONDFOLD((int64_t)a >= (int64_t)b);
  case IR_LE: return CONDFOLD((int64_t)a <= (int64_t)b);
  case IR_GT: return CONDFOLD((int64_t)a > (int64_t)b);
  case IR_ULT: return CONDFOLD(a < b);
  case IR_UGE: return CONDFOLD(a >= b);
  case IR_ULE: return CONDFOLD(a <= b);
  case IR_UGT: return CONDFOLD(a > b);
  default: lj_assertJ(0, "bad IR op %d", fins->o); return FAILFOLD;
  }
#else
  UNUSED(J); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD;
#endif
}

LJFOLD(UGE any KINT64)
LJFOLDF(kfold_int64comp0)
{
#if LJ_HASFFI
  if (ir_k64(fright)->u64 == 0)
    return DROPFOLD;
  return NEXTFOLD;
#else
  UNUSED(J); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD;
#endif
}

/* -- Constant folding for strings ---------------------------------------- */

LJFOLD(SNEW KKPTR KINT)
LJFOLDF(kfold_snew_kptr)
{
  GCstr *s = lj_str_new(J->L, (const char *)ir_kptr(fleft), (size_t)fright->i);
  return lj_ir_kstr(J, s);
}

LJFOLD(SNEW any KINT)
LJFOLDF(kfold_snew_empty)
{
  if (fright->i == 0)
    return lj_ir_kstr(J, &J2G(J)->strempty);
  return NEXTFOLD;
}

LJFOLD(STRREF KGC KINT)
LJFOLDF(kfold_strref)
{
  GCstr *str = ir_kstr(fleft);
  lj_assertJ((MSize)fright->i <= str->len, "bad string ref");
  return lj_ir_kkptr(J, (char *)strdata(str) + fright->i);
}

LJFOLD(STRREF SNEW any)
LJFOLDF(kfold_strref_snew)
{
  PHIBARRIER(fleft);
  if (irref_isk(fins->op2) && fright->i == 0) {
    return fleft->op1;  /* strref(snew(ptr, len), 0) ==> ptr */
  } else {
    /* Reassociate: strref(snew(strref(str, a), len), b) ==> strref(str, a+b) */
    IRIns *ir = IR(fleft->op1);
    if (ir->o == IR_STRREF) {
      IRRef1 str = ir->op1;  /* IRIns * is not valid across emitir. */
      PHIBARRIER(ir);
      fins->op2 = emitir(IRTI(IR_ADD), ir->op2, fins->op2);  /* Clobbers fins! */
      fins->op1 = str;
      fins->ot = IRT(IR_STRREF, IRT_PGC);
      return RETRYFOLD;
    }
  }
  return NEXTFOLD;
}

LJFOLD(CALLN CARG IRCALL_lj_str_cmp)
LJFOLDF(kfold_strcmp)
{
  if (irref_isk(fleft->op1) && irref_isk(fleft->op2)) {
    GCstr *a = ir_kstr(IR(fleft->op1));
    GCstr *b = ir_kstr(IR(fleft->op2));
    return INTFOLD(lj_str_cmp(a, b));
  }
  return NEXTFOLD;
}

/* -- Constant folding and forwarding for buffers ------------------------- */

/*
** Buffer ops perform stores, but their effect is limited to the buffer
** itself. Also, buffer ops are chained: a use of an op implies a use of
** all other ops up the chain. Conversely, if an op is unused, all ops
** up the chain can go unused. This largely eliminates the need to treat
** them as stores.
**
** Alas, treating them as normal (IRM_N) ops doesn't work, because they
** cannot be CSEd in isolation. CSE for IRM_N is implicitly done in LOOP
** or if FOLD is disabled.
**
** The compromise is to declare them as loads, emit them like stores and
** CSE whole chains manually when the BUFSTR is to be emitted. Any chain
** fragments left over from CSE are eliminated by DCE.
*/
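
/* Illustrative sketch (assumed chain shape, not an actual IR dump): a
** concatenation like "a"..x.."b" typically records a chain along the lines of
**
**   t1 = BUFHDR buf
**   t2 = BUFPUT t1, "a"
**   t3 = BUFPUT t2, x
**   t4 = BUFPUT t3, "b"
**   t5 = BUFSTR t4, t1
**
** and bufstr_kfold_cse below walks such chains backwards to CSE them whole.
*/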

/* BUFHDR is emitted like a store, see below. */

LJFOLD(BUFPUT BUFHDR BUFSTR)
LJFOLDF(bufput_append)
{
  /* New buffer, no other buffer op in between and same buffer? */
  if ((J->flags & JIT_F_OPT_FWD) &&
      !(fleft->op2 & IRBUFHDR_APPEND) &&
      fleft->prev == fright->op2 &&
      fleft->op1 == IR(fright->op2)->op1) {
    IRRef ref = fins->op1;
    IR(ref)->op2 = (fleft->op2 | IRBUFHDR_APPEND);  /* Modify BUFHDR. */
    IR(ref)->op1 = fright->op1;
    return ref;
  }
  return EMITFOLD;  /* Always emit, CSE later. */
}

LJFOLD(BUFPUT any any)
LJFOLDF(bufput_kgc)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD) && fright->o == IR_KGC) {
    GCstr *s2 = ir_kstr(fright);
    if (s2->len == 0) {  /* Empty string? */
      return LEFTFOLD;
    } else {
      if (fleft->o == IR_BUFPUT && irref_isk(fleft->op2) &&
          !irt_isphi(fleft->t)) {  /* Join two constant string puts in a row. */
        GCstr *s1 = ir_kstr(IR(fleft->op2));
        IRRef kref = lj_ir_kstr(J, lj_buf_cat2str(J->L, s1, s2));
        /* lj_ir_kstr() may realloc the IR and invalidate any IRIns *. */
        IR(fins->op1)->op2 = kref;  /* Modify previous BUFPUT. */
        return fins->op1;
      }
    }
  }
  return EMITFOLD;  /* Always emit, CSE later. */
}

LJFOLD(BUFSTR any any)
LJFOLDF(bufstr_kfold_cse)
{
  lj_assertJ(fleft->o == IR_BUFHDR || fleft->o == IR_BUFPUT ||
             fleft->o == IR_CALLL,
             "bad buffer constructor IR op %d", fleft->o);
  if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD)) {
    if (fleft->o == IR_BUFHDR) {  /* No put operations? */
      if (!(fleft->op2 & IRBUFHDR_APPEND))  /* Empty buffer? */
        return lj_ir_kstr(J, &J2G(J)->strempty);
      fins->op1 = fleft->op1;
      fins->op2 = fleft->prev;  /* Relies on checks in bufput_append. */
      return CSEFOLD;
    } else if (fleft->o == IR_BUFPUT) {
      IRIns *irb = IR(fleft->op1);
      if (irb->o == IR_BUFHDR && !(irb->op2 & IRBUFHDR_APPEND))
        return fleft->op2;  /* Shortcut for a single put operation. */
    }
  }
  /* Try to CSE the whole chain. */
  if (LJ_LIKELY(J->flags & JIT_F_OPT_CSE)) {
    IRRef ref = J->chain[IR_BUFSTR];
    while (ref) {
      IRIns *irs = IR(ref), *ira = fleft, *irb = IR(irs->op1);
      while (ira->o == irb->o && ira->op2 == irb->op2) {
        lj_assertJ(ira->o == IR_BUFHDR || ira->o == IR_BUFPUT ||
                   ira->o == IR_CALLL || ira->o == IR_CARG,
                   "bad buffer constructor IR op %d", ira->o);
        if (ira->o == IR_BUFHDR && !(ira->op2 & IRBUFHDR_APPEND))
          return ref;  /* CSE succeeded. */
        if (ira->o == IR_CALLL && ira->op2 == IRCALL_lj_buf_puttab)
          break;
        ira = IR(ira->op1);
        irb = IR(irb->op1);
      }
      ref = irs->prev;
    }
  }
  return EMITFOLD;  /* No CSE possible. */
}

LJFOLD(CALLL CARG IRCALL_lj_buf_putstr_reverse)
LJFOLD(CALLL CARG IRCALL_lj_buf_putstr_upper)
LJFOLD(CALLL CARG IRCALL_lj_buf_putstr_lower)
LJFOLD(CALLL CARG IRCALL_lj_strfmt_putquoted)
LJFOLDF(bufput_kfold_op)
{
  if (irref_isk(fleft->op2)) {
    const CCallInfo *ci = &lj_ir_callinfo[fins->op2];
    SBuf *sb = lj_buf_tmp_(J->L);
    sb = ((SBuf * (LJ_FASTCALL *)(SBuf *, GCstr *))ci->func)(sb,
                                                 ir_kstr(IR(fleft->op2)));
    fins->o = IR_BUFPUT;
    fins->op1 = fleft->op1;
    fins->op2 = lj_ir_kstr(J, lj_buf_tostr(sb));
    return RETRYFOLD;
  }
  return EMITFOLD;  /* Always emit, CSE later. */
}

LJFOLD(CALLL CARG IRCALL_lj_buf_putstr_rep)
LJFOLDF(bufput_kfold_rep)
{
  if (irref_isk(fleft->op2)) {
    IRIns *irc = IR(fleft->op1);
    if (irref_isk(irc->op2)) {
      SBuf *sb = lj_buf_tmp_(J->L);
      sb = lj_buf_putstr_rep(sb, ir_kstr(IR(irc->op2)), IR(fleft->op2)->i);
      fins->o = IR_BUFPUT;
      fins->op1 = irc->op1;
      fins->op2 = lj_ir_kstr(J, lj_buf_tostr(sb));
      return RETRYFOLD;
    }
  }
  return EMITFOLD;  /* Always emit, CSE later. */
}

LJFOLD(CALLL CARG IRCALL_lj_strfmt_putfxint)
LJFOLD(CALLL CARG IRCALL_lj_strfmt_putfnum_int)
LJFOLD(CALLL CARG IRCALL_lj_strfmt_putfnum_uint)
LJFOLD(CALLL CARG IRCALL_lj_strfmt_putfnum)
LJFOLD(CALLL CARG IRCALL_lj_strfmt_putfstr)
LJFOLD(CALLL CARG IRCALL_lj_strfmt_putfchar)
LJFOLDF(bufput_kfold_fmt)
{
  IRIns *irc = IR(fleft->op1);
  lj_assertJ(irref_isk(irc->op2), "SFormat must be const");
  if (irref_isk(fleft->op2)) {
    SFormat sf = (SFormat)IR(irc->op2)->i;
    IRIns *ira = IR(fleft->op2);
    SBuf *sb = lj_buf_tmp_(J->L);
    switch (fins->op2) {
    case IRCALL_lj_strfmt_putfxint:
      sb = lj_strfmt_putfxint(sb, sf, ir_k64(ira)->u64);
      break;
    case IRCALL_lj_strfmt_putfstr:
      sb = lj_strfmt_putfstr(sb, sf, ir_kstr(ira));
      break;
    case IRCALL_lj_strfmt_putfchar:
      sb = lj_strfmt_putfchar(sb, sf, ira->i);
      break;
    case IRCALL_lj_strfmt_putfnum_int:
    case IRCALL_lj_strfmt_putfnum_uint:
    case IRCALL_lj_strfmt_putfnum:
    default: {
      const CCallInfo *ci = &lj_ir_callinfo[fins->op2];
      sb = ((SBuf * (*)(SBuf *, SFormat, lua_Number))ci->func)(sb, sf,
                                                        ir_knum(ira)->n);
      break;
      }
    }
    fins->o = IR_BUFPUT;
    fins->op1 = irc->op1;
    fins->op2 = lj_ir_kstr(J, lj_buf_tostr(sb));
    return RETRYFOLD;
  }
  return EMITFOLD;  /* Always emit, CSE later. */
}

/* -- Constant folding of pointer arithmetic ------------------------------ */

LJFOLD(ADD KGC KINT)
LJFOLD(ADD KGC KINT64)
LJFOLDF(kfold_add_kgc)
{
  GCobj *o = ir_kgc(fleft);
#if LJ_64
  ptrdiff_t ofs = (ptrdiff_t)ir_kint64(fright)->u64;
#else
  ptrdiff_t ofs = fright->i;
#endif
#if LJ_HASFFI
  if (irt_iscdata(fleft->t)) {
    CType *ct = ctype_raw(ctype_ctsG(J2G(J)), gco2cd(o)->ctypeid);
    if (ctype_isnum(ct->info) || ctype_isenum(ct->info) ||
        ctype_isptr(ct->info) || ctype_isfunc(ct->info) ||
        ctype_iscomplex(ct->info) || ctype_isvector(ct->info))
      return lj_ir_kkptr(J, (char *)o + ofs);
  }
#endif
  return lj_ir_kptr(J, (char *)o + ofs);
}

LJFOLD(ADD KPTR KINT)
LJFOLD(ADD KPTR KINT64)
LJFOLD(ADD KKPTR KINT)
LJFOLD(ADD KKPTR KINT64)
LJFOLDF(kfold_add_kptr)
{
  void *p = ir_kptr(fleft);
#if LJ_64
  ptrdiff_t ofs = (ptrdiff_t)ir_kint64(fright)->u64;
#else
  ptrdiff_t ofs = fright->i;
#endif
  return lj_ir_kptr_(J, fleft->o, (char *)p + ofs);
}

LJFOLD(ADD any KGC)
LJFOLD(ADD any KPTR)
LJFOLD(ADD any KKPTR)
LJFOLDF(kfold_add_kright)
{
  if (fleft->o == IR_KINT || fleft->o == IR_KINT64) {
    IRRef1 tmp = fins->op1; fins->op1 = fins->op2; fins->op2 = tmp;
    return RETRYFOLD;
  }
  return NEXTFOLD;
}

/* -- Constant folding of conversions ------------------------------------- */

LJFOLD(TOBIT KNUM KNUM)
LJFOLDF(kfold_tobit)
{
  return INTFOLD(lj_num2bit(knumleft));
}

LJFOLD(CONV KINT IRCONV_NUM_INT)
LJFOLDF(kfold_conv_kint_num)
{
  return lj_ir_knum(J, (lua_Number)fleft->i);
}

LJFOLD(CONV KINT IRCONV_NUM_U32)
LJFOLDF(kfold_conv_kintu32_num)
{
  return lj_ir_knum(J, (lua_Number)(uint32_t)fleft->i);
}

LJFOLD(CONV KINT IRCONV_INT_I8)
LJFOLD(CONV KINT IRCONV_INT_U8)
LJFOLD(CONV KINT IRCONV_INT_I16)
LJFOLD(CONV KINT IRCONV_INT_U16)
LJFOLDF(kfold_conv_kint_ext)
{
  int32_t k = fleft->i;
  if ((fins->op2 & IRCONV_SRCMASK) == IRT_I8) k = (int8_t)k;
  else if ((fins->op2 & IRCONV_SRCMASK) == IRT_U8) k = (uint8_t)k;
  else if ((fins->op2 & IRCONV_SRCMASK) == IRT_I16) k = (int16_t)k;
  else k = (uint16_t)k;
  return INTFOLD(k);
}

LJFOLD(CONV KINT IRCONV_I64_INT)
LJFOLD(CONV KINT IRCONV_U64_INT)
LJFOLD(CONV KINT IRCONV_I64_U32)
LJFOLD(CONV KINT IRCONV_U64_U32)
LJFOLDF(kfold_conv_kint_i64)
{
  if ((fins->op2 & IRCONV_SEXT))
    return INT64FOLD((uint64_t)(int64_t)fleft->i);
  else
    return INT64FOLD((uint64_t)(int64_t)(uint32_t)fleft->i);
}

LJFOLD(CONV KINT64 IRCONV_NUM_I64)
LJFOLDF(kfold_conv_kint64_num_i64)
{
  return lj_ir_knum(J, (lua_Number)(int64_t)ir_kint64(fleft)->u64);
}

LJFOLD(CONV KINT64 IRCONV_NUM_U64)
LJFOLDF(kfold_conv_kint64_num_u64)
{
  return lj_ir_knum(J, (lua_Number)ir_kint64(fleft)->u64);
}

LJFOLD(CONV KINT64 IRCONV_INT_I64)
LJFOLD(CONV KINT64 IRCONV_U32_I64)
LJFOLDF(kfold_conv_kint64_int_i64)
{
  return INTFOLD((int32_t)ir_kint64(fleft)->u64);
}

LJFOLD(CONV KNUM IRCONV_INT_NUM)
LJFOLDF(kfold_conv_knum_int_num)
{
  lua_Number n = knumleft;
  int32_t k = lj_num2int(n);
  if (irt_isguard(fins->t) && n != (lua_Number)k) {
    /* We're about to create a guard which always fails, like CONV +1.5.
    ** Some pathological loops cause this during LICM, e.g.:
    **   local x,k,t = 0,1.5,{1,[1.5]=2}
    **   for i=1,200 do x = x + t[k]; k = k == 1 and 1.5 or 1 end
    **   assert(x == 300)
    */
    return FAILFOLD;
  }
  return INTFOLD(k);
}

LJFOLD(CONV KNUM IRCONV_U32_NUM)
LJFOLDF(kfold_conv_knum_u32_num)
{
#ifdef _MSC_VER
  {  /* Workaround for MSVC bug. */
    volatile uint32_t u = (uint32_t)knumleft;
    return INTFOLD((int32_t)u);
  }
#else
  return INTFOLD((int32_t)(uint32_t)knumleft);
#endif
}

LJFOLD(CONV KNUM IRCONV_I64_NUM)
LJFOLDF(kfold_conv_knum_i64_num)
{
  return INT64FOLD((uint64_t)(int64_t)knumleft);
}

LJFOLD(CONV KNUM IRCONV_U64_NUM)
LJFOLDF(kfold_conv_knum_u64_num)
{
  return INT64FOLD(lj_num2u64(knumleft));
}

LJFOLD(TOSTR KNUM any)
LJFOLDF(kfold_tostr_knum)
{
  return lj_ir_kstr(J, lj_strfmt_num(J->L, ir_knum(fleft)));
}

LJFOLD(TOSTR KINT any)
LJFOLDF(kfold_tostr_kint)
{
  return lj_ir_kstr(J, fins->op2 == IRTOSTR_INT ?
                       lj_strfmt_int(J->L, fleft->i) :
                       lj_strfmt_char(J->L, fleft->i));
}

LJFOLD(STRTO KGC)
LJFOLDF(kfold_strto)
{
  TValue n;
  if (lj_strscan_num(ir_kstr(fleft), &n))
    return lj_ir_knum(J, numV(&n));
  return FAILFOLD;
}

/* -- Constant folding of equality checks --------------------------------- */

/* Don't constant-fold away FLOAD checks against KNULL. */
LJFOLD(EQ FLOAD KNULL)
LJFOLD(NE FLOAD KNULL)
LJFOLDX(lj_opt_cse)

/* But fold all other KNULL compares, since only KNULL is equal to KNULL. */
LJFOLD(EQ any KNULL)
LJFOLD(NE any KNULL)
LJFOLD(EQ KNULL any)
LJFOLD(NE KNULL any)
LJFOLD(EQ KINT KINT)  /* Constants are unique, so same refs <==> same value. */
LJFOLD(NE KINT KINT)
LJFOLD(EQ KINT64 KINT64)
LJFOLD(NE KINT64 KINT64)
LJFOLD(EQ KGC KGC)
LJFOLD(NE KGC KGC)
LJFOLDF(kfold_kref)
{
  return CONDFOLD((fins->op1 == fins->op2) ^ (fins->o == IR_NE));
}

/* -- Algebraic shortcuts ------------------------------------------------- */

LJFOLD(FPMATH FPMATH IRFPM_FLOOR)
LJFOLD(FPMATH FPMATH IRFPM_CEIL)
LJFOLD(FPMATH FPMATH IRFPM_TRUNC)
LJFOLDF(shortcut_round)
{
  IRFPMathOp op = (IRFPMathOp)fleft->op2;
  if (op == IRFPM_FLOOR || op == IRFPM_CEIL || op == IRFPM_TRUNC)
    return LEFTFOLD;  /* round(round_left(x)) = round_left(x) */
  return NEXTFOLD;
}

LJFOLD(ABS ABS FLOAD)
LJFOLDF(shortcut_left)
{
  return LEFTFOLD;  /* f(g(x)) ==> g(x) */
}

LJFOLD(ABS NEG FLOAD)
LJFOLDF(shortcut_dropleft)
{
  PHIBARRIER(fleft);
  fins->op1 = fleft->op1;  /* abs(neg(x)) ==> abs(x) */
  return RETRYFOLD;
}

/* Note: no safe shortcuts with STRTO and TOSTR ("1e2" ==> +100 ==> "100"). */
LJFOLD(NEG NEG any)
LJFOLD(BNOT BNOT)
LJFOLD(BSWAP BSWAP)
LJFOLDF(shortcut_leftleft)
{
  PHIBARRIER(fleft);  /* See above. Fold would be ok, but not beneficial. */
  return fleft->op1;  /* f(g(x)) ==> x */
}

/* -- FP algebraic simplifications ---------------------------------------- */

/* FP arithmetic is tricky -- there's not much to simplify.
** Please note the following common pitfalls before sending "improvements":
**   x+0 ==> x  is INVALID for x=-0
**   0-x ==> -x is INVALID for x=+0
**   x*0 ==> 0  is INVALID for x=-0, x=+-Inf or x=NaN
*/
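
/* Concrete IEEE 754 examples of the pitfalls above: (-0.0) + 0.0 evaluates
** to +0.0, so x+0 ==> x would flip the sign of zero for x = -0. Likewise
** 0.0 - 0.0 is +0.0 (not -0.0), and (-0.0) * 0.0 is -0.0 while Inf * 0 and
** NaN * 0 are NaN, so neither 0-x ==> -x nor x*0 ==> 0 is safe either.
*/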

LJFOLD(ADD NEG any)
LJFOLDF(simplify_numadd_negx)
{
  PHIBARRIER(fleft);
  fins->o = IR_SUB;  /* (-a) + b ==> b - a */
  fins->op1 = fins->op2;
  fins->op2 = fleft->op1;
  return RETRYFOLD;
}

LJFOLD(ADD any NEG)
LJFOLDF(simplify_numadd_xneg)
{
  PHIBARRIER(fright);
  fins->o = IR_SUB;  /* a + (-b) ==> a - b */
  fins->op2 = fright->op1;
  return RETRYFOLD;
}

LJFOLD(SUB any KNUM)
LJFOLDF(simplify_numsub_k)
{
  lua_Number n = knumright;
  if (n == 0.0)  /* x - (+-0) ==> x */
    return LEFTFOLD;
  return NEXTFOLD;
}

LJFOLD(SUB NEG KNUM)
LJFOLDF(simplify_numsub_negk)
{
  PHIBARRIER(fleft);
  fins->op2 = fleft->op1;  /* (-x) - k ==> (-k) - x */
  fins->op1 = (IRRef1)lj_ir_knum(J, -knumright);
  return RETRYFOLD;
}

LJFOLD(SUB any NEG)
LJFOLDF(simplify_numsub_xneg)
{
  PHIBARRIER(fright);
  fins->o = IR_ADD;  /* a - (-b) ==> a + b */
  fins->op2 = fright->op1;
  return RETRYFOLD;
}

LJFOLD(MUL any KNUM)
LJFOLD(DIV any KNUM)
LJFOLDF(simplify_nummuldiv_k)
{
  lua_Number n = knumright;
  if (n == 1.0) {  /* x o 1 ==> x */
    return LEFTFOLD;
  } else if (n == -1.0) {  /* x o -1 ==> -x */
    IRRef op1 = fins->op1;
    fins->op2 = (IRRef1)lj_ir_ksimd(J, LJ_KSIMD_NEG);  /* Modifies fins. */
    fins->op1 = op1;
    fins->o = IR_NEG;
    return RETRYFOLD;
  } else if (fins->o == IR_MUL && n == 2.0) {  /* x * 2 ==> x + x */
    fins->o = IR_ADD;
    fins->op2 = fins->op1;
    return RETRYFOLD;
  } else if (fins->o == IR_DIV) {  /* x / 2^k ==> x * 2^-k */
    uint64_t u = ir_knum(fright)->u64;
    uint32_t ex = ((uint32_t)(u >> 52) & 0x7ff);
    if ((u & U64x(000fffff,ffffffff)) == 0 && ex - 1 < 0x7fd) {
      u = (u & ((uint64_t)1 << 63)) | ((uint64_t)(0x7fe - ex) << 52);
      fins->o = IR_MUL;  /* Multiply by exact reciprocal. */
      fins->op2 = lj_ir_knum_u64(J, u);
      return RETRYFOLD;
    }
  }
  return NEXTFOLD;
}

LJFOLD(MUL NEG KNUM)
LJFOLD(DIV NEG KNUM)
LJFOLDF(simplify_nummuldiv_negk)
{
  PHIBARRIER(fleft);
  fins->op1 = fleft->op1;  /* (-a) o k ==> a o (-k) */
  fins->op2 = (IRRef1)lj_ir_knum(J, -knumright);
  return RETRYFOLD;
}

LJFOLD(MUL NEG NEG)
LJFOLD(DIV NEG NEG)
LJFOLDF(simplify_nummuldiv_negneg)
{
  PHIBARRIER(fleft);
  PHIBARRIER(fright);
  fins->op1 = fleft->op1;  /* (-a) o (-b) ==> a o b */
  fins->op2 = fright->op1;
  return RETRYFOLD;
}

LJFOLD(POW any KINT)
LJFOLDF(simplify_numpow_xkint)
{
  int32_t k = fright->i;
  TRef ref = fins->op1;
  if (k == 0)  /* x ^ 0 ==> 1 */
    return lj_ir_knum_one(J);  /* Result must be a number, not an int. */
  if (k == 1)  /* x ^ 1 ==> x */
    return LEFTFOLD;
  if ((uint32_t)(k+65536) > 2*65536u)  /* Limit code explosion. */
    return NEXTFOLD;
  if (k < 0) {  /* x ^ (-k) ==> (1/x) ^ k. */
    ref = emitir(IRTN(IR_DIV), lj_ir_knum_one(J), ref);
    k = -k;
  }
  /* Unroll x^k for 1 <= k <= 65536. */
  for (; (k & 1) == 0; k >>= 1)  /* Handle leading zeros. */
    ref = emitir(IRTN(IR_MUL), ref, ref);
  if ((k >>= 1) != 0) {  /* Handle trailing bits. */
    TRef tmp = emitir(IRTN(IR_MUL), ref, ref);
    for (; k != 1; k >>= 1) {
      if (k & 1)
        ref = emitir(IRTN(IR_MUL), ref, tmp);
      tmp = emitir(IRTN(IR_MUL), tmp, tmp);
    }
    ref = emitir(IRTN(IR_MUL), ref, tmp);
  }
  return ref;
}
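
/* Worked example of the unrolling above (illustrative): for x^5, k = 5
** (binary 101), the loop emits t = x*x, then t = t*t, then returns x*t,
** i.e. x^5 in three multiplies instead of a pow call.
*/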

LJFOLD(POW any KNUM)
LJFOLDF(simplify_numpow_xknum)
{
  if (knumright == 0.5)  /* x ^ 0.5 ==> sqrt(x) */
    return emitir(IRTN(IR_FPMATH), fins->op1, IRFPM_SQRT);
  return NEXTFOLD;
}

LJFOLD(POW KNUM any)
LJFOLDF(simplify_numpow_kx)
{
  lua_Number n = knumleft;
  if (n == 2.0 && irt_isint(fright->t)) {  /* 2.0 ^ i ==> ldexp(1.0, i) */
#if LJ_TARGET_X86ORX64
    /* Different IR_LDEXP calling convention on x86/x64 requires conversion. */
    fins->o = IR_CONV;
    fins->op1 = fins->op2;
    fins->op2 = IRCONV_NUM_INT;
    fins->op2 = (IRRef1)lj_opt_fold(J);
#endif
    fins->op1 = (IRRef1)lj_ir_knum_one(J);
    fins->o = IR_LDEXP;
    return RETRYFOLD;
  }
  return NEXTFOLD;
}

/* -- Simplify conversions ------------------------------------------------ */

LJFOLD(CONV CONV IRCONV_NUM_INT)  /* _NUM */
LJFOLDF(shortcut_conv_num_int)
{
  PHIBARRIER(fleft);
  /* Only safe with a guarded conversion to int. */
  if ((fleft->op2 & IRCONV_SRCMASK) == IRT_NUM && irt_isguard(fleft->t))
    return fleft->op1;  /* f(g(x)) ==> x */
  return NEXTFOLD;
}

LJFOLD(CONV CONV IRCONV_INT_NUM)  /* _INT */
LJFOLD(CONV CONV IRCONV_U32_NUM)  /* _U32 */
LJFOLDF(simplify_conv_int_num)
{
  /* Fold even across PHI to avoid expensive num->int conversions in loop. */
  if ((fleft->op2 & IRCONV_SRCMASK) ==
      ((fins->op2 & IRCONV_DSTMASK) >> IRCONV_DSH))
    return fleft->op1;
  return NEXTFOLD;
}

LJFOLD(CONV CONV IRCONV_I64_NUM)  /* _INT or _U32 */
LJFOLD(CONV CONV IRCONV_U64_NUM)  /* _INT or _U32 */
LJFOLDF(simplify_conv_i64_num)
{
  PHIBARRIER(fleft);
  if ((fleft->op2 & IRCONV_SRCMASK) == IRT_INT) {
    /* Reduce to a sign-extension. */
    fins->op1 = fleft->op1;
    fins->op2 = ((IRT_I64<<5)|IRT_INT|IRCONV_SEXT);
    return RETRYFOLD;
  } else if ((fleft->op2 & IRCONV_SRCMASK) == IRT_U32) {
#if LJ_TARGET_X64
    return fleft->op1;
#else
    /* Reduce to a zero-extension. */
    fins->op1 = fleft->op1;
    fins->op2 = (IRT_I64<<5)|IRT_U32;
    return RETRYFOLD;
#endif
  }
  return NEXTFOLD;
}

LJFOLD(CONV CONV IRCONV_INT_I64)  /* _INT or _U32 */
LJFOLD(CONV CONV IRCONV_INT_U64)  /* _INT or _U32 */
LJFOLD(CONV CONV IRCONV_U32_I64)  /* _INT or _U32 */
LJFOLD(CONV CONV IRCONV_U32_U64)  /* _INT or _U32 */
LJFOLDF(simplify_conv_int_i64)
{
  int src;
  PHIBARRIER(fleft);
  src = (fleft->op2 & IRCONV_SRCMASK);
  if (src == IRT_INT || src == IRT_U32) {
    if (src == ((fins->op2 & IRCONV_DSTMASK) >> IRCONV_DSH)) {
      return fleft->op1;
    } else {
      fins->op2 = ((fins->op2 & IRCONV_DSTMASK) | src);
      fins->op1 = fleft->op1;
      return RETRYFOLD;
    }
  }
  return NEXTFOLD;
}

LJFOLD(CONV CONV IRCONV_FLOAT_NUM)  /* _FLOAT */
LJFOLDF(simplify_conv_flt_num)
{
  PHIBARRIER(fleft);
  if ((fleft->op2 & IRCONV_SRCMASK) == IRT_FLOAT)
    return fleft->op1;
  return NEXTFOLD;
}

/* Shortcut TOBIT + IRT_NUM <- IRT_INT/IRT_U32 conversion. */
LJFOLD(TOBIT CONV KNUM)
LJFOLDF(simplify_tobit_conv)
{
  /* Fold even across PHI to avoid expensive num->int conversions in loop. */
  if ((fleft->op2 & IRCONV_SRCMASK) == IRT_INT) {
    lj_assertJ(irt_isnum(fleft->t), "expected TOBIT number arg");
    return fleft->op1;
  } else if ((fleft->op2 & IRCONV_SRCMASK) == IRT_U32) {
    lj_assertJ(irt_isnum(fleft->t), "expected TOBIT number arg");
    fins->o = IR_CONV;
    fins->op1 = fleft->op1;
    fins->op2 = (IRT_INT<<5)|IRT_U32;
    return RETRYFOLD;
  }
  return NEXTFOLD;
}

/* Shortcut floor/ceil/round + IRT_NUM <- IRT_INT/IRT_U32 conversion. */
LJFOLD(FPMATH CONV IRFPM_FLOOR)
LJFOLD(FPMATH CONV IRFPM_CEIL)
LJFOLD(FPMATH CONV IRFPM_TRUNC)
LJFOLDF(simplify_floor_conv)
{
  if ((fleft->op2 & IRCONV_SRCMASK) == IRT_INT ||
      (fleft->op2 & IRCONV_SRCMASK) == IRT_U32)
    return LEFTFOLD;
  return NEXTFOLD;
}

/* Strength reduction of widening. */
LJFOLD(CONV any IRCONV_I64_INT)
LJFOLD(CONV any IRCONV_U64_INT)
LJFOLDF(simplify_conv_sext)
{
  IRRef ref = fins->op1;
  int64_t ofs = 0;
  if (!(fins->op2 & IRCONV_SEXT))
    return NEXTFOLD;
  PHIBARRIER(fleft);
  if (fleft->o == IR_XLOAD && (irt_isu8(fleft->t) || irt_isu16(fleft->t)))
    goto ok_reduce;
  if (fleft->o == IR_ADD && irref_isk(fleft->op2)) {
    ofs = (int64_t)IR(fleft->op2)->i;
    ref = fleft->op1;
  }
  /* Use scalar evolution analysis results to strength-reduce sign-extension. */
  if (ref == J->scev.idx) {
    IRRef lo = J->scev.dir ? J->scev.start : J->scev.stop;
    lj_assertJ(irt_isint(J->scev.t), "only int SCEV supported");
    if (lo && IR(lo)->o == IR_KINT && IR(lo)->i + ofs >= 0) {
    ok_reduce:
#if LJ_TARGET_X64
      /* Eliminate widening. All 32 bit ops do an implicit zero-extension. */
      return LEFTFOLD;
#else
      /* Reduce to a (cheaper) zero-extension. */
      fins->op2 &= ~IRCONV_SEXT;
      return RETRYFOLD;
#endif
    }
  }
  return NEXTFOLD;
}

/* Strength reduction of narrowing. */
LJFOLD(CONV ADD IRCONV_INT_I64)
LJFOLD(CONV SUB IRCONV_INT_I64)
LJFOLD(CONV MUL IRCONV_INT_I64)
LJFOLD(CONV ADD IRCONV_INT_U64)
LJFOLD(CONV SUB IRCONV_INT_U64)
LJFOLD(CONV MUL IRCONV_INT_U64)
LJFOLD(CONV ADD IRCONV_U32_I64)
LJFOLD(CONV SUB IRCONV_U32_I64)
LJFOLD(CONV MUL IRCONV_U32_I64)
LJFOLD(CONV ADD IRCONV_U32_U64)
LJFOLD(CONV SUB IRCONV_U32_U64)
LJFOLD(CONV MUL IRCONV_U32_U64)
LJFOLDF(simplify_conv_narrow)
{
  IROp op = (IROp)fleft->o;
  IRType t = irt_type(fins->t);
  IRRef op1 = fleft->op1, op2 = fleft->op2, mode = fins->op2;
  PHIBARRIER(fleft);
  op1 = emitir(IRT(IR_CONV, t), op1, mode);
  op2 = emitir(IRT(IR_CONV, t), op2, mode);
  fins->ot = IRT(op, t);
  fins->op1 = op1;
  fins->op2 = op2;
  return RETRYFOLD;
}

/* Special CSE rule for CONV. */
LJFOLD(CONV any any)
LJFOLDF(cse_conv)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_CSE)) {
    IRRef op1 = fins->op1, op2 = (fins->op2 & IRCONV_MODEMASK);
    uint8_t guard = irt_isguard(fins->t);
    IRRef ref = J->chain[IR_CONV];
    while (ref > op1) {
      IRIns *ir = IR(ref);
      /* Commoning with stronger checks is ok. */
      if (ir->op1 == op1 && (ir->op2 & IRCONV_MODEMASK) == op2 &&
          irt_isguard(ir->t) >= guard)
        return ref;
      ref = ir->prev;
    }
  }
  return EMITFOLD;  /* No fallthrough to regular CSE. */
}

/* FP conversion narrowing. */
LJFOLD(TOBIT ADD KNUM)
LJFOLD(TOBIT SUB KNUM)
LJFOLD(CONV ADD IRCONV_INT_NUM)
LJFOLD(CONV SUB IRCONV_INT_NUM)
LJFOLD(CONV ADD IRCONV_I64_NUM)
LJFOLD(CONV SUB IRCONV_I64_NUM)
LJFOLDF(narrow_convert)
{
  PHIBARRIER(fleft);
  /* Narrowing ignores PHIs and repeating it inside the loop is not useful. */
  if (J->chain[IR_LOOP])
    return NEXTFOLD;
  lj_assertJ(fins->o != IR_CONV || (fins->op2&IRCONV_CONVMASK) != IRCONV_TOBIT,
             "unexpected CONV TOBIT");
  return lj_opt_narrow_convert(J);
}

/* -- Integer algebraic simplifications ----------------------------------- */

LJFOLD(ADD any KINT)
LJFOLD(ADDOV any KINT)
LJFOLD(SUBOV any KINT)
LJFOLDF(simplify_intadd_k)
{
  if (fright->i == 0)  /* i o 0 ==> i */
    return LEFTFOLD;
  return NEXTFOLD;
}

LJFOLD(MULOV any KINT)
LJFOLDF(simplify_intmul_k)
{
  if (fright->i == 0)  /* i * 0 ==> 0 */
    return RIGHTFOLD;
  if (fright->i == 1)  /* i * 1 ==> i */
    return LEFTFOLD;
  if (fright->i == 2) {  /* i * 2 ==> i + i */
    fins->o = IR_ADDOV;
    fins->op2 = fins->op1;
    return RETRYFOLD;
  }
  return NEXTFOLD;
}

LJFOLD(SUB any KINT)
LJFOLDF(simplify_intsub_k)
{
  if (fright->i == 0)  /* i - 0 ==> i */
    return LEFTFOLD;
  fins->o = IR_ADD;  /* i - k ==> i + (-k) */
  fins->op2 = (IRRef1)lj_ir_kint(J, -fright->i);  /* Overflow for -2^31 ok. */
  return RETRYFOLD;
}

LJFOLD(SUB KINT any)
LJFOLD(SUB KINT64 any)
LJFOLDF(simplify_intsub_kleft)
{
  if (fleft->o == IR_KINT ? (fleft->i == 0) : (ir_kint64(fleft)->u64 == 0)) {
    fins->o = IR_NEG;  /* 0 - i ==> -i */
    fins->op1 = fins->op2;
    return RETRYFOLD;
  }
  return NEXTFOLD;
}

LJFOLD(ADD any KINT64)
LJFOLDF(simplify_intadd_k64)
{
  if (ir_kint64(fright)->u64 == 0)  /* i + 0 ==> i */
    return LEFTFOLD;
  return NEXTFOLD;
}

LJFOLD(SUB any KINT64)
LJFOLDF(simplify_intsub_k64)
{
  uint64_t k = ir_kint64(fright)->u64;
  if (k == 0)  /* i - 0 ==> i */
    return LEFTFOLD;
  fins->o = IR_ADD;  /* i - k ==> i + (-k) */
  fins->op2 = (IRRef1)lj_ir_kint64(J, (uint64_t)-(int64_t)k);
  return RETRYFOLD;
}

static TRef simplify_intmul_k(jit_State *J, int32_t k)
{
  /* Note: many more simplifications are possible, e.g. 2^k1 +- 2^k2.
  ** But this is mainly intended for simple address arithmetic.
  ** Also it's easier for the backend to optimize the original multiplies.
  */
  if (k == 0) {  /* i * 0 ==> 0 */
    return RIGHTFOLD;
  } else if (k == 1) {  /* i * 1 ==> i */
    return LEFTFOLD;
  } else if ((k & (k-1)) == 0) {  /* i * 2^k ==> i << k */
    fins->o = IR_BSHL;
    fins->op2 = lj_ir_kint(J, lj_fls((uint32_t)k));
    return RETRYFOLD;
  }
  return NEXTFOLD;
}
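
/* Example (illustrative): MUL(i, KINT 8) becomes BSHL(i, KINT 3), since
** lj_fls(8) == 3; a non-power-of-two like 6 falls through to NEXTFOLD and
** is left for the backend to optimize.
*/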

LJFOLD(MUL any KINT)
LJFOLDF(simplify_intmul_k32)
{
  if (fright->i >= 0)
    return simplify_intmul_k(J, fright->i);
  return NEXTFOLD;
}

LJFOLD(MUL any KINT64)
LJFOLDF(simplify_intmul_k64)
{
#if LJ_HASFFI
  if (ir_kint64(fright)->u64 < 0x80000000u)
    return simplify_intmul_k(J, (int32_t)ir_kint64(fright)->u64);
  return NEXTFOLD;
#else
  UNUSED(J); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD;
#endif
}

LJFOLD(MOD any KINT)
LJFOLDF(simplify_intmod_k)
{
  int32_t k = fright->i;
  lj_assertJ(k != 0, "integer mod 0");
  if (k > 0 && (k & (k-1)) == 0) {  /* i % (2^k) ==> i & (2^k-1) */
    fins->o = IR_BAND;
    fins->op2 = lj_ir_kint(J, k-1);
    return RETRYFOLD;
  }
  return NEXTFOLD;
}

LJFOLD(MOD KINT any)
LJFOLDF(simplify_intmod_kleft)
{
  if (fleft->i == 0)
    return INTFOLD(0);
  return NEXTFOLD;
}

LJFOLD(SUB any any)
LJFOLD(SUBOV any any)
LJFOLDF(simplify_intsub)
{
  if (fins->op1 == fins->op2 && !irt_isnum(fins->t))  /* i - i ==> 0 */
    return irt_is64(fins->t) ? INT64FOLD(0) : INTFOLD(0);
  return NEXTFOLD;
}

LJFOLD(SUB ADD any)
LJFOLDF(simplify_intsubadd_leftcancel)
{
  if (!irt_isnum(fins->t)) {
    PHIBARRIER(fleft);
    if (fins->op2 == fleft->op1)  /* (i + j) - i ==> j */
      return fleft->op2;
    if (fins->op2 == fleft->op2)  /* (i + j) - j ==> i */
      return fleft->op1;
  }
  return NEXTFOLD;
}

LJFOLD(SUB SUB any)
LJFOLDF(simplify_intsubsub_leftcancel)
{
  if (!irt_isnum(fins->t)) {
    PHIBARRIER(fleft);
    if (fins->op2 == fleft->op1) {  /* (i - j) - i ==> 0 - j */
      fins->op1 = (IRRef1)lj_ir_kint(J, 0);
      fins->op2 = fleft->op2;
      return RETRYFOLD;
    }
  }
  return NEXTFOLD;
}

LJFOLD(SUB any SUB)
LJFOLDF(simplify_intsubsub_rightcancel)
{
  if (!irt_isnum(fins->t)) {
    PHIBARRIER(fright);
    if (fins->op1 == fright->op1)  /* i - (i - j) ==> j */
      return fright->op2;
  }
  return NEXTFOLD;
}

LJFOLD(SUB any ADD)
LJFOLDF(simplify_intsubadd_rightcancel)
{
  if (!irt_isnum(fins->t)) {
    PHIBARRIER(fright);
    if (fins->op1 == fright->op1) {  /* i - (i + j) ==> 0 - j */
      fins->op2 = fright->op2;
      fins->op1 = (IRRef1)lj_ir_kint(J, 0);
      return RETRYFOLD;
    }
    if (fins->op1 == fright->op2) {  /* i - (j + i) ==> 0 - j */
      fins->op2 = fright->op1;
      fins->op1 = (IRRef1)lj_ir_kint(J, 0);
      return RETRYFOLD;
    }
  }
  return NEXTFOLD;
}

LJFOLD(SUB ADD ADD)
LJFOLDF(simplify_intsubaddadd_cancel)
{
  if (!irt_isnum(fins->t)) {
    PHIBARRIER(fleft);
    PHIBARRIER(fright);
    if (fleft->op1 == fright->op1) {  /* (i + j1) - (i + j2) ==> j1 - j2 */
      fins->op1 = fleft->op2;
      fins->op2 = fright->op2;
      return RETRYFOLD;
    }
    if (fleft->op1 == fright->op2) {  /* (i + j1) - (j2 + i) ==> j1 - j2 */
      fins->op1 = fleft->op2;
      fins->op2 = fright->op1;
      return RETRYFOLD;
    }
    if (fleft->op2 == fright->op1) {  /* (j1 + i) - (i + j2) ==> j1 - j2 */
      fins->op1 = fleft->op1;
      fins->op2 = fright->op2;
      return RETRYFOLD;
    }
    if (fleft->op2 == fright->op2) {  /* (j1 + i) - (j2 + i) ==> j1 - j2 */
      fins->op1 = fleft->op1;
      fins->op2 = fright->op1;
      return RETRYFOLD;
    }
  }
  return NEXTFOLD;
}

LJFOLD(BAND any KINT)
LJFOLD(BAND any KINT64)
LJFOLDF(simplify_band_k)
{
  int64_t k = fright->o == IR_KINT ? (int64_t)fright->i :
                                     (int64_t)ir_k64(fright)->u64;
  if (k == 0)  /* i & 0 ==> 0 */
    return RIGHTFOLD;
  if (k == -1)  /* i & -1 ==> i */
    return LEFTFOLD;
  return NEXTFOLD;
}

LJFOLD(BOR any KINT)
LJFOLD(BOR any KINT64)
LJFOLDF(simplify_bor_k)
{
  int64_t k = fright->o == IR_KINT ? (int64_t)fright->i :
                                     (int64_t)ir_k64(fright)->u64;
  if (k == 0)  /* i | 0 ==> i */
    return LEFTFOLD;
  if (k == -1)  /* i | -1 ==> -1 */
    return RIGHTFOLD;
  return NEXTFOLD;
}

LJFOLD(BXOR any KINT)
LJFOLD(BXOR any KINT64)
LJFOLDF(simplify_bxor_k)
{
  int64_t k = fright->o == IR_KINT ? (int64_t)fright->i :
                                     (int64_t)ir_k64(fright)->u64;
  if (k == 0)  /* i xor 0 ==> i */
    return LEFTFOLD;
  if (k == -1) {  /* i xor -1 ==> ~i */
    fins->o = IR_BNOT;
    fins->op2 = 0;
    return RETRYFOLD;
  }
  return NEXTFOLD;
}

LJFOLD(BSHL any KINT)
LJFOLD(BSHR any KINT)
LJFOLD(BSAR any KINT)
LJFOLD(BROL any KINT)
LJFOLD(BROR any KINT)
LJFOLDF(simplify_shift_ik)
{
  int32_t mask = irt_is64(fins->t) ? 63 : 31;
  int32_t k = (fright->i & mask);
  if (k == 0)  /* i o 0 ==> i */
    return LEFTFOLD;
  if (k == 1 && fins->o == IR_BSHL) {  /* i << 1 ==> i + i */
    fins->o = IR_ADD;
    fins->op2 = fins->op1;
    return RETRYFOLD;
  }
  if (k != fright->i) {  /* i o k ==> i o (k & mask) */
    fins->op2 = (IRRef1)lj_ir_kint(J, k);
    return RETRYFOLD;
  }
#ifndef LJ_TARGET_UNIFYROT
  if (fins->o == IR_BROR) {  /* bror(i, k) ==> brol(i, (-k)&mask) */
    fins->o = IR_BROL;
    fins->op2 = (IRRef1)lj_ir_kint(J, (-k)&mask);
    return RETRYFOLD;
  }
#endif
  return NEXTFOLD;
}

LJFOLD(BSHL any BAND)
LJFOLD(BSHR any BAND)
LJFOLD(BSAR any BAND)
LJFOLD(BROL any BAND)
LJFOLD(BROR any BAND)
LJFOLDF(simplify_shift_andk)
{
  IRIns *irk = IR(fright->op2);
  PHIBARRIER(fright);
  if ((fins->o < IR_BROL ? LJ_TARGET_MASKSHIFT : LJ_TARGET_MASKROT) &&
      irk->o == IR_KINT) {  /* i o (j & mask) ==> i o j */
    int32_t mask = irt_is64(fins->t) ? 63 : 31;
    int32_t k = irk->i & mask;
    if (k == mask) {
      fins->op2 = fright->op1;
      return RETRYFOLD;
    }
  }
  return NEXTFOLD;
}

LJFOLD(BSHL KINT any)
LJFOLD(BSHR KINT any)
LJFOLD(BSHL KINT64 any)
LJFOLD(BSHR KINT64 any)
LJFOLDF(simplify_shift1_ki)
{
  int64_t k = fleft->o == IR_KINT ? (int64_t)fleft->i :
                                    (int64_t)ir_k64(fleft)->u64;
  if (k == 0)  /* 0 o i ==> 0 */
    return LEFTFOLD;
  return NEXTFOLD;
}

LJFOLD(BSAR KINT any)
LJFOLD(BROL KINT any)
LJFOLD(BROR KINT any)
LJFOLD(BSAR KINT64 any)
LJFOLD(BROL KINT64 any)
LJFOLD(BROR KINT64 any)
LJFOLDF(simplify_shift2_ki)
{
  int64_t k = fleft->o == IR_KINT ? (int64_t)fleft->i :
                                    (int64_t)ir_k64(fleft)->u64;
  if (k == 0 || k == -1)  /* 0 o i ==> 0; -1 o i ==> -1 */
    return LEFTFOLD;
  return NEXTFOLD;
}
1693
1694LJFOLD(BSHL BAND KINT)
1695LJFOLD(BSHR BAND KINT)
1696LJFOLD(BROL BAND KINT)
1697LJFOLD(BROR BAND KINT)
1698LJFOLDF(simplify_shiftk_andk)
1699{
1700 IRIns *irk = IR(fleft->op2);
1701 PHIBARRIER(fleft);
1702 if (irk->o == IR_KINT) { /* (i & k1) o k2 ==> (i o k2) & (k1 o k2) */
1703 int32_t k = kfold_intop(irk->i, fright->i, (IROp)fins->o);
1704 fins->op1 = fleft->op1;
1705 fins->op1 = (IRRef1)lj_opt_fold(J);
1706 fins->op2 = (IRRef1)lj_ir_kint(J, k);
1707 fins->ot = IRTI(IR_BAND);
1708 return RETRYFOLD;
1709 } else if (irk->o == IR_KINT64) {
1710 uint64_t k = kfold_int64arith(J, ir_k64(irk)->u64, fright->i,
1711 (IROp)fins->o);
1712 IROpT ot = fleft->ot;
1713 fins->op1 = fleft->op1;
1714 fins->op1 = (IRRef1)lj_opt_fold(J);
1715 fins->op2 = (IRRef1)lj_ir_kint64(J, k);
1716 fins->ot = ot;
1717 return RETRYFOLD;
1718 }
1719 return NEXTFOLD;
1720}
1721
1722LJFOLD(BAND BSHL KINT)
1723LJFOLD(BAND BSHR KINT)
1724LJFOLDF(simplify_andk_shiftk)
1725{
1726 IRIns *irk = IR(fleft->op2);
1727 if (irk->o == IR_KINT &&
1728 kfold_intop(-1, irk->i, (IROp)fleft->o) == fright->i)
1729 return LEFTFOLD; /* (i o k1) & k2 ==> i, if (-1 o k1) == k2 */
1730 return NEXTFOLD;
1731}
1732
1733LJFOLD(BAND BOR KINT)
1734LJFOLD(BOR BAND KINT)
1735LJFOLDF(simplify_andor_k)
1736{
1737 IRIns *irk = IR(fleft->op2);
1738 PHIBARRIER(fleft);
1739 if (irk->o == IR_KINT) {
1740 int32_t k = kfold_intop(irk->i, fright->i, (IROp)fins->o);
1741 /* (i | k1) & k2 ==> i & k2, if (k1 & k2) == 0. */
1742 /* (i & k1) | k2 ==> i | k2, if (k1 | k2) == -1. */
1743 if (k == (fins->o == IR_BAND ? 0 : -1)) {
1744 fins->op1 = fleft->op1;
1745 return RETRYFOLD;
1746 }
1747 }
1748 return NEXTFOLD;
1749}
1750
1751LJFOLD(BAND BOR KINT64)
1752LJFOLD(BOR BAND KINT64)
1753LJFOLDF(simplify_andor_k64)
1754{
1755#if LJ_HASFFI
1756 IRIns *irk = IR(fleft->op2);
1757 PHIBARRIER(fleft);
1758 if (irk->o == IR_KINT64) {
1759 uint64_t k = kfold_int64arith(J, ir_k64(irk)->u64, ir_k64(fright)->u64,
1760 (IROp)fins->o);
1761 /* (i | k1) & k2 ==> i & k2, if (k1 & k2) == 0. */
1762 /* (i & k1) | k2 ==> i | k2, if (k1 | k2) == -1. */
1763 if (k == (fins->o == IR_BAND ? (uint64_t)0 : ~(uint64_t)0)) {
1764 fins->op1 = fleft->op1;
1765 return RETRYFOLD;
1766 }
1767 }
1768 return NEXTFOLD;
1769#else
1770 UNUSED(J); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD;
1771#endif
1772}
1773
1774/* -- Reassociation ------------------------------------------------------- */
1775
1776LJFOLD(ADD ADD KINT)
1777LJFOLD(MUL MUL KINT)
1778LJFOLD(BAND BAND KINT)
1779LJFOLD(BOR BOR KINT)
1780LJFOLD(BXOR BXOR KINT)
1781LJFOLDF(reassoc_intarith_k)
1782{
1783 IRIns *irk = IR(fleft->op2);
1784 if (irk->o == IR_KINT) {
1785 int32_t k = kfold_intop(irk->i, fright->i, (IROp)fins->o);
1786 if (k == irk->i) /* (i o k1) o k2 ==> i o k1, if (k1 o k2) == k1. */
1787 return LEFTFOLD;
1788 PHIBARRIER(fleft);
1789 fins->op1 = fleft->op1;
1790 fins->op2 = (IRRef1)lj_ir_kint(J, k);
1791 return RETRYFOLD; /* (i o k1) o k2 ==> i o (k1 o k2) */
1792 }
1793 return NEXTFOLD;
1794}
1795
1796LJFOLD(ADD ADD KINT64)
1797LJFOLD(MUL MUL KINT64)
1798LJFOLD(BAND BAND KINT64)
1799LJFOLD(BOR BOR KINT64)
1800LJFOLD(BXOR BXOR KINT64)
1801LJFOLDF(reassoc_intarith_k64)
1802{
1803#if LJ_HASFFI
1804 IRIns *irk = IR(fleft->op2);
1805 if (irk->o == IR_KINT64) {
1806 uint64_t k = kfold_int64arith(J, ir_k64(irk)->u64, ir_k64(fright)->u64,
1807 (IROp)fins->o);
1808 PHIBARRIER(fleft);
1809 fins->op1 = fleft->op1;
1810 fins->op2 = (IRRef1)lj_ir_kint64(J, k);
1811 return RETRYFOLD; /* (i o k1) o k2 ==> i o (k1 o k2) */
1812 }
1813 return NEXTFOLD;
1814#else
1815 UNUSED(J); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD;
1816#endif
1817}
1818
1819LJFOLD(BAND BAND any)
1820LJFOLD(BOR BOR any)
1821LJFOLDF(reassoc_dup)
1822{
1823 if (fins->op2 == fleft->op1 || fins->op2 == fleft->op2)
1824 return LEFTFOLD; /* (a o b) o a ==> a o b; (a o b) o b ==> a o b */
1825 return NEXTFOLD;
1826}
1827
1828LJFOLD(MIN MIN any)
1829LJFOLD(MAX MAX any)
1830LJFOLDF(reassoc_dup_minmax)
1831{
1832 if (fins->op2 == fleft->op2)
1833 return LEFTFOLD; /* (a o b) o b ==> a o b */
1834 return NEXTFOLD;
1835}
1836
1837LJFOLD(BXOR BXOR any)
1838LJFOLDF(reassoc_bxor)
1839{
1840 PHIBARRIER(fleft);
1841 if (fins->op2 == fleft->op1) /* (a xor b) xor a ==> b */
1842 return fleft->op2;
1843 if (fins->op2 == fleft->op2) /* (a xor b) xor b ==> a */
1844 return fleft->op1;
1845 return NEXTFOLD;
1846}
1847
1848LJFOLD(BSHL BSHL KINT)
1849LJFOLD(BSHR BSHR KINT)
1850LJFOLD(BSAR BSAR KINT)
1851LJFOLD(BROL BROL KINT)
1852LJFOLD(BROR BROR KINT)
1853LJFOLDF(reassoc_shift)
1854{
1855 IRIns *irk = IR(fleft->op2);
1856 PHIBARRIER(fleft); /* The (shift any KINT) rule covers k2 == 0 and more. */
1857 if (irk->o == IR_KINT) { /* (i o k1) o k2 ==> i o (k1 + k2) */
1858 int32_t mask = irt_is64(fins->t) ? 63 : 31;
1859 int32_t k = (irk->i & mask) + (fright->i & mask);
1860 if (k > mask) { /* Combined shift too wide? */
1861 if (fins->o == IR_BSHL || fins->o == IR_BSHR)
1862 return mask == 31 ? INTFOLD(0) : INT64FOLD(0);
1863 else if (fins->o == IR_BSAR)
1864 k = mask;
1865 else
1866 k &= mask;
1867 }
1868 fins->op1 = fleft->op1;
1869 fins->op2 = (IRRef1)lj_ir_kint(J, k);
1870 return RETRYFOLD;
1871 }
1872 return NEXTFOLD;
1873}
1874
1875LJFOLD(MIN MIN KINT)
1876LJFOLD(MAX MAX KINT)
1877LJFOLDF(reassoc_minmax_k)
1878{
1879 IRIns *irk = IR(fleft->op2);
1880 if (irk->o == IR_KINT) {
1881 int32_t a = irk->i;
1882 int32_t y = kfold_intop(a, fright->i, fins->o);
1883 if (a == y) /* (x o k1) o k2 ==> x o k1, if (k1 o k2) == k1. */
1884 return LEFTFOLD;
1885 PHIBARRIER(fleft);
1886 fins->op1 = fleft->op1;
1887 fins->op2 = (IRRef1)lj_ir_kint(J, y);
1888 return RETRYFOLD; /* (x o k1) o k2 ==> x o (k1 o k2) */
1889 }
1890 return NEXTFOLD;
1891}
1892
1893/* -- Array bounds check elimination -------------------------------------- */
1894
1895/* Eliminate ABC across PHIs to handle t[i-1] forwarding case.
1896** ABC(asize, (i+k)+(-k)) ==> ABC(asize, i), but only if it already exists.
1897** Could be generalized to (i+k1)+k2 ==> i+(k1+k2), but needs better disambig.
1898*/
LJFOLD(ABC any ADD)
LJFOLDF(abc_fwd)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_ABC)) {
    if (irref_isk(fright->op2)) {
      IRIns *add2 = IR(fright->op1);
      if (add2->o == IR_ADD && irref_isk(add2->op2) &&
          IR(fright->op2)->i == -IR(add2->op2)->i) {
        IRRef ref = J->chain[IR_ABC];
        IRRef lim = add2->op1;
        if (fins->op1 > lim) lim = fins->op1;
        while (ref > lim) {
          IRIns *ir = IR(ref);
          if (ir->op1 == fins->op1 && ir->op2 == add2->op1)
            return DROPFOLD;
          ref = ir->prev;
        }
      }
    }
  }
  return NEXTFOLD;
}
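
/* Source-level sketch of the rule above (illustrative only): in a loop like
**   for i=2,n do t[i] = t[i-1] end
** the forwarded index may reach FOLD as (i + (-1)) + 1, which this rule
** maps back to an ABC already emitted for i and drops as redundant.
*/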

/* Eliminate ABC for constants.
** ABC(asize, k1), ABC(asize, k2) ==> ABC(asize, max(k1, k2))
** Drop the second ABC if k2 is lower. Otherwise patch the first ABC with k2.
*/
LJFOLD(ABC any KINT)
LJFOLDF(abc_k)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_ABC)) {
    IRRef ref = J->chain[IR_ABC];
    IRRef asize = fins->op1;
    while (ref > asize) {
      IRIns *ir = IR(ref);
      if (ir->op1 == asize && irref_isk(ir->op2)) {
        int32_t k = IR(ir->op2)->i;
        if (fright->i > k)
          ir->op2 = fins->op2;
        return DROPFOLD;
      }
      ref = ir->prev;
    }
    return EMITFOLD;  /* Already performed CSE. */
  }
  return NEXTFOLD;
}
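
/* Worked example (illustrative only): after ABC(asize, 3), a later
** ABC(asize, 5) patches the existing check to ABC(asize, 5) and is dropped;
** a later ABC(asize, 2) is simply dropped, since it is already covered.
*/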
1946
1947/* Eliminate invariant ABC inside loop. */
1948LJFOLD(ABC any any)
1949LJFOLDF(abc_invar)
1950{
1951 /* Invariant ABC marked as PTR. Drop if op1 is invariant, too. */
1952 if (!irt_isint(fins->t) && fins->op1 < J->chain[IR_LOOP] &&
1953 !irt_isphi(IR(fins->op1)->t))
1954 return DROPFOLD;
1955 return NEXTFOLD;
1956}
1957
1958/* -- Commutativity ------------------------------------------------------- */
1959
1960/* The refs of commutative ops are canonicalized. Lower refs go to the right.
1961** Rationale behind this:
1962** - It (also) moves constants to the right.
1963** - It reduces the number of FOLD rules (e.g. (BOR any KINT) suffices).
1964** - It helps CSE to find more matches.
1965** - The assembler generates better code with constants at the right.
1966*/
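
/* Illustrative only: constants are interned below REF_BIAS, so they always
** have lower refs than instructions. ADD(KINT(1), x) thus swaps into
** ADD(x, KINT(1)), and both operand orders CSE to the same instruction.
*/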

LJFOLD(ADD any any)
LJFOLD(MUL any any)
LJFOLD(ADDOV any any)
LJFOLD(MULOV any any)
LJFOLDF(comm_swap)
{
  if (fins->op1 < fins->op2) {  /* Move lower ref to the right. */
    IRRef1 tmp = fins->op1;
    fins->op1 = fins->op2;
    fins->op2 = tmp;
    return RETRYFOLD;
  }
  return NEXTFOLD;
}

LJFOLD(EQ any any)
LJFOLD(NE any any)
LJFOLDF(comm_equal)
{
  /* For non-numbers only: x == x ==> drop; x ~= x ==> fail */
  if (fins->op1 == fins->op2 && !irt_isnum(fins->t))
    return CONDFOLD(fins->o == IR_EQ);
  return fold_comm_swap(J);
}

LJFOLD(LT any any)
LJFOLD(GE any any)
LJFOLD(LE any any)
LJFOLD(GT any any)
LJFOLD(ULT any any)
LJFOLD(UGE any any)
LJFOLD(ULE any any)
LJFOLD(UGT any any)
LJFOLDF(comm_comp)
{
  /* For non-numbers only: x <=> x ==> drop; x <> x ==> fail */
  if (fins->op1 == fins->op2 && !irt_isnum(fins->t))
    return CONDFOLD((fins->o ^ (fins->o >> 1)) & 1);
  if (fins->op1 < fins->op2) {  /* Move lower ref to the right. */
    IRRef1 tmp = fins->op1;
    fins->op1 = fins->op2;
    fins->op2 = tmp;
    fins->o ^= 3;  /* GT <-> LT, GE <-> LE, does not affect U */
    return RETRYFOLD;
  }
  return NEXTFOLD;
}
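
/* Note on the bit tricks above (an explanatory sketch, assuming the usual
** IR opcode layout LT, GE, LE, GT with low two bits 00, 01, 10, 11):
** (o ^ (o >> 1)) & 1 is 1 exactly for GE/LE, the reflexive comparisons, so
** x <= x drops while x < x fails. And o ^= 3 swaps LT <-> GT and GE <-> LE,
** which mirrors the comparison when the operands are exchanged.
*/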

LJFOLD(BAND any any)
LJFOLD(BOR any any)
LJFOLDF(comm_dup)
{
  if (fins->op1 == fins->op2)  /* x o x ==> x */
    return LEFTFOLD;
  return fold_comm_swap(J);
}

LJFOLD(MIN any any)
LJFOLD(MAX any any)
LJFOLDF(comm_dup_minmax)
{
  if (fins->op1 == fins->op2)  /* x o x ==> x */
    return LEFTFOLD;
  return NEXTFOLD;
}

LJFOLD(BXOR any any)
LJFOLDF(comm_bxor)
{
  if (fins->op1 == fins->op2)  /* i xor i ==> 0 */
    return irt_is64(fins->t) ? INT64FOLD(0) : INTFOLD(0);
  return fold_comm_swap(J);
}

/* -- Simplification of compound expressions ------------------------------ */

static TRef kfold_xload(jit_State *J, IRIns *ir, const void *p)
{
  int32_t k;
  switch (irt_type(ir->t)) {
  case IRT_NUM: return lj_ir_knum_u64(J, *(uint64_t *)p);
  case IRT_I8: k = (int32_t)*(int8_t *)p; break;
  case IRT_U8: k = (int32_t)*(uint8_t *)p; break;
  case IRT_I16: k = (int32_t)(int16_t)lj_getu16(p); break;
  case IRT_U16: k = (int32_t)(uint16_t)lj_getu16(p); break;
  case IRT_INT: case IRT_U32: k = (int32_t)lj_getu32(p); break;
  case IRT_I64: case IRT_U64: return lj_ir_kint64(J, *(uint64_t *)p);
  default: return 0;
  }
  return lj_ir_kint(J, k);
}
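
/* Illustrative only: kfold_xload widens sub-word loads the same way the
** backend would, e.g. an IRT_I16 load of the bytes ff ff yields KINT(-1)
** via sign extension, while IRT_U16 yields KINT(65535).
*/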

/* Turn: string.sub(str, a, b) == kstr
** into: string.byte(str, a) == string.byte(kstr, 1) etc.
** Note: this creates unaligned XLOADs on x86/x64.
*/
LJFOLD(EQ SNEW KGC)
LJFOLD(NE SNEW KGC)
LJFOLDF(merge_eqne_snew_kgc)
{
  GCstr *kstr = ir_kstr(fright);
  int32_t len = (int32_t)kstr->len;
  lj_assertJ(irt_isstr(fins->t), "bad equality IR type");

#if LJ_TARGET_UNALIGNED
#define FOLD_SNEW_MAX_LEN 4  /* Handle string lengths 0, 1, 2, 3, 4. */
#define FOLD_SNEW_TYPE8 IRT_I8  /* Creates shorter immediates. */
#else
#define FOLD_SNEW_MAX_LEN 1  /* Handle string lengths 0 or 1. */
#define FOLD_SNEW_TYPE8 IRT_U8  /* Prefer unsigned loads. */
#endif

  PHIBARRIER(fleft);
  if (len <= FOLD_SNEW_MAX_LEN) {
    IROp op = (IROp)fins->o;
    IRRef strref = fleft->op1;
    if (IR(strref)->o != IR_STRREF)
      return NEXTFOLD;
    if (op == IR_EQ) {
      emitir(IRTGI(IR_EQ), fleft->op2, lj_ir_kint(J, len));
      /* Caveat: fins/fleft/fright is no longer valid after emitir. */
    } else {
      /* NE is not expanded since this would need an OR of two conds. */
      if (!irref_isk(fleft->op2))  /* Only handle the constant length case. */
        return NEXTFOLD;
      if (IR(fleft->op2)->i != len)
        return DROPFOLD;
    }
    if (len > 0) {
      /* A 4 byte load for length 3 is ok -- all strings have an extra NUL. */
      uint16_t ot = (uint16_t)(len == 1 ? IRT(IR_XLOAD, FOLD_SNEW_TYPE8) :
                               len == 2 ? IRT(IR_XLOAD, IRT_U16) :
                               IRTI(IR_XLOAD));
      TRef tmp = emitir(ot, strref,
                        IRXLOAD_READONLY | (len > 1 ? IRXLOAD_UNALIGNED : 0));
      TRef val = kfold_xload(J, IR(tref_ref(tmp)), strdata(kstr));
      if (len == 3)
        tmp = emitir(IRTI(IR_BAND), tmp,
                     lj_ir_kint(J, LJ_ENDIAN_SELECT(0x00ffffff, 0xffffff00)));
      fins->op1 = (IRRef1)tmp;
      fins->op2 = (IRRef1)val;
      fins->ot = (IROpT)IRTGI(op);
      return RETRYFOLD;
    } else {
      return DROPFOLD;
    }
  }
  return NEXTFOLD;
}
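
/* Worked example (illustrative only, unaligned target): for
** string.sub(s, i, i+2) == "abc" the SNEW result is never materialized.
** A length guard is emitted, a 4 byte XLOAD of the source bytes is masked
** down to 3 bytes (the trailing NUL makes the over-read safe), and the
** result is compared against the constant kfold_xload builds from "abc".
*/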

/* -- Loads --------------------------------------------------------------- */

/* Loads cannot be folded or passed on to CSE in general.
** Alias analysis is needed to check for forwarding opportunities.
**
** Caveat: *all* loads must be listed here or they end up at CSE!
*/

LJFOLD(ALOAD any)
LJFOLDX(lj_opt_fwd_aload)

/* From HREF fwd (see below). Must eliminate, not supported by fwd/backend. */
LJFOLD(HLOAD KKPTR)
LJFOLDF(kfold_hload_kkptr)
{
  UNUSED(J);
  lj_assertJ(ir_kptr(fleft) == niltvg(J2G(J)), "expected niltv");
  return TREF_NIL;
}

LJFOLD(HLOAD any)
LJFOLDX(lj_opt_fwd_hload)

LJFOLD(ULOAD any)
LJFOLDX(lj_opt_fwd_uload)

LJFOLD(ALEN any any)
LJFOLDX(lj_opt_fwd_alen)

/* Upvalue refs are really loads, but there are no corresponding stores.
** So CSE is ok for them, except for UREFO across a GC step (see below).
** If the referenced function is const, its upvalue addresses are const, too.
** This can be used to improve CSE by looking for the same address,
** even if the upvalues originate from a different function.
*/
LJFOLD(UREFO KGC any)
LJFOLD(UREFC KGC any)
LJFOLDF(cse_uref)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_CSE)) {
    IRRef ref = J->chain[fins->o];
    GCfunc *fn = ir_kfunc(fleft);
    GCupval *uv = gco2uv(gcref(fn->l.uvptr[(fins->op2 >> 8)]));
    while (ref > 0) {
      IRIns *ir = IR(ref);
      if (irref_isk(ir->op1)) {
        GCfunc *fn2 = ir_kfunc(IR(ir->op1));
        if (gco2uv(gcref(fn2->l.uvptr[(ir->op2 >> 8)])) == uv) {
          if (fins->o == IR_UREFO && gcstep_barrier(J, ref))
            break;
          return ref;
        }
      }
      ref = ir->prev;
    }
  }
  return EMITFOLD;
}

LJFOLD(HREFK any any)
LJFOLDX(lj_opt_fwd_hrefk)

LJFOLD(HREF TNEW any)
LJFOLDF(fwd_href_tnew)
{
  if (lj_opt_fwd_href_nokey(J))
    return lj_ir_kkptr(J, niltvg(J2G(J)));
  return NEXTFOLD;
}

LJFOLD(HREF TDUP KPRI)
LJFOLD(HREF TDUP KGC)
LJFOLD(HREF TDUP KNUM)
LJFOLDF(fwd_href_tdup)
{
  TValue keyv;
  lj_ir_kvalue(J->L, &keyv, fright);
  if (lj_tab_get(J->L, ir_ktab(IR(fleft->op1)), &keyv) == niltvg(J2G(J)) &&
      lj_opt_fwd_href_nokey(J))
    return lj_ir_kkptr(J, niltvg(J2G(J)));
  return NEXTFOLD;
}

/* We can safely FOLD/CSE array/hash refs and field loads, since there
** are no corresponding stores. But we need to check for any NEWREF with
** an aliased table, as it may invalidate all of the pointers and fields.
** Only HREF needs the NEWREF check -- AREF and HREFK already depend on
** FLOADs. And NEWREF itself is treated like a store (see below).
** LREF is constant (per trace) since coroutine switches are not inlined.
*/
LJFOLD(FLOAD TNEW IRFL_TAB_ASIZE)
LJFOLDF(fload_tab_tnew_asize)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD) && lj_opt_fwd_tptr(J, fins->op1))
    return INTFOLD(fleft->op1);
  return NEXTFOLD;
}

LJFOLD(FLOAD TNEW IRFL_TAB_HMASK)
LJFOLDF(fload_tab_tnew_hmask)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD) && lj_opt_fwd_tptr(J, fins->op1))
    return INTFOLD((1 << fleft->op2)-1);
  return NEXTFOLD;
}

LJFOLD(FLOAD TDUP IRFL_TAB_ASIZE)
LJFOLDF(fload_tab_tdup_asize)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD) && lj_opt_fwd_tptr(J, fins->op1))
    return INTFOLD((int32_t)ir_ktab(IR(fleft->op1))->asize);
  return NEXTFOLD;
}

LJFOLD(FLOAD TDUP IRFL_TAB_HMASK)
LJFOLDF(fload_tab_tdup_hmask)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD) && lj_opt_fwd_tptr(J, fins->op1))
    return INTFOLD((int32_t)ir_ktab(IR(fleft->op1))->hmask);
  return NEXTFOLD;
}

LJFOLD(HREF any any)
LJFOLD(FLOAD any IRFL_TAB_ARRAY)
LJFOLD(FLOAD any IRFL_TAB_NODE)
LJFOLD(FLOAD any IRFL_TAB_ASIZE)
LJFOLD(FLOAD any IRFL_TAB_HMASK)
LJFOLDF(fload_tab_ah)
{
  TRef tr = lj_opt_cse(J);
  return lj_opt_fwd_tptr(J, tref_ref(tr)) ? tr : EMITFOLD;
}

/* Strings are immutable, so we can safely FOLD/CSE the related FLOAD. */
LJFOLD(FLOAD KGC IRFL_STR_LEN)
LJFOLDF(fload_str_len_kgc)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD))
    return INTFOLD((int32_t)ir_kstr(fleft)->len);
  return NEXTFOLD;
}

LJFOLD(FLOAD SNEW IRFL_STR_LEN)
LJFOLDF(fload_str_len_snew)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD)) {
    PHIBARRIER(fleft);
    return fleft->op2;
  }
  return NEXTFOLD;
}

LJFOLD(FLOAD TOSTR IRFL_STR_LEN)
LJFOLDF(fload_str_len_tostr)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD) && fleft->op2 == IRTOSTR_CHAR)
    return INTFOLD(1);
  return NEXTFOLD;
}

/* The C type ID of cdata objects is immutable. */
LJFOLD(FLOAD KGC IRFL_CDATA_CTYPEID)
LJFOLDF(fload_cdata_typeid_kgc)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD))
    return INTFOLD((int32_t)ir_kcdata(fleft)->ctypeid);
  return NEXTFOLD;
}

/* Get the contents of immutable cdata objects. */
LJFOLD(FLOAD KGC IRFL_CDATA_PTR)
LJFOLD(FLOAD KGC IRFL_CDATA_INT)
LJFOLD(FLOAD KGC IRFL_CDATA_INT64)
LJFOLDF(fload_cdata_int64_kgc)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD)) {
    void *p = cdataptr(ir_kcdata(fleft));
    if (irt_is64(fins->t))
      return INT64FOLD(*(uint64_t *)p);
    else
      return INTFOLD(*(int32_t *)p);
  }
  return NEXTFOLD;
}

LJFOLD(FLOAD CNEW IRFL_CDATA_CTYPEID)
LJFOLD(FLOAD CNEWI IRFL_CDATA_CTYPEID)
LJFOLDF(fload_cdata_typeid_cnew)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD))
    return fleft->op1;  /* No PHI barrier needed. CNEW/CNEWI op1 is const. */
  return NEXTFOLD;
}

/* Pointer, int and int64 cdata objects are immutable. */
LJFOLD(FLOAD CNEWI IRFL_CDATA_PTR)
LJFOLD(FLOAD CNEWI IRFL_CDATA_INT)
LJFOLD(FLOAD CNEWI IRFL_CDATA_INT64)
LJFOLDF(fload_cdata_ptr_int64_cnew)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD))
    return fleft->op2;  /* Fold even across PHI to avoid allocations. */
  return NEXTFOLD;
}

LJFOLD(FLOAD any IRFL_STR_LEN)
LJFOLD(FLOAD any IRFL_FUNC_ENV)
LJFOLD(FLOAD any IRFL_THREAD_ENV)
LJFOLD(FLOAD any IRFL_CDATA_CTYPEID)
LJFOLD(FLOAD any IRFL_CDATA_PTR)
LJFOLD(FLOAD any IRFL_CDATA_INT)
LJFOLD(FLOAD any IRFL_CDATA_INT64)
LJFOLD(VLOAD any any)  /* Vararg loads have no corresponding stores. */
LJFOLDX(lj_opt_cse)

/* All other field loads need alias analysis. */
LJFOLD(FLOAD any any)
LJFOLDX(lj_opt_fwd_fload)

/* This is for LOOP only. Recording handles SLOADs internally. */
LJFOLD(SLOAD any any)
LJFOLDF(fwd_sload)
{
  if ((fins->op2 & IRSLOAD_FRAME)) {
    TRef tr = lj_opt_cse(J);
    return tref_ref(tr) < J->chain[IR_RETF] ? EMITFOLD : tr;
  } else {
    lj_assertJ(J->slot[fins->op1] != 0, "uninitialized slot accessed");
    return J->slot[fins->op1];
  }
}

/* Only fold for KKPTR. The pointer _and_ the contents must be const. */
LJFOLD(XLOAD KKPTR any)
LJFOLDF(xload_kptr)
{
  TRef tr = kfold_xload(J, fins, ir_kptr(fleft));
  return tr ? tr : NEXTFOLD;
}

LJFOLD(XLOAD any any)
LJFOLDX(lj_opt_fwd_xload)

/* -- Write barriers ------------------------------------------------------ */

/* Write barriers are amenable to CSE, but not across any incremental
** GC steps.
**
** The same logic applies to open upvalue references, because a stack
** may be resized during a GC step (not the current stack, but maybe that
** of a coroutine).
*/
LJFOLD(TBAR any)
LJFOLD(OBAR any any)
LJFOLD(UREFO any any)
LJFOLDF(barrier_tab)
{
  TRef tr = lj_opt_cse(J);
  if (gcstep_barrier(J, tref_ref(tr)))  /* CSE across GC step? */
    return EMITFOLD;  /* Raw emit. Assumes fins is left intact by CSE. */
  return tr;
}

LJFOLD(TBAR TNEW)
LJFOLD(TBAR TDUP)
LJFOLDF(barrier_tnew_tdup)
{
  /* New tables are always white and never need a barrier. */
  if (fins->op1 < J->chain[IR_LOOP])  /* Except across a GC step. */
    return NEXTFOLD;
  return DROPFOLD;
}

/* -- Profiling ----------------------------------------------------------- */

LJFOLD(PROF any any)
LJFOLDF(prof)
{
  IRRef ref = J->chain[IR_PROF];
  if (ref+1 == J->cur.nins)  /* Drop neighbouring IR_PROF. */
    return ref;
  return EMITFOLD;
}

/* -- Stores and allocations ---------------------------------------------- */

/* Stores and allocations cannot be folded or passed on to CSE in general.
** But some stores can be eliminated with dead-store elimination (DSE).
**
** Caveat: *all* stores and allocs must be listed here or they end up at CSE!
*/

LJFOLD(ASTORE any any)
LJFOLD(HSTORE any any)
LJFOLDX(lj_opt_dse_ahstore)

LJFOLD(USTORE any any)
LJFOLDX(lj_opt_dse_ustore)

LJFOLD(FSTORE any any)
LJFOLDX(lj_opt_dse_fstore)

LJFOLD(XSTORE any any)
LJFOLDX(lj_opt_dse_xstore)

LJFOLD(NEWREF any any)  /* Treated like a store. */
LJFOLD(CALLA any any)
LJFOLD(CALLL any any)  /* Safeguard fallback. */
LJFOLD(CALLS any any)
LJFOLD(CALLXS any any)
LJFOLD(XBAR)
LJFOLD(RETF any any)  /* Modifies BASE. */
LJFOLD(TNEW any any)
LJFOLD(TDUP any)
LJFOLD(CNEW any any)
LJFOLD(XSNEW any any)
LJFOLD(BUFHDR any any)
LJFOLDX(lj_ir_emit)

/* ------------------------------------------------------------------------ */

/* Every entry in the generated hash table is a 32 bit pattern:
**
**   xxxxxxxx iiiiiii lllllll rrrrrrrrrr
**
**   xxxxxxxx = 8 bit index into fold function table
**    iiiiiii = 7 bit folded instruction opcode
**    lllllll = 7 bit left instruction opcode
** rrrrrrrrrr = 8 bit right instruction opcode or 10 bits from literal field
*/

#include "lj_folddef.h"

/* ------------------------------------------------------------------------ */

/* Fold IR instruction. */
TRef LJ_FASTCALL lj_opt_fold(jit_State *J)
{
  uint32_t key, any;
  IRRef ref;

  if (LJ_UNLIKELY((J->flags & JIT_F_OPT_MASK) != JIT_F_OPT_DEFAULT)) {
    lj_assertJ(((JIT_F_OPT_FOLD|JIT_F_OPT_FWD|JIT_F_OPT_CSE|JIT_F_OPT_DSE) |
                JIT_F_OPT_DEFAULT) == JIT_F_OPT_DEFAULT,
               "bad JIT_F_OPT_DEFAULT");
    /* Folding disabled? Chain to CSE, but not for loads/stores/allocs. */
    if (!(J->flags & JIT_F_OPT_FOLD) && irm_kind(lj_ir_mode[fins->o]) == IRM_N)
      return lj_opt_cse(J);

    /* No FOLD, forwarding or CSE? Emit raw IR for loads, except for SLOAD. */
    if ((J->flags & (JIT_F_OPT_FOLD|JIT_F_OPT_FWD|JIT_F_OPT_CSE)) !=
        (JIT_F_OPT_FOLD|JIT_F_OPT_FWD|JIT_F_OPT_CSE) &&
        irm_kind(lj_ir_mode[fins->o]) == IRM_L && fins->o != IR_SLOAD)
      return lj_ir_emit(J);

    /* No FOLD or DSE? Emit raw IR for stores. */
    if ((J->flags & (JIT_F_OPT_FOLD|JIT_F_OPT_DSE)) !=
        (JIT_F_OPT_FOLD|JIT_F_OPT_DSE) &&
        irm_kind(lj_ir_mode[fins->o]) == IRM_S)
      return lj_ir_emit(J);
  }

  /* Fold engine start/retry point. */
retry:
  /* Construct key from opcode and operand opcodes (unless literal/none). */
  key = ((uint32_t)fins->o << 17);
  if (fins->op1 >= J->cur.nk) {
    key += (uint32_t)IR(fins->op1)->o << 10;
    *fleft = *IR(fins->op1);
    if (fins->op1 < REF_TRUE)
      fleft[1] = IR(fins->op1)[1];
  }
  if (fins->op2 >= J->cur.nk) {
    key += (uint32_t)IR(fins->op2)->o;
    *fright = *IR(fins->op2);
    if (fins->op2 < REF_TRUE)
      fright[1] = IR(fins->op2)[1];
  } else {
    key += (fins->op2 & 0x3ffu);  /* Literal mask. Must include IRCONV_*MASK. */
  }

  /* Check for a match in order from most specific to least specific. */
  any = 0;
  for (;;) {
    uint32_t k = key | (any & 0x1ffff);
    uint32_t h = fold_hashkey(k);
    uint32_t fh = fold_hash[h];  /* Lookup key in semi-perfect hash table. */
    if ((fh & 0xffffff) == k || (fh = fold_hash[h+1], (fh & 0xffffff) == k)) {
      ref = (IRRef)tref_ref(fold_func[fh >> 24](J));
      if (ref != NEXTFOLD)
        break;
    }
    if (any == 0xfffff)  /* Exhausted folding. Pass on to CSE. */
      return lj_opt_cse(J);
    any = (any | (any >> 10)) ^ 0xffc00;
  }
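
  /* Note on the wildcard progression above (an explanatory sketch): any
  ** steps through 0x00000 (ins left right), 0xffc00 (ins any right),
  ** 0x003ff (ins left any) and 0xfffff (ins any any), masking the 7 bit
  ** left and 10 bit right key fields in turn, from most to least specific.
  */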

  /* Return value processing, ordered by frequency. */
  if (LJ_LIKELY(ref >= MAX_FOLD))
    return TREF(ref, irt_t(IR(ref)->t));
  if (ref == RETRYFOLD)
    goto retry;
  if (ref == KINTFOLD)
    return lj_ir_kint(J, fins->i);
  if (ref == FAILFOLD)
    lj_trace_err(J, LJ_TRERR_GFAIL);
  lj_assertJ(ref == DROPFOLD, "bad fold result");
  return REF_DROP;
}

/* -- Common-Subexpression Elimination ------------------------------------ */

/* CSE an IR instruction. This is very fast due to the skip-list chains. */
TRef LJ_FASTCALL lj_opt_cse(jit_State *J)
{
  /* Avoid narrow to wide store-to-load forwarding stall */
  IRRef2 op12 = (IRRef2)fins->op1 + ((IRRef2)fins->op2 << 16);
  IROp op = fins->o;
  if (LJ_LIKELY(J->flags & JIT_F_OPT_CSE)) {
    /* Limited search for same operands in per-opcode chain. */
    IRRef ref = J->chain[op];
    IRRef lim = fins->op1;
    if (fins->op2 > lim) lim = fins->op2;  /* Relies on lit < REF_BIAS. */
    while (ref > lim) {
      if (IR(ref)->op12 == op12)
        return TREF(ref, irt_t(IR(ref)->t));  /* Common subexpression found. */
      ref = IR(ref)->prev;
    }
  }
  /* Otherwise emit IR (inlined for speed). */
  {
    IRRef ref = lj_ir_nextins(J);
    IRIns *ir = IR(ref);
    ir->prev = J->chain[op];
    ir->op12 = op12;
    J->chain[op] = (IRRef1)ref;
    ir->o = fins->o;
    J->guardemit.irt |= fins->t.irt;
    return TREF(ref, irt_t((ir->t = fins->t)));
  }
}
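
/* Illustrative only: each opcode keeps its chain head in J->chain[op] and
** every instruction links to the previous one of the same opcode via
** ir->prev. A second ADD x y emitted on the same trace thus walks at most
** the ADD chain down to max(x, y) and reuses the earlier instruction's ref
** instead of emitting a duplicate.
*/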

/* CSE with explicit search limit. */
TRef LJ_FASTCALL lj_opt_cselim(jit_State *J, IRRef lim)
{
  IRRef ref = J->chain[fins->o];
  IRRef2 op12 = (IRRef2)fins->op1 + ((IRRef2)fins->op2 << 16);
  while (ref > lim) {
    if (IR(ref)->op12 == op12)
      return ref;
    ref = IR(ref)->prev;
  }
  return lj_ir_emit(J);
}

/* ------------------------------------------------------------------------ */

#undef IR
#undef fins
#undef fleft
#undef fright
#undef knumleft
#undef knumright
#undef emitir

#endif