/*
** SINK: Allocation Sinking and Store Sinking.
** Copyright (C) 2005-2014 Mike Pall. See Copyright Notice in luajit.h
*/

#define lj_opt_sink_c
#define LUA_CORE

#include "lj_obj.h"

#if LJ_HASJIT

#include "lj_ir.h"
#include "lj_jit.h"
#include "lj_iropt.h"
#include "lj_target.h"

/* Some local macros to save typing. Undef'd at the end. */
#define IR(ref) (&J->cur.ir[(ref)])

/* Check whether the store ref points to an eligible allocation. */
static IRIns *sink_checkalloc(jit_State *J, IRIns *irs)
{
  IRIns *ir = IR(irs->op1);
  if (!irref_isk(ir->op2))
    return NULL;  /* Non-constant key. */
  if (ir->o == IR_HREFK || ir->o == IR_AREF)
    ir = IR(ir->op1);
  else if (!(ir->o == IR_HREF || ir->o == IR_NEWREF ||
             ir->o == IR_FREF || ir->o == IR_ADD))
    return NULL;  /* Unhandled reference type (for XSTORE). */
  ir = IR(ir->op1);
  if (!(ir->o == IR_TNEW || ir->o == IR_TDUP || ir->o == IR_CNEW))
    return NULL;  /* Not an allocation. */
  return ir;  /* Return allocation. */
}
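
/*
** Illustrative example (informal, not part of the pass): for a store with
** a constant key into a fresh table, e.g. "local t = {}; t[1] = x", the
** trace IR contains a chain roughly like
**   t = TNEW  #0 #0
**   a = FLOAD t, tab.array
**   r = AREF  a, +1
**       ASTORE r, x
** sink_checkalloc() walks back from the store through the reference
** (two levels of op1 for AREF/HREFK, one for HREF/NEWREF/FREF/ADD) and
** returns the TNEW/TDUP/CNEW instruction, provided the key is a constant.
*/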

/* Recursively check whether a value depends on a PHI. */
static int sink_phidep(jit_State *J, IRRef ref)
{
  IRIns *ir = IR(ref);
  if (irt_isphi(ir->t)) return 1;
  if (ir->op1 >= REF_FIRST && sink_phidep(J, ir->op1)) return 1;
  if (ir->op2 >= REF_FIRST && sink_phidep(J, ir->op2)) return 1;
  return 0;
}

/* Check whether a value is a sinkable PHI or loop-invariant. */
static int sink_checkphi(jit_State *J, IRIns *ira, IRRef ref)
{
  if (ref >= REF_FIRST) {
    IRIns *ir = IR(ref);
    if (irt_isphi(ir->t) || (ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT &&
                             irt_isphi(IR(ir->op1)->t))) {
      ira->prev++;
      return 1;  /* Sinkable PHI. */
    }
    /* Otherwise the value must be loop-invariant. */
    return ref < J->loopref && !sink_phidep(J, ref);
  }
  return 1;  /* Constant (non-PHI). */
}
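
/*
** Note (informal summary): a value stored into a PHI allocation is
** acceptable if it is a constant, a PHI itself (or such a PHI behind an
** IRCONV_NUM_INT conversion), counted in ira->prev so the two PHI operands
** can be compared later, or loop-invariant without any transitive PHI
** dependency. Anything else has no single value the exit code could use to
** replay the store, so the allocation cannot be sunk.
*/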

/* Mark non-sinkable allocations using single-pass backward propagation.
**
** Roots for the marking process are:
**   - Some PHIs or snapshots (see below).
**   - Non-PHI, non-constant values stored to PHI allocations.
**   - All guards.
**   - Any remaining loads not eliminated by store-to-load forwarding.
**   - Stores with non-constant keys.
**   - All stored values.
*/
static void sink_mark_ins(jit_State *J)
{
  IRIns *ir, *irlast = IR(J->cur.nins-1);
  for (ir = irlast ; ; ir--) {
    switch (ir->o) {
    case IR_BASE:
      return;  /* Finished. */
    case IR_CALLL:  /* IRCALL_lj_tab_len */
    case IR_ALOAD: case IR_HLOAD: case IR_XLOAD: case IR_TBAR:
      irt_setmark(IR(ir->op1)->t);  /* Mark ref for remaining loads. */
      break;
    case IR_FLOAD:
      if (irt_ismarked(ir->t) || ir->op2 == IRFL_TAB_META)
        irt_setmark(IR(ir->op1)->t);  /* Mark table for remaining loads. */
      break;
    case IR_ASTORE: case IR_HSTORE: case IR_FSTORE: case IR_XSTORE: {
      IRIns *ira = sink_checkalloc(J, ir);
      if (!ira || (irt_isphi(ira->t) && !sink_checkphi(J, ira, ir->op2)))
        irt_setmark(IR(ir->op1)->t);  /* Mark ineligible ref. */
      irt_setmark(IR(ir->op2)->t);  /* Mark stored value. */
      break;
    }
#if LJ_HASFFI
    case IR_CNEWI:
      if (irt_isphi(ir->t) &&
          (!sink_checkphi(J, ir, ir->op2) ||
           (LJ_32 && ir+1 < irlast && (ir+1)->o == IR_HIOP &&
            !sink_checkphi(J, ir, (ir+1)->op2))))
        irt_setmark(ir->t);  /* Mark ineligible allocation. */
      /* fallthrough */
#endif
    case IR_USTORE:
      irt_setmark(IR(ir->op2)->t);  /* Mark stored value. */
      break;
#if LJ_HASFFI
    case IR_CALLXS:
#endif
    case IR_CALLS:
      irt_setmark(IR(ir->op1)->t);  /* Mark (potentially) stored values. */
      break;
    case IR_PHI: {
      IRIns *irl = IR(ir->op1), *irr = IR(ir->op2);
      irl->prev = irr->prev = 0;  /* Clear PHI value counts. */
      if (irl->o == irr->o &&
          (irl->o == IR_TNEW || irl->o == IR_TDUP ||
           (LJ_HASFFI && (irl->o == IR_CNEW || irl->o == IR_CNEWI))))
        break;
      irt_setmark(irl->t);
      irt_setmark(irr->t);
      break;
    }
    default:
      if (irt_ismarked(ir->t) || irt_isguard(ir->t)) {  /* Propagate mark. */
        if (ir->op1 >= REF_FIRST) irt_setmark(IR(ir->op1)->t);
        if (ir->op2 >= REF_FIRST) irt_setmark(IR(ir->op2)->t);
      }
      break;
    }
  }
}
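
/*
** Informal example of the effect: in a loop body like
**   local p = { x = i }; s = s + p.x
** forwarding has already replaced the p.x load with i, so nothing marks
** the TNEW and the allocation stays sinkable. If the table escapes, e.g.
** via "last = p", the store of p marks the TNEW as a stored value and the
** allocation is kept. The walk runs backwards from the last instruction,
** so a single pass propagates marks to all transitive operands.
*/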

/* Mark all instructions referenced by a snapshot. */
static void sink_mark_snap(jit_State *J, SnapShot *snap)
{
  SnapEntry *map = &J->cur.snapmap[snap->mapofs];
  MSize n, nent = snap->nent;
  for (n = 0; n < nent; n++) {
    IRRef ref = snap_ref(map[n]);
    if (!irref_isk(ref))
      irt_setmark(IR(ref)->t);
  }
}
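
/*
** Note (informal): this is only applied to the final snapshot of
** non-looping traces (see lj_opt_sink below). Allocations still referenced
** when such a trace completes normally would have to be materialized
** anyway, so they are treated as roots. Snapshots of looping traces do not
** pin allocations; sunk objects are rematerialized on side exits instead.
*/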

/* Iteratively remark PHI refs with differing marks or PHI value counts. */
static void sink_remark_phi(jit_State *J)
{
  IRIns *ir;
  int remark;
  do {
    remark = 0;
    for (ir = IR(J->cur.nins-1); ir->o == IR_PHI; ir--) {
      IRIns *irl = IR(ir->op1), *irr = IR(ir->op2);
      if (((irl->t.irt ^ irr->t.irt) & IRT_MARK))
        remark = 1;
      else if (irl->prev == irr->prev)
        continue;
      irt_setmark(IR(ir->op1)->t);
      irt_setmark(IR(ir->op2)->t);
    }
  } while (remark);
}
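
/*
** Rationale (informal): the two operands of an allocation PHI must be sunk
** or kept together. If only one side ended up marked, or the two sides
** counted a different number of sinkable PHI stores (irl->prev vs.
** irr->prev), both sides are marked. A differing mark bit forces another
** pass, since the newly set marks may disqualify further PHIs.
*/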

/* Sweep instructions and tag sunken allocations and stores. */
static void sink_sweep_ins(jit_State *J)
{
  IRIns *ir, *irfirst = IR(J->cur.nk);
  for (ir = IR(J->cur.nins-1) ; ir >= irfirst; ir--) {
    switch (ir->o) {
    case IR_ASTORE: case IR_HSTORE: case IR_FSTORE: case IR_XSTORE: {
      IRIns *ira = sink_checkalloc(J, ir);
      if (ira && !irt_ismarked(ira->t)) {
        int delta = (int)(ir - ira);
        ir->prev = REGSP(RID_SINK, delta > 255 ? 255 : delta);
      } else {
        ir->prev = REGSP_INIT;
      }
      break;
    }
    case IR_NEWREF:
      if (!irt_ismarked(IR(ir->op1)->t)) {
        ir->prev = REGSP(RID_SINK, 0);
      } else {
        irt_clearmark(ir->t);
        ir->prev = REGSP_INIT;
      }
      break;
#if LJ_HASFFI
    case IR_CNEW: case IR_CNEWI:
#endif
    case IR_TNEW: case IR_TDUP:
      if (!irt_ismarked(ir->t)) {
        ir->t.irt &= ~IRT_GUARD;
        ir->prev = REGSP(RID_SINK, 0);
        J->cur.sinktags = 1;  /* Signal present SINK tags to assembler. */
      } else {
        irt_clearmark(ir->t);
        ir->prev = REGSP_INIT;
      }
      break;
    case IR_PHI: {
      IRIns *ira = IR(ir->op2);
      if (!irt_ismarked(ira->t) &&
          (ira->o == IR_TNEW || ira->o == IR_TDUP ||
           (LJ_HASFFI && (ira->o == IR_CNEW || ira->o == IR_CNEWI)))) {
        ir->prev = REGSP(RID_SINK, 0);
      } else {
        ir->prev = REGSP_INIT;
      }
      break;
    }
    default:
      irt_clearmark(ir->t);
      ir->prev = REGSP_INIT;
      break;
    }
  }
}
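
/*
** Tagging sketch (informal): a sinkable allocation is tagged with
**   ir->prev = REGSP(RID_SINK, 0)
** and each store into it with
**   ir->prev = REGSP(RID_SINK, delta)
** where delta is the distance back to its allocation, saturated at 255.
** The backend emits no machine code for RID_SINK-tagged instructions;
** snapshot restoration uses the tags to re-create the object and replay
** the tagged stores when a side exit is taken.
*/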

/* Allocation sinking and store sinking.
**
** 1. Mark all non-sinkable allocations.
** 2. Then sink all remaining allocations and the related stores.
*/
void lj_opt_sink(jit_State *J)
{
  const uint32_t need = (JIT_F_OPT_SINK|JIT_F_OPT_FWD|
                         JIT_F_OPT_DCE|JIT_F_OPT_CSE|JIT_F_OPT_FOLD);
  if ((J->flags & need) == need &&
      (J->chain[IR_TNEW] || J->chain[IR_TDUP] ||
       (LJ_HASFFI && (J->chain[IR_CNEW] || J->chain[IR_CNEWI])))) {
    if (!J->loopref)
      sink_mark_snap(J, &J->cur.snap[J->cur.nsnap-1]);
    sink_mark_ins(J);
    if (J->loopref)
      sink_remark_phi(J);
    sink_sweep_ins(J);
  }
}
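
/*
** Usage note (informal): the pass requires SINK, FWD, DCE, CSE and FOLD to
** be enabled and at least one TNEW/TDUP/CNEW/CNEWI in the trace. A typical
** beneficiary is a short-lived object per iteration, e.g.
**   for i = 1, n do local p = { x = i, y = 2*i }; s = s + p.x * p.y end
** where the table never escapes: neither the allocation nor its stores
** generate code, and the object is only re-created if a side exit needs
** its contents.
*/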

#undef IR

#endif