1 | /* |
2 | * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved. |
3 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
4 | * |
5 | * This code is free software; you can redistribute it and/or modify it |
6 | * under the terms of the GNU General Public License version 2 only, as |
7 | * published by the Free Software Foundation. |
8 | * |
9 | * This code is distributed in the hope that it will be useful, but WITHOUT |
10 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
11 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
12 | * version 2 for more details (a copy is included in the LICENSE file that |
13 | * accompanied this code). |
14 | * |
15 | * You should have received a copy of the GNU General Public License version |
16 | * 2 along with this work; if not, write to the Free Software Foundation, |
17 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
18 | * |
19 | * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
20 | * or visit www.oracle.com if you need additional information or have any |
21 | * questions. |
22 | * |
23 | */ |
24 | |
25 | #ifndef SHARE_OPTO_REGMASK_HPP |
26 | #define SHARE_OPTO_REGMASK_HPP |
27 | |
28 | #include "code/vmreg.hpp" |
29 | #include "opto/optoreg.hpp" |
30 | #include "utilities/count_leading_zeros.hpp" |
31 | #include "utilities/count_trailing_zeros.hpp" |
32 | |
33 | //-------------Non-zero bit search methods used by RegMask--------------------- |
34 | // Find lowest 1, undefined if empty/0 |
35 | static int find_lowest_bit(uint32_t mask) { |
36 | return count_trailing_zeros(mask); |
37 | } |
38 | // Find highest 1, undefined if empty/0 |
39 | static int find_highest_bit(uint32_t mask) { |
40 | return count_leading_zeros(mask) ^ 31; |
41 | } |
42 | |
43 | //------------------------------RegMask---------------------------------------- |
44 | // The ADL file describes how to print the machine-specific registers, as well |
45 | // as any notion of register classes. We provide a register mask, which is |
46 | // just a collection of Register numbers. |
47 | |
48 | // The ADLC defines 2 macros, RM_SIZE and FORALL_BODY. |
49 | // RM_SIZE is the size of a register mask in words. |
50 | // FORALL_BODY replicates a BODY macro once per word in the register mask. |
51 | // The usage is somewhat clumsy and limited to the regmask.[h,c]pp files. |
52 | // However, it means the ADLC can redefine the unroll macro and all loops |
53 | // over register masks will be unrolled by the correct amount. |
54 | |
55 | class RegMask { |
56 | union { |
57 | double _dummy_force_double_alignment[RM_SIZE>>1]; |
58 | // Array of Register Mask bits. This array is large enough to cover |
59 | // all the machine registers and all parameters that need to be passed |
60 | // on the stack (stack registers) up to some interesting limit. Methods |
61 | // that need more parameters will NOT be compiled. On Intel, the limit |
62 | // is something like 90+ parameters. |
63 | int _A[RM_SIZE]; |
64 | }; |
65 | // The low and high water marks represents the lowest and highest word |
66 | // that might contain set register mask bits, respectively. We guarantee |
67 | // that there are no bits in words outside this range, but any word at |
68 | // and between the two marks can still be 0. |
69 | int _lwm; |
70 | int _hwm; |
71 | |
72 | enum { |
73 | _WordBits = BitsPerInt, |
74 | _LogWordBits = LogBitsPerInt, |
75 | _RM_SIZE = RM_SIZE // local constant, imported, then hidden by #undef |
76 | }; |
77 | |
78 | public: |
79 | enum { CHUNK_SIZE = RM_SIZE*_WordBits }; |
80 | |
81 | // SlotsPerLong is 2, since slots are 32 bits and longs are 64 bits. |
82 | // Also, consider the maximum alignment size for a normally allocated |
83 | // value. Since we allocate register pairs but not register quads (at |
84 | // present), this alignment is SlotsPerLong (== 2). A normally |
85 | // aligned allocated register is either a single register, or a pair |
86 | // of adjacent registers, the lower-numbered being even. |
87 | // See also is_aligned_Pairs() below, and the padding added before |
88 | // Matcher::_new_SP to keep allocated pairs aligned properly. |
89 | // If we ever go to quad-word allocations, SlotsPerQuad will become |
90 | // the controlling alignment constraint. Note that this alignment |
91 | // requirement is internal to the allocator, and independent of any |
92 | // particular platform. |
93 | enum { SlotsPerLong = 2, |
94 | SlotsPerVecS = 1, |
95 | SlotsPerVecD = 2, |
96 | SlotsPerVecX = 4, |
97 | SlotsPerVecY = 8, |
98 | SlotsPerVecZ = 16 }; |
99 | |
100 | // A constructor only used by the ADLC output. All mask fields are filled |
101 | // in directly. Calls to this look something like RM(1,2,3,4); |
102 | RegMask( |
103 | # define BODY(I) int a##I, |
104 | FORALL_BODY |
105 | # undef BODY |
106 | int dummy = 0) { |
107 | # define BODY(I) _A[I] = a##I; |
108 | FORALL_BODY |
109 | # undef BODY |
110 | _lwm = 0; |
111 | _hwm = RM_SIZE - 1; |
112 | while (_hwm > 0 && _A[_hwm] == 0) _hwm--; |
113 | while ((_lwm < _hwm) && _A[_lwm] == 0) _lwm++; |
114 | assert(valid_watermarks(), "post-condition" ); |
115 | } |
116 | |
117 | // Handy copying constructor |
118 | RegMask(RegMask *rm) { |
119 | _hwm = rm->_hwm; |
120 | _lwm = rm->_lwm; |
121 | for (int i = 0; i < RM_SIZE; i++) { |
122 | _A[i] = rm->_A[i]; |
123 | } |
124 | assert(valid_watermarks(), "post-condition" ); |
125 | } |
126 | |
127 | // Construct an empty mask |
128 | RegMask() { |
129 | Clear(); |
130 | } |
131 | |
132 | // Construct a mask with a single bit |
133 | RegMask(OptoReg::Name reg) { |
134 | Clear(); |
135 | Insert(reg); |
136 | } |
137 | |
138 | // Check for register being in mask |
139 | int Member(OptoReg::Name reg) const { |
140 | assert(reg < CHUNK_SIZE, "" ); |
141 | return _A[reg>>_LogWordBits] & (1<<(reg&(_WordBits-1))); |
142 | } |
143 | |
144 | // The last bit in the register mask indicates that the mask should repeat |
145 | // indefinitely with ONE bits. Returns TRUE if mask is infinite or |
146 | // unbounded in size. Returns FALSE if mask is finite size. |
147 | int is_AllStack() const { return _A[RM_SIZE-1] >> (_WordBits-1); } |
148 | |
149 | // Work around an -xO3 optimization problme in WS6U1. The old way: |
150 | // void set_AllStack() { _A[RM_SIZE-1] |= (1<<(_WordBits-1)); } |
151 | // will cause _A[RM_SIZE-1] to be clobbered, not updated when set_AllStack() |
152 | // follows an Insert() loop, like the one found in init_spill_mask(). Using |
153 | // Insert() instead works because the index into _A in computed instead of |
154 | // constant. See bug 4665841. |
155 | void set_AllStack() { Insert(OptoReg::Name(CHUNK_SIZE-1)); } |
156 | |
157 | // Test for being a not-empty mask. |
158 | int is_NotEmpty() const { |
159 | assert(valid_watermarks(), "sanity" ); |
160 | int tmp = 0; |
161 | for (int i = _lwm; i <= _hwm; i++) { |
162 | tmp |= _A[i]; |
163 | } |
164 | return tmp; |
165 | } |
166 | |
167 | // Find lowest-numbered register from mask, or BAD if mask is empty. |
168 | OptoReg::Name find_first_elem() const { |
169 | assert(valid_watermarks(), "sanity" ); |
170 | for (int i = _lwm; i <= _hwm; i++) { |
171 | int bits = _A[i]; |
172 | if (bits) { |
173 | return OptoReg::Name((i<<_LogWordBits) + find_lowest_bit(bits)); |
174 | } |
175 | } |
176 | return OptoReg::Name(OptoReg::Bad); |
177 | } |
178 | |
179 | // Get highest-numbered register from mask, or BAD if mask is empty. |
180 | OptoReg::Name find_last_elem() const { |
181 | assert(valid_watermarks(), "sanity" ); |
182 | for (int i = _hwm; i >= _lwm; i--) { |
183 | int bits = _A[i]; |
184 | if (bits) { |
185 | return OptoReg::Name((i<<_LogWordBits) + find_highest_bit(bits)); |
186 | } |
187 | } |
188 | return OptoReg::Name(OptoReg::Bad); |
189 | } |
190 | |
191 | // Clear out partial bits; leave only aligned adjacent bit pairs. |
192 | void clear_to_pairs(); |
193 | |
194 | #ifdef ASSERT |
195 | // Verify watermarks are sane, i.e., within bounds and that no |
196 | // register words below or above the watermarks have bits set. |
197 | bool valid_watermarks() const { |
198 | assert(_hwm >= 0 && _hwm < RM_SIZE, "_hwm out of range: %d" , _hwm); |
199 | assert(_lwm >= 0 && _lwm < RM_SIZE, "_lwm out of range: %d" , _lwm); |
200 | for (int i = 0; i < _lwm; i++) { |
201 | assert(_A[i] == 0, "_lwm too high: %d regs at: %d" , _lwm, i); |
202 | } |
203 | for (int i = _hwm + 1; i < RM_SIZE; i++) { |
204 | assert(_A[i] == 0, "_hwm too low: %d regs at: %d" , _hwm, i); |
205 | } |
206 | return true; |
207 | } |
208 | #endif // !ASSERT |
209 | |
210 | // Test that the mask contains only aligned adjacent bit pairs |
211 | bool is_aligned_pairs() const; |
212 | |
213 | // mask is a pair of misaligned registers |
214 | bool is_misaligned_pair() const; |
215 | // Test for single register |
216 | bool is_bound1() const; |
217 | // Test for a single adjacent pair |
218 | bool is_bound_pair() const; |
219 | // Test for a single adjacent set of ideal register's size. |
220 | bool is_bound(uint ireg) const; |
221 | |
222 | // Find the lowest-numbered register set in the mask. Return the |
223 | // HIGHEST register number in the set, or BAD if no sets. |
224 | // Assert that the mask contains only bit sets. |
225 | OptoReg::Name find_first_set(const int size) const; |
226 | |
227 | // Clear out partial bits; leave only aligned adjacent bit sets of size. |
228 | void clear_to_sets(const int size); |
229 | // Smear out partial bits to aligned adjacent bit sets. |
230 | void smear_to_sets(const int size); |
231 | // Test that the mask contains only aligned adjacent bit sets |
232 | bool is_aligned_sets(const int size) const; |
233 | |
234 | // Test for a single adjacent set |
235 | int is_bound_set(const int size) const; |
236 | |
237 | static bool is_vector(uint ireg); |
238 | static int num_registers(uint ireg); |
239 | |
240 | // Fast overlap test. Non-zero if any registers in common. |
241 | int overlap(const RegMask &rm) const { |
242 | assert(valid_watermarks() && rm.valid_watermarks(), "sanity" ); |
243 | int hwm = MIN2(_hwm, rm._hwm); |
244 | int lwm = MAX2(_lwm, rm._lwm); |
245 | int result = 0; |
246 | for (int i = lwm; i <= hwm; i++) { |
247 | result |= _A[i] & rm._A[i]; |
248 | } |
249 | return result; |
250 | } |
251 | |
252 | // Special test for register pressure based splitting |
253 | // UP means register only, Register plus stack, or stack only is DOWN |
254 | bool is_UP() const; |
255 | |
256 | // Clear a register mask |
257 | void Clear() { |
258 | _lwm = RM_SIZE - 1; |
259 | _hwm = 0; |
260 | memset(_A, 0, sizeof(int)*RM_SIZE); |
261 | assert(valid_watermarks(), "sanity" ); |
262 | } |
263 | |
264 | // Fill a register mask with 1's |
265 | void Set_All() { |
266 | _lwm = 0; |
267 | _hwm = RM_SIZE - 1; |
268 | memset(_A, 0xFF, sizeof(int)*RM_SIZE); |
269 | assert(valid_watermarks(), "sanity" ); |
270 | } |
271 | |
272 | // Insert register into mask |
273 | void Insert(OptoReg::Name reg) { |
274 | assert(reg < CHUNK_SIZE, "sanity" ); |
275 | assert(valid_watermarks(), "pre-condition" ); |
276 | int index = reg>>_LogWordBits; |
277 | if (index > _hwm) _hwm = index; |
278 | if (index < _lwm) _lwm = index; |
279 | _A[index] |= (1<<(reg&(_WordBits-1))); |
280 | assert(valid_watermarks(), "post-condition" ); |
281 | } |
282 | |
283 | // Remove register from mask |
284 | void Remove(OptoReg::Name reg) { |
285 | assert(reg < CHUNK_SIZE, "" ); |
286 | _A[reg>>_LogWordBits] &= ~(1<<(reg&(_WordBits-1))); |
287 | } |
288 | |
289 | // OR 'rm' into 'this' |
290 | void OR(const RegMask &rm) { |
291 | assert(valid_watermarks() && rm.valid_watermarks(), "sanity" ); |
292 | // OR widens the live range |
293 | if (_lwm > rm._lwm) _lwm = rm._lwm; |
294 | if (_hwm < rm._hwm) _hwm = rm._hwm; |
295 | for (int i = _lwm; i <= _hwm; i++) { |
296 | _A[i] |= rm._A[i]; |
297 | } |
298 | assert(valid_watermarks(), "sanity" ); |
299 | } |
300 | |
301 | // AND 'rm' into 'this' |
302 | void AND(const RegMask &rm) { |
303 | assert(valid_watermarks() && rm.valid_watermarks(), "sanity" ); |
304 | // Do not evaluate words outside the current watermark range, as they are |
305 | // already zero and an &= would not change that |
306 | for (int i = _lwm; i <= _hwm; i++) { |
307 | _A[i] &= rm._A[i]; |
308 | } |
309 | // Narrow the watermarks if &rm spans a narrower range. |
310 | // Update after to ensure non-overlapping words are zeroed out. |
311 | if (_lwm < rm._lwm) _lwm = rm._lwm; |
312 | if (_hwm > rm._hwm) _hwm = rm._hwm; |
313 | } |
314 | |
315 | // Subtract 'rm' from 'this' |
316 | void SUBTRACT(const RegMask &rm) { |
317 | assert(valid_watermarks() && rm.valid_watermarks(), "sanity" ); |
318 | int hwm = MIN2(_hwm, rm._hwm); |
319 | int lwm = MAX2(_lwm, rm._lwm); |
320 | for (int i = lwm; i <= hwm; i++) { |
321 | _A[i] &= ~rm._A[i]; |
322 | } |
323 | } |
324 | |
325 | // Compute size of register mask: number of bits |
326 | uint Size() const; |
327 | |
328 | #ifndef PRODUCT |
329 | void print() const { dump(); } |
330 | void dump(outputStream *st = tty) const; // Print a mask |
331 | #endif |
332 | |
333 | static const RegMask Empty; // Common empty mask |
334 | |
335 | static bool can_represent(OptoReg::Name reg) { |
336 | // NOTE: -1 in computation reflects the usage of the last |
337 | // bit of the regmask as an infinite stack flag and |
338 | // -7 is to keep mask aligned for largest value (VecZ). |
339 | return (int)reg < (int)(CHUNK_SIZE-1); |
340 | } |
341 | static bool can_represent_arg(OptoReg::Name reg) { |
342 | // NOTE: -SlotsPerVecZ in computation reflects the need |
343 | // to keep mask aligned for largest value (VecZ). |
344 | return (int)reg < (int)(CHUNK_SIZE-SlotsPerVecZ); |
345 | } |
346 | }; |
347 | |
348 | // Do not use this constant directly in client code! |
349 | #undef RM_SIZE |
350 | |
351 | #endif // SHARE_OPTO_REGMASK_HPP |
352 | |