1/*
2 * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
24
25#ifndef SHARE_OPTO_REGMASK_HPP
26#define SHARE_OPTO_REGMASK_HPP
27
28#include "code/vmreg.hpp"
29#include "opto/optoreg.hpp"
30#include "utilities/count_leading_zeros.hpp"
31#include "utilities/count_trailing_zeros.hpp"
32
33//-------------Non-zero bit search methods used by RegMask---------------------
34// Find lowest 1, undefined if empty/0
35static int find_lowest_bit(uint32_t mask) {
36 return count_trailing_zeros(mask);
37}
38// Find highest 1, undefined if empty/0
39static int find_highest_bit(uint32_t mask) {
40 return count_leading_zeros(mask) ^ 31;
41}
42
43//------------------------------RegMask----------------------------------------
44// The ADL file describes how to print the machine-specific registers, as well
45// as any notion of register classes. We provide a register mask, which is
46// just a collection of Register numbers.
47
48// The ADLC defines 2 macros, RM_SIZE and FORALL_BODY.
49// RM_SIZE is the size of a register mask in words.
50// FORALL_BODY replicates a BODY macro once per word in the register mask.
51// The usage is somewhat clumsy and limited to the regmask.[h,c]pp files.
52// However, it means the ADLC can redefine the unroll macro and all loops
53// over register masks will be unrolled by the correct amount.
54
55class RegMask {
56 union {
57 double _dummy_force_double_alignment[RM_SIZE>>1];
58 // Array of Register Mask bits. This array is large enough to cover
59 // all the machine registers and all parameters that need to be passed
60 // on the stack (stack registers) up to some interesting limit. Methods
61 // that need more parameters will NOT be compiled. On Intel, the limit
62 // is something like 90+ parameters.
63 int _A[RM_SIZE];
64 };
65 // The low and high water marks represents the lowest and highest word
66 // that might contain set register mask bits, respectively. We guarantee
67 // that there are no bits in words outside this range, but any word at
68 // and between the two marks can still be 0.
69 int _lwm;
70 int _hwm;
71
72 enum {
73 _WordBits = BitsPerInt,
74 _LogWordBits = LogBitsPerInt,
75 _RM_SIZE = RM_SIZE // local constant, imported, then hidden by #undef
76 };
77
78 public:
79 enum { CHUNK_SIZE = RM_SIZE*_WordBits };
80
81 // SlotsPerLong is 2, since slots are 32 bits and longs are 64 bits.
82 // Also, consider the maximum alignment size for a normally allocated
83 // value. Since we allocate register pairs but not register quads (at
84 // present), this alignment is SlotsPerLong (== 2). A normally
85 // aligned allocated register is either a single register, or a pair
86 // of adjacent registers, the lower-numbered being even.
87 // See also is_aligned_Pairs() below, and the padding added before
88 // Matcher::_new_SP to keep allocated pairs aligned properly.
89 // If we ever go to quad-word allocations, SlotsPerQuad will become
90 // the controlling alignment constraint. Note that this alignment
91 // requirement is internal to the allocator, and independent of any
92 // particular platform.
93 enum { SlotsPerLong = 2,
94 SlotsPerVecS = 1,
95 SlotsPerVecD = 2,
96 SlotsPerVecX = 4,
97 SlotsPerVecY = 8,
98 SlotsPerVecZ = 16 };
99
100 // A constructor only used by the ADLC output. All mask fields are filled
101 // in directly. Calls to this look something like RM(1,2,3,4);
102 RegMask(
103# define BODY(I) int a##I,
104 FORALL_BODY
105# undef BODY
106 int dummy = 0) {
107# define BODY(I) _A[I] = a##I;
108 FORALL_BODY
109# undef BODY
110 _lwm = 0;
111 _hwm = RM_SIZE - 1;
112 while (_hwm > 0 && _A[_hwm] == 0) _hwm--;
113 while ((_lwm < _hwm) && _A[_lwm] == 0) _lwm++;
114 assert(valid_watermarks(), "post-condition");
115 }
116
117 // Handy copying constructor
118 RegMask(RegMask *rm) {
119 _hwm = rm->_hwm;
120 _lwm = rm->_lwm;
121 for (int i = 0; i < RM_SIZE; i++) {
122 _A[i] = rm->_A[i];
123 }
124 assert(valid_watermarks(), "post-condition");
125 }
126
127 // Construct an empty mask
128 RegMask() {
129 Clear();
130 }
131
132 // Construct a mask with a single bit
133 RegMask(OptoReg::Name reg) {
134 Clear();
135 Insert(reg);
136 }
137
138 // Check for register being in mask
139 int Member(OptoReg::Name reg) const {
140 assert(reg < CHUNK_SIZE, "");
141 return _A[reg>>_LogWordBits] & (1<<(reg&(_WordBits-1)));
142 }
143
144 // The last bit in the register mask indicates that the mask should repeat
145 // indefinitely with ONE bits. Returns TRUE if mask is infinite or
146 // unbounded in size. Returns FALSE if mask is finite size.
147 int is_AllStack() const { return _A[RM_SIZE-1] >> (_WordBits-1); }
148
149 // Work around an -xO3 optimization problme in WS6U1. The old way:
150 // void set_AllStack() { _A[RM_SIZE-1] |= (1<<(_WordBits-1)); }
151 // will cause _A[RM_SIZE-1] to be clobbered, not updated when set_AllStack()
152 // follows an Insert() loop, like the one found in init_spill_mask(). Using
153 // Insert() instead works because the index into _A in computed instead of
154 // constant. See bug 4665841.
155 void set_AllStack() { Insert(OptoReg::Name(CHUNK_SIZE-1)); }
156
157 // Test for being a not-empty mask.
158 int is_NotEmpty() const {
159 assert(valid_watermarks(), "sanity");
160 int tmp = 0;
161 for (int i = _lwm; i <= _hwm; i++) {
162 tmp |= _A[i];
163 }
164 return tmp;
165 }
166
167 // Find lowest-numbered register from mask, or BAD if mask is empty.
168 OptoReg::Name find_first_elem() const {
169 assert(valid_watermarks(), "sanity");
170 for (int i = _lwm; i <= _hwm; i++) {
171 int bits = _A[i];
172 if (bits) {
173 return OptoReg::Name((i<<_LogWordBits) + find_lowest_bit(bits));
174 }
175 }
176 return OptoReg::Name(OptoReg::Bad);
177 }
178
179 // Get highest-numbered register from mask, or BAD if mask is empty.
180 OptoReg::Name find_last_elem() const {
181 assert(valid_watermarks(), "sanity");
182 for (int i = _hwm; i >= _lwm; i--) {
183 int bits = _A[i];
184 if (bits) {
185 return OptoReg::Name((i<<_LogWordBits) + find_highest_bit(bits));
186 }
187 }
188 return OptoReg::Name(OptoReg::Bad);
189 }
190
191 // Clear out partial bits; leave only aligned adjacent bit pairs.
192 void clear_to_pairs();
193
194#ifdef ASSERT
195 // Verify watermarks are sane, i.e., within bounds and that no
196 // register words below or above the watermarks have bits set.
197 bool valid_watermarks() const {
198 assert(_hwm >= 0 && _hwm < RM_SIZE, "_hwm out of range: %d", _hwm);
199 assert(_lwm >= 0 && _lwm < RM_SIZE, "_lwm out of range: %d", _lwm);
200 for (int i = 0; i < _lwm; i++) {
201 assert(_A[i] == 0, "_lwm too high: %d regs at: %d", _lwm, i);
202 }
203 for (int i = _hwm + 1; i < RM_SIZE; i++) {
204 assert(_A[i] == 0, "_hwm too low: %d regs at: %d", _hwm, i);
205 }
206 return true;
207 }
208#endif // !ASSERT
209
210 // Test that the mask contains only aligned adjacent bit pairs
211 bool is_aligned_pairs() const;
212
213 // mask is a pair of misaligned registers
214 bool is_misaligned_pair() const;
215 // Test for single register
216 bool is_bound1() const;
217 // Test for a single adjacent pair
218 bool is_bound_pair() const;
219 // Test for a single adjacent set of ideal register's size.
220 bool is_bound(uint ireg) const;
221
222 // Find the lowest-numbered register set in the mask. Return the
223 // HIGHEST register number in the set, or BAD if no sets.
224 // Assert that the mask contains only bit sets.
225 OptoReg::Name find_first_set(const int size) const;
226
227 // Clear out partial bits; leave only aligned adjacent bit sets of size.
228 void clear_to_sets(const int size);
229 // Smear out partial bits to aligned adjacent bit sets.
230 void smear_to_sets(const int size);
231 // Test that the mask contains only aligned adjacent bit sets
232 bool is_aligned_sets(const int size) const;
233
234 // Test for a single adjacent set
235 int is_bound_set(const int size) const;
236
237 static bool is_vector(uint ireg);
238 static int num_registers(uint ireg);
239
240 // Fast overlap test. Non-zero if any registers in common.
241 int overlap(const RegMask &rm) const {
242 assert(valid_watermarks() && rm.valid_watermarks(), "sanity");
243 int hwm = MIN2(_hwm, rm._hwm);
244 int lwm = MAX2(_lwm, rm._lwm);
245 int result = 0;
246 for (int i = lwm; i <= hwm; i++) {
247 result |= _A[i] & rm._A[i];
248 }
249 return result;
250 }
251
252 // Special test for register pressure based splitting
253 // UP means register only, Register plus stack, or stack only is DOWN
254 bool is_UP() const;
255
256 // Clear a register mask
257 void Clear() {
258 _lwm = RM_SIZE - 1;
259 _hwm = 0;
260 memset(_A, 0, sizeof(int)*RM_SIZE);
261 assert(valid_watermarks(), "sanity");
262 }
263
264 // Fill a register mask with 1's
265 void Set_All() {
266 _lwm = 0;
267 _hwm = RM_SIZE - 1;
268 memset(_A, 0xFF, sizeof(int)*RM_SIZE);
269 assert(valid_watermarks(), "sanity");
270 }
271
272 // Insert register into mask
273 void Insert(OptoReg::Name reg) {
274 assert(reg < CHUNK_SIZE, "sanity");
275 assert(valid_watermarks(), "pre-condition");
276 int index = reg>>_LogWordBits;
277 if (index > _hwm) _hwm = index;
278 if (index < _lwm) _lwm = index;
279 _A[index] |= (1<<(reg&(_WordBits-1)));
280 assert(valid_watermarks(), "post-condition");
281 }
282
283 // Remove register from mask
284 void Remove(OptoReg::Name reg) {
285 assert(reg < CHUNK_SIZE, "");
286 _A[reg>>_LogWordBits] &= ~(1<<(reg&(_WordBits-1)));
287 }
288
289 // OR 'rm' into 'this'
290 void OR(const RegMask &rm) {
291 assert(valid_watermarks() && rm.valid_watermarks(), "sanity");
292 // OR widens the live range
293 if (_lwm > rm._lwm) _lwm = rm._lwm;
294 if (_hwm < rm._hwm) _hwm = rm._hwm;
295 for (int i = _lwm; i <= _hwm; i++) {
296 _A[i] |= rm._A[i];
297 }
298 assert(valid_watermarks(), "sanity");
299 }
300
301 // AND 'rm' into 'this'
302 void AND(const RegMask &rm) {
303 assert(valid_watermarks() && rm.valid_watermarks(), "sanity");
304 // Do not evaluate words outside the current watermark range, as they are
305 // already zero and an &= would not change that
306 for (int i = _lwm; i <= _hwm; i++) {
307 _A[i] &= rm._A[i];
308 }
309 // Narrow the watermarks if &rm spans a narrower range.
310 // Update after to ensure non-overlapping words are zeroed out.
311 if (_lwm < rm._lwm) _lwm = rm._lwm;
312 if (_hwm > rm._hwm) _hwm = rm._hwm;
313 }
314
315 // Subtract 'rm' from 'this'
316 void SUBTRACT(const RegMask &rm) {
317 assert(valid_watermarks() && rm.valid_watermarks(), "sanity");
318 int hwm = MIN2(_hwm, rm._hwm);
319 int lwm = MAX2(_lwm, rm._lwm);
320 for (int i = lwm; i <= hwm; i++) {
321 _A[i] &= ~rm._A[i];
322 }
323 }
324
325 // Compute size of register mask: number of bits
326 uint Size() const;
327
328#ifndef PRODUCT
329 void print() const { dump(); }
330 void dump(outputStream *st = tty) const; // Print a mask
331#endif
332
333 static const RegMask Empty; // Common empty mask
334
335 static bool can_represent(OptoReg::Name reg) {
336 // NOTE: -1 in computation reflects the usage of the last
337 // bit of the regmask as an infinite stack flag and
338 // -7 is to keep mask aligned for largest value (VecZ).
339 return (int)reg < (int)(CHUNK_SIZE-1);
340 }
341 static bool can_represent_arg(OptoReg::Name reg) {
342 // NOTE: -SlotsPerVecZ in computation reflects the need
343 // to keep mask aligned for largest value (VecZ).
344 return (int)reg < (int)(CHUNK_SIZE-SlotsPerVecZ);
345 }
346};
347
348// Do not use this constant directly in client code!
349#undef RM_SIZE
350
351#endif // SHARE_OPTO_REGMASK_HPP
352