| 1 | /* |
| 2 | * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved. |
| 3 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
| 4 | * |
| 5 | * This code is free software; you can redistribute it and/or modify it |
| 6 | * under the terms of the GNU General Public License version 2 only, as |
| 7 | * published by the Free Software Foundation. |
| 8 | * |
| 9 | * This code is distributed in the hope that it will be useful, but WITHOUT |
| 10 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
| 11 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
| 12 | * version 2 for more details (a copy is included in the LICENSE file that |
| 13 | * accompanied this code). |
| 14 | * |
| 15 | * You should have received a copy of the GNU General Public License version |
| 16 | * 2 along with this work; if not, write to the Free Software Foundation, |
| 17 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
| 18 | * |
| 19 | * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
| 20 | * or visit www.oracle.com if you need additional information or have any |
| 21 | * questions. |
| 22 | * |
| 23 | */ |
| 24 | |
| 25 | #ifndef SHARE_OPTO_REGMASK_HPP |
| 26 | #define SHARE_OPTO_REGMASK_HPP |
| 27 | |
| 28 | #include "code/vmreg.hpp" |
| 29 | #include "opto/optoreg.hpp" |
| 30 | #include "utilities/count_leading_zeros.hpp" |
| 31 | #include "utilities/count_trailing_zeros.hpp" |
| 32 | |
| 33 | //-------------Non-zero bit search methods used by RegMask--------------------- |
| 34 | // Find lowest 1, undefined if empty/0 |
| 35 | static int find_lowest_bit(uint32_t mask) { |
| 36 | return count_trailing_zeros(mask); |
| 37 | } |
| 38 | // Find highest 1, undefined if empty/0 |
| 39 | static int find_highest_bit(uint32_t mask) { |
| 40 | return count_leading_zeros(mask) ^ 31; |
| 41 | } |
| 42 | |
| 43 | //------------------------------RegMask---------------------------------------- |
| 44 | // The ADL file describes how to print the machine-specific registers, as well |
| 45 | // as any notion of register classes. We provide a register mask, which is |
| 46 | // just a collection of Register numbers. |
| 47 | |
| 48 | // The ADLC defines 2 macros, RM_SIZE and FORALL_BODY. |
| 49 | // RM_SIZE is the size of a register mask in words. |
| 50 | // FORALL_BODY replicates a BODY macro once per word in the register mask. |
| 51 | // The usage is somewhat clumsy and limited to the regmask.[h,c]pp files. |
| 52 | // However, it means the ADLC can redefine the unroll macro and all loops |
| 53 | // over register masks will be unrolled by the correct amount. |
| 54 | |
| 55 | class RegMask { |
| 56 | union { |
| 57 | double _dummy_force_double_alignment[RM_SIZE>>1]; |
| 58 | // Array of Register Mask bits. This array is large enough to cover |
| 59 | // all the machine registers and all parameters that need to be passed |
| 60 | // on the stack (stack registers) up to some interesting limit. Methods |
| 61 | // that need more parameters will NOT be compiled. On Intel, the limit |
| 62 | // is something like 90+ parameters. |
| 63 | int _A[RM_SIZE]; |
| 64 | }; |
| 65 | // The low and high water marks represents the lowest and highest word |
| 66 | // that might contain set register mask bits, respectively. We guarantee |
| 67 | // that there are no bits in words outside this range, but any word at |
| 68 | // and between the two marks can still be 0. |
| 69 | int _lwm; |
| 70 | int _hwm; |
| 71 | |
| 72 | enum { |
| 73 | _WordBits = BitsPerInt, |
| 74 | _LogWordBits = LogBitsPerInt, |
| 75 | _RM_SIZE = RM_SIZE // local constant, imported, then hidden by #undef |
| 76 | }; |
| 77 | |
| 78 | public: |
| 79 | enum { CHUNK_SIZE = RM_SIZE*_WordBits }; |
| 80 | |
| 81 | // SlotsPerLong is 2, since slots are 32 bits and longs are 64 bits. |
| 82 | // Also, consider the maximum alignment size for a normally allocated |
| 83 | // value. Since we allocate register pairs but not register quads (at |
| 84 | // present), this alignment is SlotsPerLong (== 2). A normally |
| 85 | // aligned allocated register is either a single register, or a pair |
| 86 | // of adjacent registers, the lower-numbered being even. |
| 87 | // See also is_aligned_Pairs() below, and the padding added before |
| 88 | // Matcher::_new_SP to keep allocated pairs aligned properly. |
| 89 | // If we ever go to quad-word allocations, SlotsPerQuad will become |
| 90 | // the controlling alignment constraint. Note that this alignment |
| 91 | // requirement is internal to the allocator, and independent of any |
| 92 | // particular platform. |
| 93 | enum { SlotsPerLong = 2, |
| 94 | SlotsPerVecS = 1, |
| 95 | SlotsPerVecD = 2, |
| 96 | SlotsPerVecX = 4, |
| 97 | SlotsPerVecY = 8, |
| 98 | SlotsPerVecZ = 16 }; |
| 99 | |
| 100 | // A constructor only used by the ADLC output. All mask fields are filled |
| 101 | // in directly. Calls to this look something like RM(1,2,3,4); |
| 102 | RegMask( |
| 103 | # define BODY(I) int a##I, |
| 104 | FORALL_BODY |
| 105 | # undef BODY |
| 106 | int dummy = 0) { |
| 107 | # define BODY(I) _A[I] = a##I; |
| 108 | FORALL_BODY |
| 109 | # undef BODY |
| 110 | _lwm = 0; |
| 111 | _hwm = RM_SIZE - 1; |
| 112 | while (_hwm > 0 && _A[_hwm] == 0) _hwm--; |
| 113 | while ((_lwm < _hwm) && _A[_lwm] == 0) _lwm++; |
| 114 | assert(valid_watermarks(), "post-condition" ); |
| 115 | } |
| 116 | |
| 117 | // Handy copying constructor |
| 118 | RegMask(RegMask *rm) { |
| 119 | _hwm = rm->_hwm; |
| 120 | _lwm = rm->_lwm; |
| 121 | for (int i = 0; i < RM_SIZE; i++) { |
| 122 | _A[i] = rm->_A[i]; |
| 123 | } |
| 124 | assert(valid_watermarks(), "post-condition" ); |
| 125 | } |
| 126 | |
| 127 | // Construct an empty mask |
| 128 | RegMask() { |
| 129 | Clear(); |
| 130 | } |
| 131 | |
| 132 | // Construct a mask with a single bit |
| 133 | RegMask(OptoReg::Name reg) { |
| 134 | Clear(); |
| 135 | Insert(reg); |
| 136 | } |
| 137 | |
| 138 | // Check for register being in mask |
| 139 | int Member(OptoReg::Name reg) const { |
| 140 | assert(reg < CHUNK_SIZE, "" ); |
| 141 | return _A[reg>>_LogWordBits] & (1<<(reg&(_WordBits-1))); |
| 142 | } |
| 143 | |
| 144 | // The last bit in the register mask indicates that the mask should repeat |
| 145 | // indefinitely with ONE bits. Returns TRUE if mask is infinite or |
| 146 | // unbounded in size. Returns FALSE if mask is finite size. |
| 147 | int is_AllStack() const { return _A[RM_SIZE-1] >> (_WordBits-1); } |
| 148 | |
| 149 | // Work around an -xO3 optimization problme in WS6U1. The old way: |
| 150 | // void set_AllStack() { _A[RM_SIZE-1] |= (1<<(_WordBits-1)); } |
| 151 | // will cause _A[RM_SIZE-1] to be clobbered, not updated when set_AllStack() |
| 152 | // follows an Insert() loop, like the one found in init_spill_mask(). Using |
| 153 | // Insert() instead works because the index into _A in computed instead of |
| 154 | // constant. See bug 4665841. |
| 155 | void set_AllStack() { Insert(OptoReg::Name(CHUNK_SIZE-1)); } |
| 156 | |
| 157 | // Test for being a not-empty mask. |
| 158 | int is_NotEmpty() const { |
| 159 | assert(valid_watermarks(), "sanity" ); |
| 160 | int tmp = 0; |
| 161 | for (int i = _lwm; i <= _hwm; i++) { |
| 162 | tmp |= _A[i]; |
| 163 | } |
| 164 | return tmp; |
| 165 | } |
| 166 | |
| 167 | // Find lowest-numbered register from mask, or BAD if mask is empty. |
| 168 | OptoReg::Name find_first_elem() const { |
| 169 | assert(valid_watermarks(), "sanity" ); |
| 170 | for (int i = _lwm; i <= _hwm; i++) { |
| 171 | int bits = _A[i]; |
| 172 | if (bits) { |
| 173 | return OptoReg::Name((i<<_LogWordBits) + find_lowest_bit(bits)); |
| 174 | } |
| 175 | } |
| 176 | return OptoReg::Name(OptoReg::Bad); |
| 177 | } |
| 178 | |
| 179 | // Get highest-numbered register from mask, or BAD if mask is empty. |
| 180 | OptoReg::Name find_last_elem() const { |
| 181 | assert(valid_watermarks(), "sanity" ); |
| 182 | for (int i = _hwm; i >= _lwm; i--) { |
| 183 | int bits = _A[i]; |
| 184 | if (bits) { |
| 185 | return OptoReg::Name((i<<_LogWordBits) + find_highest_bit(bits)); |
| 186 | } |
| 187 | } |
| 188 | return OptoReg::Name(OptoReg::Bad); |
| 189 | } |
| 190 | |
| 191 | // Clear out partial bits; leave only aligned adjacent bit pairs. |
| 192 | void clear_to_pairs(); |
| 193 | |
| 194 | #ifdef ASSERT |
| 195 | // Verify watermarks are sane, i.e., within bounds and that no |
| 196 | // register words below or above the watermarks have bits set. |
| 197 | bool valid_watermarks() const { |
| 198 | assert(_hwm >= 0 && _hwm < RM_SIZE, "_hwm out of range: %d" , _hwm); |
| 199 | assert(_lwm >= 0 && _lwm < RM_SIZE, "_lwm out of range: %d" , _lwm); |
| 200 | for (int i = 0; i < _lwm; i++) { |
| 201 | assert(_A[i] == 0, "_lwm too high: %d regs at: %d" , _lwm, i); |
| 202 | } |
| 203 | for (int i = _hwm + 1; i < RM_SIZE; i++) { |
| 204 | assert(_A[i] == 0, "_hwm too low: %d regs at: %d" , _hwm, i); |
| 205 | } |
| 206 | return true; |
| 207 | } |
| 208 | #endif // !ASSERT |
| 209 | |
| 210 | // Test that the mask contains only aligned adjacent bit pairs |
| 211 | bool is_aligned_pairs() const; |
| 212 | |
| 213 | // mask is a pair of misaligned registers |
| 214 | bool is_misaligned_pair() const; |
| 215 | // Test for single register |
| 216 | bool is_bound1() const; |
| 217 | // Test for a single adjacent pair |
| 218 | bool is_bound_pair() const; |
| 219 | // Test for a single adjacent set of ideal register's size. |
| 220 | bool is_bound(uint ireg) const; |
| 221 | |
| 222 | // Find the lowest-numbered register set in the mask. Return the |
| 223 | // HIGHEST register number in the set, or BAD if no sets. |
| 224 | // Assert that the mask contains only bit sets. |
| 225 | OptoReg::Name find_first_set(const int size) const; |
| 226 | |
| 227 | // Clear out partial bits; leave only aligned adjacent bit sets of size. |
| 228 | void clear_to_sets(const int size); |
| 229 | // Smear out partial bits to aligned adjacent bit sets. |
| 230 | void smear_to_sets(const int size); |
| 231 | // Test that the mask contains only aligned adjacent bit sets |
| 232 | bool is_aligned_sets(const int size) const; |
| 233 | |
| 234 | // Test for a single adjacent set |
| 235 | int is_bound_set(const int size) const; |
| 236 | |
| 237 | static bool is_vector(uint ireg); |
| 238 | static int num_registers(uint ireg); |
| 239 | |
| 240 | // Fast overlap test. Non-zero if any registers in common. |
| 241 | int overlap(const RegMask &rm) const { |
| 242 | assert(valid_watermarks() && rm.valid_watermarks(), "sanity" ); |
| 243 | int hwm = MIN2(_hwm, rm._hwm); |
| 244 | int lwm = MAX2(_lwm, rm._lwm); |
| 245 | int result = 0; |
| 246 | for (int i = lwm; i <= hwm; i++) { |
| 247 | result |= _A[i] & rm._A[i]; |
| 248 | } |
| 249 | return result; |
| 250 | } |
| 251 | |
| 252 | // Special test for register pressure based splitting |
| 253 | // UP means register only, Register plus stack, or stack only is DOWN |
| 254 | bool is_UP() const; |
| 255 | |
| 256 | // Clear a register mask |
| 257 | void Clear() { |
| 258 | _lwm = RM_SIZE - 1; |
| 259 | _hwm = 0; |
| 260 | memset(_A, 0, sizeof(int)*RM_SIZE); |
| 261 | assert(valid_watermarks(), "sanity" ); |
| 262 | } |
| 263 | |
| 264 | // Fill a register mask with 1's |
| 265 | void Set_All() { |
| 266 | _lwm = 0; |
| 267 | _hwm = RM_SIZE - 1; |
| 268 | memset(_A, 0xFF, sizeof(int)*RM_SIZE); |
| 269 | assert(valid_watermarks(), "sanity" ); |
| 270 | } |
| 271 | |
| 272 | // Insert register into mask |
| 273 | void Insert(OptoReg::Name reg) { |
| 274 | assert(reg < CHUNK_SIZE, "sanity" ); |
| 275 | assert(valid_watermarks(), "pre-condition" ); |
| 276 | int index = reg>>_LogWordBits; |
| 277 | if (index > _hwm) _hwm = index; |
| 278 | if (index < _lwm) _lwm = index; |
| 279 | _A[index] |= (1<<(reg&(_WordBits-1))); |
| 280 | assert(valid_watermarks(), "post-condition" ); |
| 281 | } |
| 282 | |
| 283 | // Remove register from mask |
| 284 | void Remove(OptoReg::Name reg) { |
| 285 | assert(reg < CHUNK_SIZE, "" ); |
| 286 | _A[reg>>_LogWordBits] &= ~(1<<(reg&(_WordBits-1))); |
| 287 | } |
| 288 | |
| 289 | // OR 'rm' into 'this' |
| 290 | void OR(const RegMask &rm) { |
| 291 | assert(valid_watermarks() && rm.valid_watermarks(), "sanity" ); |
| 292 | // OR widens the live range |
| 293 | if (_lwm > rm._lwm) _lwm = rm._lwm; |
| 294 | if (_hwm < rm._hwm) _hwm = rm._hwm; |
| 295 | for (int i = _lwm; i <= _hwm; i++) { |
| 296 | _A[i] |= rm._A[i]; |
| 297 | } |
| 298 | assert(valid_watermarks(), "sanity" ); |
| 299 | } |
| 300 | |
| 301 | // AND 'rm' into 'this' |
| 302 | void AND(const RegMask &rm) { |
| 303 | assert(valid_watermarks() && rm.valid_watermarks(), "sanity" ); |
| 304 | // Do not evaluate words outside the current watermark range, as they are |
| 305 | // already zero and an &= would not change that |
| 306 | for (int i = _lwm; i <= _hwm; i++) { |
| 307 | _A[i] &= rm._A[i]; |
| 308 | } |
| 309 | // Narrow the watermarks if &rm spans a narrower range. |
| 310 | // Update after to ensure non-overlapping words are zeroed out. |
| 311 | if (_lwm < rm._lwm) _lwm = rm._lwm; |
| 312 | if (_hwm > rm._hwm) _hwm = rm._hwm; |
| 313 | } |
| 314 | |
| 315 | // Subtract 'rm' from 'this' |
| 316 | void SUBTRACT(const RegMask &rm) { |
| 317 | assert(valid_watermarks() && rm.valid_watermarks(), "sanity" ); |
| 318 | int hwm = MIN2(_hwm, rm._hwm); |
| 319 | int lwm = MAX2(_lwm, rm._lwm); |
| 320 | for (int i = lwm; i <= hwm; i++) { |
| 321 | _A[i] &= ~rm._A[i]; |
| 322 | } |
| 323 | } |
| 324 | |
| 325 | // Compute size of register mask: number of bits |
| 326 | uint Size() const; |
| 327 | |
| 328 | #ifndef PRODUCT |
| 329 | void print() const { dump(); } |
| 330 | void dump(outputStream *st = tty) const; // Print a mask |
| 331 | #endif |
| 332 | |
| 333 | static const RegMask Empty; // Common empty mask |
| 334 | |
| 335 | static bool can_represent(OptoReg::Name reg) { |
| 336 | // NOTE: -1 in computation reflects the usage of the last |
| 337 | // bit of the regmask as an infinite stack flag and |
| 338 | // -7 is to keep mask aligned for largest value (VecZ). |
| 339 | return (int)reg < (int)(CHUNK_SIZE-1); |
| 340 | } |
| 341 | static bool can_represent_arg(OptoReg::Name reg) { |
| 342 | // NOTE: -SlotsPerVecZ in computation reflects the need |
| 343 | // to keep mask aligned for largest value (VecZ). |
| 344 | return (int)reg < (int)(CHUNK_SIZE-SlotsPerVecZ); |
| 345 | } |
| 346 | }; |
| 347 | |
| 348 | // Do not use this constant directly in client code! |
| 349 | #undef RM_SIZE |
| 350 | |
| 351 | #endif // SHARE_OPTO_REGMASK_HPP |
| 352 | |