| 1 | /* | 
|---|
| 2 | * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved. | 
|---|
| 3 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. | 
|---|
| 4 | * | 
|---|
| 5 | * This code is free software; you can redistribute it and/or modify it | 
|---|
| 6 | * under the terms of the GNU General Public License version 2 only, as | 
|---|
| 7 | * published by the Free Software Foundation. | 
|---|
| 8 | * | 
|---|
| 9 | * This code is distributed in the hope that it will be useful, but WITHOUT | 
|---|
| 10 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | 
|---|
| 11 | * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License | 
|---|
| 12 | * version 2 for more details (a copy is included in the LICENSE file that | 
|---|
| 13 | * accompanied this code). | 
|---|
| 14 | * | 
|---|
| 15 | * You should have received a copy of the GNU General Public License version | 
|---|
| 16 | * 2 along with this work; if not, write to the Free Software Foundation, | 
|---|
| 17 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. | 
|---|
| 18 | * | 
|---|
| 19 | * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA | 
|---|
| 20 | * or visit www.oracle.com if you need additional information or have any | 
|---|
| 21 | * questions. | 
|---|
| 22 | * | 
|---|
| 23 | */ | 
|---|
| 24 |  | 
|---|
| 25 | #ifndef SHARE_OPTO_REGMASK_HPP | 
|---|
| 26 | #define SHARE_OPTO_REGMASK_HPP | 
|---|
| 27 |  | 
|---|
| 28 | #include "code/vmreg.hpp" | 
|---|
| 29 | #include "opto/optoreg.hpp" | 
|---|
| 30 | #include "utilities/count_leading_zeros.hpp" | 
|---|
| 31 | #include "utilities/count_trailing_zeros.hpp" | 
|---|
| 32 |  | 
|---|
| 33 | //-------------Non-zero bit search methods used by RegMask--------------------- | 
|---|
| 34 | // Find lowest 1, undefined if empty/0 | 
|---|
| 35 | static int find_lowest_bit(uint32_t mask) { | 
|---|
| 36 | return count_trailing_zeros(mask); | 
|---|
| 37 | } | 
|---|
| 38 | // Find highest 1, undefined if empty/0 | 
|---|
| 39 | static int find_highest_bit(uint32_t mask) { | 
|---|
| 40 | return count_leading_zeros(mask) ^ 31; | 
|---|
| 41 | } | 
|---|
| 42 |  | 
|---|
| 43 | //------------------------------RegMask---------------------------------------- | 
|---|
| 44 | // The ADL file describes how to print the machine-specific registers, as well | 
|---|
| 45 | // as any notion of register classes.  We provide a register mask, which is | 
|---|
| 46 | // just a collection of Register numbers. | 
|---|
| 47 |  | 
|---|
| 48 | // The ADLC defines 2 macros, RM_SIZE and FORALL_BODY. | 
|---|
| 49 | // RM_SIZE is the size of a register mask in words. | 
|---|
| 50 | // FORALL_BODY replicates a BODY macro once per word in the register mask. | 
|---|
| 51 | // The usage is somewhat clumsy and limited to the regmask.[h,c]pp files. | 
|---|
| 52 | // However, it means the ADLC can redefine the unroll macro and all loops | 
|---|
| 53 | // over register masks will be unrolled by the correct amount. | 
|---|
| 54 |  | 
|---|
| 55 | class RegMask { | 
|---|
| 56 | union { | 
|---|
| 57 | double _dummy_force_double_alignment[RM_SIZE>>1]; | 
|---|
| 58 | // Array of Register Mask bits.  This array is large enough to cover | 
|---|
| 59 | // all the machine registers and all parameters that need to be passed | 
|---|
| 60 | // on the stack (stack registers) up to some interesting limit.  Methods | 
|---|
| 61 | // that need more parameters will NOT be compiled.  On Intel, the limit | 
|---|
| 62 | // is something like 90+ parameters. | 
|---|
| 63 | int _A[RM_SIZE]; | 
|---|
| 64 | }; | 
|---|
| 65 | // The low and high water marks represents the lowest and highest word | 
|---|
| 66 | // that might contain set register mask bits, respectively. We guarantee | 
|---|
| 67 | // that there are no bits in words outside this range, but any word at | 
|---|
| 68 | // and between the two marks can still be 0. | 
|---|
| 69 | int _lwm; | 
|---|
| 70 | int _hwm; | 
|---|
| 71 |  | 
|---|
| 72 | enum { | 
|---|
| 73 | _WordBits    = BitsPerInt, | 
|---|
| 74 | _LogWordBits = LogBitsPerInt, | 
|---|
| 75 | _RM_SIZE     = RM_SIZE   // local constant, imported, then hidden by #undef | 
|---|
| 76 | }; | 
|---|
| 77 |  | 
|---|
| 78 | public: | 
|---|
| 79 | enum { CHUNK_SIZE = RM_SIZE*_WordBits }; | 
|---|
| 80 |  | 
|---|
| 81 | // SlotsPerLong is 2, since slots are 32 bits and longs are 64 bits. | 
|---|
| 82 | // Also, consider the maximum alignment size for a normally allocated | 
|---|
| 83 | // value.  Since we allocate register pairs but not register quads (at | 
|---|
| 84 | // present), this alignment is SlotsPerLong (== 2).  A normally | 
|---|
| 85 | // aligned allocated register is either a single register, or a pair | 
|---|
| 86 | // of adjacent registers, the lower-numbered being even. | 
|---|
| 87 | // See also is_aligned_Pairs() below, and the padding added before | 
|---|
| 88 | // Matcher::_new_SP to keep allocated pairs aligned properly. | 
|---|
| 89 | // If we ever go to quad-word allocations, SlotsPerQuad will become | 
|---|
| 90 | // the controlling alignment constraint.  Note that this alignment | 
|---|
| 91 | // requirement is internal to the allocator, and independent of any | 
|---|
| 92 | // particular platform. | 
|---|
| 93 | enum { SlotsPerLong = 2, | 
|---|
| 94 | SlotsPerVecS = 1, | 
|---|
| 95 | SlotsPerVecD = 2, | 
|---|
| 96 | SlotsPerVecX = 4, | 
|---|
| 97 | SlotsPerVecY = 8, | 
|---|
| 98 | SlotsPerVecZ = 16 }; | 
|---|
| 99 |  | 
|---|
| 100 | // A constructor only used by the ADLC output.  All mask fields are filled | 
|---|
| 101 | // in directly.  Calls to this look something like RM(1,2,3,4); | 
|---|
| 102 | RegMask( | 
|---|
| 103 | #   define BODY(I) int a##I, | 
|---|
| 104 | FORALL_BODY | 
|---|
| 105 | #   undef BODY | 
|---|
| 106 | int dummy = 0) { | 
|---|
| 107 | #   define BODY(I) _A[I] = a##I; | 
|---|
| 108 | FORALL_BODY | 
|---|
| 109 | #   undef BODY | 
|---|
| 110 | _lwm = 0; | 
|---|
| 111 | _hwm = RM_SIZE - 1; | 
|---|
| 112 | while (_hwm > 0 && _A[_hwm] == 0) _hwm--; | 
|---|
| 113 | while ((_lwm < _hwm) && _A[_lwm] == 0) _lwm++; | 
|---|
| 114 | assert(valid_watermarks(), "post-condition"); | 
|---|
| 115 | } | 
|---|
| 116 |  | 
|---|
| 117 | // Handy copying constructor | 
|---|
| 118 | RegMask(RegMask *rm) { | 
|---|
| 119 | _hwm = rm->_hwm; | 
|---|
| 120 | _lwm = rm->_lwm; | 
|---|
| 121 | for (int i = 0; i < RM_SIZE; i++) { | 
|---|
| 122 | _A[i] = rm->_A[i]; | 
|---|
| 123 | } | 
|---|
| 124 | assert(valid_watermarks(), "post-condition"); | 
|---|
| 125 | } | 
|---|
| 126 |  | 
|---|
| 127 | // Construct an empty mask | 
|---|
| 128 | RegMask() { | 
|---|
| 129 | Clear(); | 
|---|
| 130 | } | 
|---|
| 131 |  | 
|---|
| 132 | // Construct a mask with a single bit | 
|---|
| 133 | RegMask(OptoReg::Name reg) { | 
|---|
| 134 | Clear(); | 
|---|
| 135 | Insert(reg); | 
|---|
| 136 | } | 
|---|
| 137 |  | 
|---|
| 138 | // Check for register being in mask | 
|---|
| 139 | int Member(OptoReg::Name reg) const { | 
|---|
| 140 | assert(reg < CHUNK_SIZE, ""); | 
|---|
| 141 | return _A[reg>>_LogWordBits] & (1<<(reg&(_WordBits-1))); | 
|---|
| 142 | } | 
|---|
| 143 |  | 
|---|
| 144 | // The last bit in the register mask indicates that the mask should repeat | 
|---|
| 145 | // indefinitely with ONE bits.  Returns TRUE if mask is infinite or | 
|---|
| 146 | // unbounded in size.  Returns FALSE if mask is finite size. | 
|---|
| 147 | int is_AllStack() const { return _A[RM_SIZE-1] >> (_WordBits-1); } | 
|---|
| 148 |  | 
|---|
| 149 | // Work around an -xO3 optimization problme in WS6U1. The old way: | 
|---|
| 150 | //   void set_AllStack() { _A[RM_SIZE-1] |= (1<<(_WordBits-1)); } | 
|---|
| 151 | // will cause _A[RM_SIZE-1] to be clobbered, not updated when set_AllStack() | 
|---|
| 152 | // follows an Insert() loop, like the one found in init_spill_mask(). Using | 
|---|
| 153 | // Insert() instead works because the index into _A in computed instead of | 
|---|
| 154 | // constant.  See bug 4665841. | 
|---|
| 155 | void set_AllStack() { Insert(OptoReg::Name(CHUNK_SIZE-1)); } | 
|---|
| 156 |  | 
|---|
| 157 | // Test for being a not-empty mask. | 
|---|
| 158 | int is_NotEmpty() const { | 
|---|
| 159 | assert(valid_watermarks(), "sanity"); | 
|---|
| 160 | int tmp = 0; | 
|---|
| 161 | for (int i = _lwm; i <= _hwm; i++) { | 
|---|
| 162 | tmp |= _A[i]; | 
|---|
| 163 | } | 
|---|
| 164 | return tmp; | 
|---|
| 165 | } | 
|---|
| 166 |  | 
|---|
| 167 | // Find lowest-numbered register from mask, or BAD if mask is empty. | 
|---|
| 168 | OptoReg::Name find_first_elem() const { | 
|---|
| 169 | assert(valid_watermarks(), "sanity"); | 
|---|
| 170 | for (int i = _lwm; i <= _hwm; i++) { | 
|---|
| 171 | int bits = _A[i]; | 
|---|
| 172 | if (bits) { | 
|---|
| 173 | return OptoReg::Name((i<<_LogWordBits) + find_lowest_bit(bits)); | 
|---|
| 174 | } | 
|---|
| 175 | } | 
|---|
| 176 | return OptoReg::Name(OptoReg::Bad); | 
|---|
| 177 | } | 
|---|
| 178 |  | 
|---|
| 179 | // Get highest-numbered register from mask, or BAD if mask is empty. | 
|---|
| 180 | OptoReg::Name find_last_elem() const { | 
|---|
| 181 | assert(valid_watermarks(), "sanity"); | 
|---|
| 182 | for (int i = _hwm; i >= _lwm; i--) { | 
|---|
| 183 | int bits = _A[i]; | 
|---|
| 184 | if (bits) { | 
|---|
| 185 | return OptoReg::Name((i<<_LogWordBits) + find_highest_bit(bits)); | 
|---|
| 186 | } | 
|---|
| 187 | } | 
|---|
| 188 | return OptoReg::Name(OptoReg::Bad); | 
|---|
| 189 | } | 
|---|
| 190 |  | 
|---|
| 191 | // Clear out partial bits; leave only aligned adjacent bit pairs. | 
|---|
| 192 | void clear_to_pairs(); | 
|---|
| 193 |  | 
|---|
| 194 | #ifdef ASSERT | 
|---|
| 195 | // Verify watermarks are sane, i.e., within bounds and that no | 
|---|
| 196 | // register words below or above the watermarks have bits set. | 
|---|
| 197 | bool valid_watermarks() const { | 
|---|
| 198 | assert(_hwm >= 0 && _hwm < RM_SIZE, "_hwm out of range: %d", _hwm); | 
|---|
| 199 | assert(_lwm >= 0 && _lwm < RM_SIZE, "_lwm out of range: %d", _lwm); | 
|---|
| 200 | for (int i = 0; i < _lwm; i++) { | 
|---|
| 201 | assert(_A[i] == 0, "_lwm too high: %d regs at: %d", _lwm, i); | 
|---|
| 202 | } | 
|---|
| 203 | for (int i = _hwm + 1; i < RM_SIZE; i++) { | 
|---|
| 204 | assert(_A[i] == 0, "_hwm too low: %d regs at: %d", _hwm, i); | 
|---|
| 205 | } | 
|---|
| 206 | return true; | 
|---|
| 207 | } | 
|---|
| 208 | #endif // !ASSERT | 
|---|
| 209 |  | 
|---|
| 210 | // Test that the mask contains only aligned adjacent bit pairs | 
|---|
| 211 | bool is_aligned_pairs() const; | 
|---|
| 212 |  | 
|---|
| 213 | // mask is a pair of misaligned registers | 
|---|
| 214 | bool is_misaligned_pair() const; | 
|---|
| 215 | // Test for single register | 
|---|
| 216 | bool is_bound1() const; | 
|---|
| 217 | // Test for a single adjacent pair | 
|---|
| 218 | bool is_bound_pair() const; | 
|---|
| 219 | // Test for a single adjacent set of ideal register's size. | 
|---|
| 220 | bool is_bound(uint ireg) const; | 
|---|
| 221 |  | 
|---|
| 222 | // Find the lowest-numbered register set in the mask.  Return the | 
|---|
| 223 | // HIGHEST register number in the set, or BAD if no sets. | 
|---|
| 224 | // Assert that the mask contains only bit sets. | 
|---|
| 225 | OptoReg::Name find_first_set(const int size) const; | 
|---|
| 226 |  | 
|---|
| 227 | // Clear out partial bits; leave only aligned adjacent bit sets of size. | 
|---|
| 228 | void clear_to_sets(const int size); | 
|---|
| 229 | // Smear out partial bits to aligned adjacent bit sets. | 
|---|
| 230 | void smear_to_sets(const int size); | 
|---|
| 231 | // Test that the mask contains only aligned adjacent bit sets | 
|---|
| 232 | bool is_aligned_sets(const int size) const; | 
|---|
| 233 |  | 
|---|
| 234 | // Test for a single adjacent set | 
|---|
| 235 | int is_bound_set(const int size) const; | 
|---|
| 236 |  | 
|---|
| 237 | static bool is_vector(uint ireg); | 
|---|
| 238 | static int num_registers(uint ireg); | 
|---|
| 239 |  | 
|---|
| 240 | // Fast overlap test.  Non-zero if any registers in common. | 
|---|
| 241 | int overlap(const RegMask &rm) const { | 
|---|
| 242 | assert(valid_watermarks() && rm.valid_watermarks(), "sanity"); | 
|---|
| 243 | int hwm = MIN2(_hwm, rm._hwm); | 
|---|
| 244 | int lwm = MAX2(_lwm, rm._lwm); | 
|---|
| 245 | int result = 0; | 
|---|
| 246 | for (int i = lwm; i <= hwm; i++) { | 
|---|
| 247 | result |= _A[i] & rm._A[i]; | 
|---|
| 248 | } | 
|---|
| 249 | return result; | 
|---|
| 250 | } | 
|---|
| 251 |  | 
|---|
| 252 | // Special test for register pressure based splitting | 
|---|
| 253 | // UP means register only, Register plus stack, or stack only is DOWN | 
|---|
| 254 | bool is_UP() const; | 
|---|
| 255 |  | 
|---|
| 256 | // Clear a register mask | 
|---|
| 257 | void Clear() { | 
|---|
| 258 | _lwm = RM_SIZE - 1; | 
|---|
| 259 | _hwm = 0; | 
|---|
| 260 | memset(_A, 0, sizeof(int)*RM_SIZE); | 
|---|
| 261 | assert(valid_watermarks(), "sanity"); | 
|---|
| 262 | } | 
|---|
| 263 |  | 
|---|
| 264 | // Fill a register mask with 1's | 
|---|
| 265 | void Set_All() { | 
|---|
| 266 | _lwm = 0; | 
|---|
| 267 | _hwm = RM_SIZE - 1; | 
|---|
| 268 | memset(_A, 0xFF, sizeof(int)*RM_SIZE); | 
|---|
| 269 | assert(valid_watermarks(), "sanity"); | 
|---|
| 270 | } | 
|---|
| 271 |  | 
|---|
| 272 | // Insert register into mask | 
|---|
| 273 | void Insert(OptoReg::Name reg) { | 
|---|
| 274 | assert(reg < CHUNK_SIZE, "sanity"); | 
|---|
| 275 | assert(valid_watermarks(), "pre-condition"); | 
|---|
| 276 | int index = reg>>_LogWordBits; | 
|---|
| 277 | if (index > _hwm) _hwm = index; | 
|---|
| 278 | if (index < _lwm) _lwm = index; | 
|---|
| 279 | _A[index] |= (1<<(reg&(_WordBits-1))); | 
|---|
| 280 | assert(valid_watermarks(), "post-condition"); | 
|---|
| 281 | } | 
|---|
| 282 |  | 
|---|
| 283 | // Remove register from mask | 
|---|
| 284 | void Remove(OptoReg::Name reg) { | 
|---|
| 285 | assert(reg < CHUNK_SIZE, ""); | 
|---|
| 286 | _A[reg>>_LogWordBits] &= ~(1<<(reg&(_WordBits-1))); | 
|---|
| 287 | } | 
|---|
| 288 |  | 
|---|
| 289 | // OR 'rm' into 'this' | 
|---|
| 290 | void OR(const RegMask &rm) { | 
|---|
| 291 | assert(valid_watermarks() && rm.valid_watermarks(), "sanity"); | 
|---|
| 292 | // OR widens the live range | 
|---|
| 293 | if (_lwm > rm._lwm) _lwm = rm._lwm; | 
|---|
| 294 | if (_hwm < rm._hwm) _hwm = rm._hwm; | 
|---|
| 295 | for (int i = _lwm; i <= _hwm; i++) { | 
|---|
| 296 | _A[i] |= rm._A[i]; | 
|---|
| 297 | } | 
|---|
| 298 | assert(valid_watermarks(), "sanity"); | 
|---|
| 299 | } | 
|---|
| 300 |  | 
|---|
| 301 | // AND 'rm' into 'this' | 
|---|
| 302 | void AND(const RegMask &rm) { | 
|---|
| 303 | assert(valid_watermarks() && rm.valid_watermarks(), "sanity"); | 
|---|
| 304 | // Do not evaluate words outside the current watermark range, as they are | 
|---|
| 305 | // already zero and an &= would not change that | 
|---|
| 306 | for (int i = _lwm; i <= _hwm; i++) { | 
|---|
| 307 | _A[i] &= rm._A[i]; | 
|---|
| 308 | } | 
|---|
| 309 | // Narrow the watermarks if &rm spans a narrower range. | 
|---|
| 310 | // Update after to ensure non-overlapping words are zeroed out. | 
|---|
| 311 | if (_lwm < rm._lwm) _lwm = rm._lwm; | 
|---|
| 312 | if (_hwm > rm._hwm) _hwm = rm._hwm; | 
|---|
| 313 | } | 
|---|
| 314 |  | 
|---|
| 315 | // Subtract 'rm' from 'this' | 
|---|
| 316 | void SUBTRACT(const RegMask &rm) { | 
|---|
| 317 | assert(valid_watermarks() && rm.valid_watermarks(), "sanity"); | 
|---|
| 318 | int hwm = MIN2(_hwm, rm._hwm); | 
|---|
| 319 | int lwm = MAX2(_lwm, rm._lwm); | 
|---|
| 320 | for (int i = lwm; i <= hwm; i++) { | 
|---|
| 321 | _A[i] &= ~rm._A[i]; | 
|---|
| 322 | } | 
|---|
| 323 | } | 
|---|
| 324 |  | 
|---|
| 325 | // Compute size of register mask: number of bits | 
|---|
| 326 | uint Size() const; | 
|---|
| 327 |  | 
|---|
| 328 | #ifndef PRODUCT | 
|---|
| 329 | void print() const { dump(); } | 
|---|
| 330 | void dump(outputStream *st = tty) const; // Print a mask | 
|---|
| 331 | #endif | 
|---|
| 332 |  | 
|---|
| 333 | static const RegMask Empty;   // Common empty mask | 
|---|
| 334 |  | 
|---|
| 335 | static bool can_represent(OptoReg::Name reg) { | 
|---|
| 336 | // NOTE: -1 in computation reflects the usage of the last | 
|---|
| 337 | //       bit of the regmask as an infinite stack flag and | 
|---|
| 338 | //       -7 is to keep mask aligned for largest value (VecZ). | 
|---|
| 339 | return (int)reg < (int)(CHUNK_SIZE-1); | 
|---|
| 340 | } | 
|---|
| 341 | static bool can_represent_arg(OptoReg::Name reg) { | 
|---|
| 342 | // NOTE: -SlotsPerVecZ in computation reflects the need | 
|---|
| 343 | //       to keep mask aligned for largest value (VecZ). | 
|---|
| 344 | return (int)reg < (int)(CHUNK_SIZE-SlotsPerVecZ); | 
|---|
| 345 | } | 
|---|
| 346 | }; | 
|---|
| 347 |  | 
|---|
| 348 | // Do not use this constant directly in client code! | 
|---|
| 349 | #undef RM_SIZE | 
|---|
| 350 |  | 
|---|
| 351 | #endif // SHARE_OPTO_REGMASK_HPP | 
|---|
| 352 |  | 
|---|