| 1 | /* | 
|---|
| 2 | * Copyright (c) 2002, 2018, Oracle and/or its affiliates. All rights reserved. | 
|---|
| 3 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. | 
|---|
| 4 | * | 
|---|
| 5 | * This code is free software; you can redistribute it and/or modify it | 
|---|
| 6 | * under the terms of the GNU General Public License version 2 only, as | 
|---|
| 7 | * published by the Free Software Foundation. | 
|---|
| 8 | * | 
|---|
| 9 | * This code is distributed in the hope that it will be useful, but WITHOUT | 
|---|
| 10 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | 
|---|
| 11 | * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License | 
|---|
| 12 | * version 2 for more details (a copy is included in the LICENSE file that | 
|---|
| 13 | * accompanied this code). | 
|---|
| 14 | * | 
|---|
| 15 | * You should have received a copy of the GNU General Public License version | 
|---|
| 16 | * 2 along with this work; if not, write to the Free Software Foundation, | 
|---|
| 17 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. | 
|---|
| 18 | * | 
|---|
| 19 | * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA | 
|---|
| 20 | * or visit www.oracle.com if you need additional information or have any | 
|---|
| 21 | * questions. | 
|---|
| 22 | * | 
|---|
| 23 | */ | 
|---|
| 24 |  | 
|---|
| 25 | #include "precompiled.hpp" | 
|---|
| 26 | #include "code/vmreg.inline.hpp" | 
|---|
| 27 | #include "compiler/oopMap.hpp" | 
|---|
| 28 | #include "memory/resourceArea.hpp" | 
|---|
| 29 | #include "opto/addnode.hpp" | 
|---|
| 30 | #include "opto/callnode.hpp" | 
|---|
| 31 | #include "opto/compile.hpp" | 
|---|
| 32 | #include "opto/machnode.hpp" | 
|---|
| 33 | #include "opto/matcher.hpp" | 
|---|
| 34 | #include "opto/phase.hpp" | 
|---|
| 35 | #include "opto/regalloc.hpp" | 
|---|
| 36 | #include "opto/rootnode.hpp" | 
|---|
| 37 | #include "utilities/align.hpp" | 
|---|
| 38 |  | 
|---|
| 39 | // The functions in this file builds OopMaps after all scheduling is done. | 
|---|
| 40 | // | 
|---|
| 41 | // OopMaps contain a list of all registers and stack-slots containing oops (so | 
|---|
| 42 | // they can be updated by GC).  OopMaps also contain a list of derived-pointer | 
|---|
| 43 | // base-pointer pairs.  When the base is moved, the derived pointer moves to | 
|---|
| 44 | // follow it.  Finally, any registers holding callee-save values are also | 
|---|
| 45 | // recorded.  These might contain oops, but only the caller knows. | 
|---|
| 46 | // | 
|---|
| 47 | // BuildOopMaps implements a simple forward reaching-defs solution.  At each | 
|---|
| 48 | // GC point we'll have the reaching-def Nodes.  If the reaching Nodes are | 
|---|
| 49 | // typed as pointers (no offset), then they are oops.  Pointers+offsets are | 
|---|
| 50 | // derived pointers, and bases can be found from them.  Finally, we'll also | 
|---|
| 51 | // track reaching callee-save values.  Note that a copy of a callee-save value | 
|---|
| 52 | // "kills" it's source, so that only 1 copy of a callee-save value is alive at | 
|---|
| 53 | // a time. | 
|---|
| 54 | // | 
|---|
| 55 | // We run a simple bitvector liveness pass to help trim out dead oops.  Due to | 
|---|
| 56 | // irreducible loops, we can have a reaching def of an oop that only reaches | 
|---|
| 57 | // along one path and no way to know if it's valid or not on the other path. | 
|---|
| 58 | // The bitvectors are quite dense and the liveness pass is fast. | 
|---|
| 59 | // | 
|---|
| 60 | // At GC points, we consult this information to build OopMaps.  All reaching | 
|---|
| 61 | // defs typed as oops are added to the OopMap.  Only 1 instance of a | 
|---|
| 62 | // callee-save register can be recorded.  For derived pointers, we'll have to | 
|---|
| 63 | // find and record the register holding the base. | 
|---|
| 64 | // | 
|---|
| 65 | // The reaching def's is a simple 1-pass worklist approach.  I tried a clever | 
|---|
| 66 | // breadth-first approach but it was worse (showed O(n^2) in the | 
|---|
| 67 | // pick-next-block code). | 
|---|
| 68 | // | 
|---|
| 69 | // The relevant data is kept in a struct of arrays (it could just as well be | 
|---|
| 70 | // an array of structs, but the struct-of-arrays is generally a little more | 
|---|
| 71 | // efficient).  The arrays are indexed by register number (including | 
|---|
| 72 | // stack-slots as registers) and so is bounded by 200 to 300 elements in | 
|---|
| 73 | // practice.  One array will map to a reaching def Node (or NULL for | 
|---|
| 74 | // conflict/dead).  The other array will map to a callee-saved register or | 
|---|
| 75 | // OptoReg::Bad for not-callee-saved. | 
|---|
| 76 |  | 
|---|
| 77 |  | 
|---|
| 78 | // Structure to pass around | 
|---|
| 79 | struct OopFlow : public ResourceObj { | 
|---|
| 80 | short *_callees;              // Array mapping register to callee-saved | 
|---|
| 81 | Node **_defs;                 // array mapping register to reaching def | 
|---|
| 82 | // or NULL if dead/conflict | 
|---|
| 83 | // OopFlow structs, when not being actively modified, describe the _end_ of | 
|---|
| 84 | // this block. | 
|---|
| 85 | Block *_b;                    // Block for this struct | 
|---|
| 86 | OopFlow *_next;               // Next free OopFlow | 
|---|
| 87 | // or NULL if dead/conflict | 
|---|
| 88 | Compile* C; | 
|---|
| 89 |  | 
|---|
| 90 | OopFlow( short *callees, Node **defs, Compile* c ) : _callees(callees), _defs(defs), | 
|---|
| 91 | _b(NULL), _next(NULL), C(c) { } | 
|---|
| 92 |  | 
|---|
| 93 | // Given reaching-defs for this block start, compute it for this block end | 
|---|
| 94 | void compute_reach( PhaseRegAlloc *regalloc, int max_reg, Dict *safehash ); | 
|---|
| 95 |  | 
|---|
| 96 | // Merge these two OopFlows into the 'this' pointer. | 
|---|
| 97 | void merge( OopFlow *flow, int max_reg ); | 
|---|
| 98 |  | 
|---|
| 99 | // Copy a 'flow' over an existing flow | 
|---|
| 100 | void clone( OopFlow *flow, int max_size); | 
|---|
| 101 |  | 
|---|
| 102 | // Make a new OopFlow from scratch | 
|---|
| 103 | static OopFlow *make( Arena *A, int max_size, Compile* C ); | 
|---|
| 104 |  | 
|---|
| 105 | // Build an oopmap from the current flow info | 
|---|
| 106 | OopMap *build_oop_map( Node *n, int max_reg, PhaseRegAlloc *regalloc, int* live ); | 
|---|
| 107 | }; | 
|---|
| 108 |  | 
|---|
| 109 | // Given reaching-defs for this block start, compute it for this block end | 
|---|
| 110 | void OopFlow::compute_reach( PhaseRegAlloc *regalloc, int max_reg, Dict *safehash ) { | 
|---|
| 111 |  | 
|---|
| 112 | for( uint i=0; i<_b->number_of_nodes(); i++ ) { | 
|---|
| 113 | Node *n = _b->get_node(i); | 
|---|
| 114 |  | 
|---|
| 115 | if( n->jvms() ) {           // Build an OopMap here? | 
|---|
| 116 | JVMState *jvms = n->jvms(); | 
|---|
| 117 | // no map needed for leaf calls | 
|---|
| 118 | if( n->is_MachSafePoint() && !n->is_MachCallLeaf() ) { | 
|---|
| 119 | int *live = (int*) (*safehash)[n]; | 
|---|
| 120 | assert( live, "must find live"); | 
|---|
| 121 | n->as_MachSafePoint()->set_oop_map( build_oop_map(n,max_reg,regalloc, live) ); | 
|---|
| 122 | } | 
|---|
| 123 | } | 
|---|
| 124 |  | 
|---|
| 125 | // Assign new reaching def's. | 
|---|
| 126 | // Note that I padded the _defs and _callees arrays so it's legal | 
|---|
| 127 | // to index at _defs[OptoReg::Bad]. | 
|---|
| 128 | OptoReg::Name first = regalloc->get_reg_first(n); | 
|---|
| 129 | OptoReg::Name second = regalloc->get_reg_second(n); | 
|---|
| 130 | _defs[first] = n; | 
|---|
| 131 | _defs[second] = n; | 
|---|
| 132 |  | 
|---|
| 133 | // Pass callee-save info around copies | 
|---|
| 134 | int idx = n->is_Copy(); | 
|---|
| 135 | if( idx ) {                 // Copies move callee-save info | 
|---|
| 136 | OptoReg::Name old_first = regalloc->get_reg_first(n->in(idx)); | 
|---|
| 137 | OptoReg::Name old_second = regalloc->get_reg_second(n->in(idx)); | 
|---|
| 138 | int tmp_first = _callees[old_first]; | 
|---|
| 139 | int tmp_second = _callees[old_second]; | 
|---|
| 140 | _callees[old_first] = OptoReg::Bad; // callee-save is moved, dead in old location | 
|---|
| 141 | _callees[old_second] = OptoReg::Bad; | 
|---|
| 142 | _callees[first] = tmp_first; | 
|---|
| 143 | _callees[second] = tmp_second; | 
|---|
| 144 | } else if( n->is_Phi() ) {  // Phis do not mod callee-saves | 
|---|
| 145 | assert( _callees[first] == _callees[regalloc->get_reg_first(n->in(1))], ""); | 
|---|
| 146 | assert( _callees[second] == _callees[regalloc->get_reg_second(n->in(1))], ""); | 
|---|
| 147 | assert( _callees[first] == _callees[regalloc->get_reg_first(n->in(n->req()-1))], ""); | 
|---|
| 148 | assert( _callees[second] == _callees[regalloc->get_reg_second(n->in(n->req()-1))], ""); | 
|---|
| 149 | } else { | 
|---|
| 150 | _callees[first] = OptoReg::Bad; // No longer holding a callee-save value | 
|---|
| 151 | _callees[second] = OptoReg::Bad; | 
|---|
| 152 |  | 
|---|
| 153 | // Find base case for callee saves | 
|---|
| 154 | if( n->is_Proj() && n->in(0)->is_Start() ) { | 
|---|
| 155 | if( OptoReg::is_reg(first) && | 
|---|
| 156 | regalloc->_matcher.is_save_on_entry(first) ) | 
|---|
| 157 | _callees[first] = first; | 
|---|
| 158 | if( OptoReg::is_reg(second) && | 
|---|
| 159 | regalloc->_matcher.is_save_on_entry(second) ) | 
|---|
| 160 | _callees[second] = second; | 
|---|
| 161 | } | 
|---|
| 162 | } | 
|---|
| 163 | } | 
|---|
| 164 | } | 
|---|
| 165 |  | 
|---|
| 166 | // Merge the given flow into the 'this' flow | 
|---|
| 167 | void OopFlow::merge( OopFlow *flow, int max_reg ) { | 
|---|
| 168 | assert( _b == NULL, "merging into a happy flow"); | 
|---|
| 169 | assert( flow->_b, "this flow is still alive"); | 
|---|
| 170 | assert( flow != this, "no self flow"); | 
|---|
| 171 |  | 
|---|
| 172 | // Do the merge.  If there are any differences, drop to 'bottom' which | 
|---|
| 173 | // is OptoReg::Bad or NULL depending. | 
|---|
| 174 | for( int i=0; i<max_reg; i++ ) { | 
|---|
| 175 | // Merge the callee-save's | 
|---|
| 176 | if( _callees[i] != flow->_callees[i] ) | 
|---|
| 177 | _callees[i] = OptoReg::Bad; | 
|---|
| 178 | // Merge the reaching defs | 
|---|
| 179 | if( _defs[i] != flow->_defs[i] ) | 
|---|
| 180 | _defs[i] = NULL; | 
|---|
| 181 | } | 
|---|
| 182 |  | 
|---|
| 183 | } | 
|---|
| 184 |  | 
|---|
| 185 | void OopFlow::clone( OopFlow *flow, int max_size ) { | 
|---|
| 186 | _b = flow->_b; | 
|---|
| 187 | memcpy( _callees, flow->_callees, sizeof(short)*max_size); | 
|---|
| 188 | memcpy( _defs   , flow->_defs   , sizeof(Node*)*max_size); | 
|---|
| 189 | } | 
|---|
| 190 |  | 
|---|
| 191 | OopFlow *OopFlow::make( Arena *A, int max_size, Compile* C ) { | 
|---|
| 192 | short *callees = NEW_ARENA_ARRAY(A,short,max_size+1); | 
|---|
| 193 | Node **defs    = NEW_ARENA_ARRAY(A,Node*,max_size+1); | 
|---|
| 194 | debug_only( memset(defs,0,(max_size+1)*sizeof(Node*)) ); | 
|---|
| 195 | OopFlow *flow = new (A) OopFlow(callees+1, defs+1, C); | 
|---|
| 196 | assert( &flow->_callees[OptoReg::Bad] == callees, "Ok to index at OptoReg::Bad"); | 
|---|
| 197 | assert( &flow->_defs   [OptoReg::Bad] == defs   , "Ok to index at OptoReg::Bad"); | 
|---|
| 198 | return flow; | 
|---|
| 199 | } | 
|---|
| 200 |  | 
|---|
| 201 | static int get_live_bit( int *live, int reg ) { | 
|---|
| 202 | return live[reg>>LogBitsPerInt] &   (1<<(reg&(BitsPerInt-1))); } | 
|---|
| 203 | static void set_live_bit( int *live, int reg ) { | 
|---|
| 204 | live[reg>>LogBitsPerInt] |=  (1<<(reg&(BitsPerInt-1))); } | 
|---|
| 205 | static void clr_live_bit( int *live, int reg ) { | 
|---|
| 206 | live[reg>>LogBitsPerInt] &= ~(1<<(reg&(BitsPerInt-1))); } | 
|---|
| 207 |  | 
|---|
| 208 | // Build an oopmap from the current flow info | 
|---|
| 209 | OopMap *OopFlow::build_oop_map( Node *n, int max_reg, PhaseRegAlloc *regalloc, int* live ) { | 
|---|
| 210 | int framesize = regalloc->_framesize; | 
|---|
| 211 | int max_inarg_slot = OptoReg::reg2stack(regalloc->_matcher._new_SP); | 
|---|
| 212 | debug_only( char *dup_check = NEW_RESOURCE_ARRAY(char,OptoReg::stack0()); | 
|---|
| 213 | memset(dup_check,0,OptoReg::stack0()) ); | 
|---|
| 214 |  | 
|---|
| 215 | OopMap *omap = new OopMap( framesize,  max_inarg_slot ); | 
|---|
| 216 | MachCallNode *mcall = n->is_MachCall() ? n->as_MachCall() : NULL; | 
|---|
| 217 | JVMState* jvms = n->jvms(); | 
|---|
| 218 |  | 
|---|
| 219 | // For all registers do... | 
|---|
| 220 | for( int reg=0; reg<max_reg; reg++ ) { | 
|---|
| 221 | if( get_live_bit(live,reg) == 0 ) | 
|---|
| 222 | continue;                 // Ignore if not live | 
|---|
| 223 |  | 
|---|
| 224 | // %%% C2 can use 2 OptoRegs when the physical register is only one 64bit | 
|---|
| 225 | // register in that case we'll get an non-concrete register for the second | 
|---|
| 226 | // half. We only need to tell the map the register once! | 
|---|
| 227 | // | 
|---|
| 228 | // However for the moment we disable this change and leave things as they | 
|---|
| 229 | // were. | 
|---|
| 230 |  | 
|---|
| 231 | VMReg r = OptoReg::as_VMReg(OptoReg::Name(reg), framesize, max_inarg_slot); | 
|---|
| 232 |  | 
|---|
| 233 | if (false && r->is_reg() && !r->is_concrete()) { | 
|---|
| 234 | continue; | 
|---|
| 235 | } | 
|---|
| 236 |  | 
|---|
| 237 | // See if dead (no reaching def). | 
|---|
| 238 | Node *def = _defs[reg];     // Get reaching def | 
|---|
| 239 | assert( def, "since live better have reaching def"); | 
|---|
| 240 |  | 
|---|
| 241 | // Classify the reaching def as oop, derived, callee-save, dead, or other | 
|---|
| 242 | const Type *t = def->bottom_type(); | 
|---|
| 243 | if( t->isa_oop_ptr() ) {    // Oop or derived? | 
|---|
| 244 | assert( !OptoReg::is_valid(_callees[reg]), "oop can't be callee save"); | 
|---|
| 245 | #ifdef _LP64 | 
|---|
| 246 | // 64-bit pointers record oop-ishness on 2 aligned adjacent registers. | 
|---|
| 247 | // Make sure both are record from the same reaching def, but do not | 
|---|
| 248 | // put both into the oopmap. | 
|---|
| 249 | if( (reg&1) == 1 ) {      // High half of oop-pair? | 
|---|
| 250 | assert( _defs[reg-1] == _defs[reg], "both halves from same reaching def"); | 
|---|
| 251 | continue;               // Do not record high parts in oopmap | 
|---|
| 252 | } | 
|---|
| 253 | #endif | 
|---|
| 254 |  | 
|---|
| 255 | // Check for a legal reg name in the oopMap and bailout if it is not. | 
|---|
| 256 | if (!omap->legal_vm_reg_name(r)) { | 
|---|
| 257 | regalloc->C->record_method_not_compilable( "illegal oopMap register name"); | 
|---|
| 258 | continue; | 
|---|
| 259 | } | 
|---|
| 260 | if( t->is_ptr()->_offset == 0 ) { // Not derived? | 
|---|
| 261 | if( mcall ) { | 
|---|
| 262 | // Outgoing argument GC mask responsibility belongs to the callee, | 
|---|
| 263 | // not the caller.  Inspect the inputs to the call, to see if | 
|---|
| 264 | // this live-range is one of them. | 
|---|
| 265 | uint cnt = mcall->tf()->domain()->cnt(); | 
|---|
| 266 | uint j; | 
|---|
| 267 | for( j = TypeFunc::Parms; j < cnt; j++) | 
|---|
| 268 | if( mcall->in(j) == def ) | 
|---|
| 269 | break;            // reaching def is an argument oop | 
|---|
| 270 | if( j < cnt )         // arg oops dont go in GC map | 
|---|
| 271 | continue;           // Continue on to the next register | 
|---|
| 272 | } | 
|---|
| 273 | omap->set_oop(r); | 
|---|
| 274 | } else {                  // Else it's derived. | 
|---|
| 275 | // Find the base of the derived value. | 
|---|
| 276 | uint i; | 
|---|
| 277 | // Fast, common case, scan | 
|---|
| 278 | for( i = jvms->oopoff(); i < n->req(); i+=2 ) | 
|---|
| 279 | if( n->in(i) == def ) break; // Common case | 
|---|
| 280 | if( i == n->req() ) {   // Missed, try a more generous scan | 
|---|
| 281 | // Scan again, but this time peek through copies | 
|---|
| 282 | for( i = jvms->oopoff(); i < n->req(); i+=2 ) { | 
|---|
| 283 | Node *m = n->in(i); // Get initial derived value | 
|---|
| 284 | while( 1 ) { | 
|---|
| 285 | Node *d = def;    // Get initial reaching def | 
|---|
| 286 | while( 1 ) {      // Follow copies of reaching def to end | 
|---|
| 287 | if( m == d ) goto found; // breaks 3 loops | 
|---|
| 288 | int idx = d->is_Copy(); | 
|---|
| 289 | if( !idx ) break; | 
|---|
| 290 | d = d->in(idx);     // Link through copy | 
|---|
| 291 | } | 
|---|
| 292 | int idx = m->is_Copy(); | 
|---|
| 293 | if( !idx ) break; | 
|---|
| 294 | m = m->in(idx); | 
|---|
| 295 | } | 
|---|
| 296 | } | 
|---|
| 297 | guarantee( 0, "must find derived/base pair"); | 
|---|
| 298 | } | 
|---|
| 299 | found: ; | 
|---|
| 300 | Node *base = n->in(i+1); // Base is other half of pair | 
|---|
| 301 | int breg = regalloc->get_reg_first(base); | 
|---|
| 302 | VMReg b = OptoReg::as_VMReg(OptoReg::Name(breg), framesize, max_inarg_slot); | 
|---|
| 303 |  | 
|---|
| 304 | // I record liveness at safepoints BEFORE I make the inputs | 
|---|
| 305 | // live.  This is because argument oops are NOT live at a | 
|---|
| 306 | // safepoint (or at least they cannot appear in the oopmap). | 
|---|
| 307 | // Thus bases of base/derived pairs might not be in the | 
|---|
| 308 | // liveness data but they need to appear in the oopmap. | 
|---|
| 309 | if( get_live_bit(live,breg) == 0 ) {// Not live? | 
|---|
| 310 | // Flag it, so next derived pointer won't re-insert into oopmap | 
|---|
| 311 | set_live_bit(live,breg); | 
|---|
| 312 | // Already missed our turn? | 
|---|
| 313 | if( breg < reg ) { | 
|---|
| 314 | if (b->is_stack() || b->is_concrete() || true ) { | 
|---|
| 315 | omap->set_oop( b); | 
|---|
| 316 | } | 
|---|
| 317 | } | 
|---|
| 318 | } | 
|---|
| 319 | if (b->is_stack() || b->is_concrete() || true ) { | 
|---|
| 320 | omap->set_derived_oop( r, b); | 
|---|
| 321 | } | 
|---|
| 322 | } | 
|---|
| 323 |  | 
|---|
| 324 | } else if( t->isa_narrowoop() ) { | 
|---|
| 325 | assert( !OptoReg::is_valid(_callees[reg]), "oop can't be callee save"); | 
|---|
| 326 | // Check for a legal reg name in the oopMap and bailout if it is not. | 
|---|
| 327 | if (!omap->legal_vm_reg_name(r)) { | 
|---|
| 328 | regalloc->C->record_method_not_compilable( "illegal oopMap register name"); | 
|---|
| 329 | continue; | 
|---|
| 330 | } | 
|---|
| 331 | if( mcall ) { | 
|---|
| 332 | // Outgoing argument GC mask responsibility belongs to the callee, | 
|---|
| 333 | // not the caller.  Inspect the inputs to the call, to see if | 
|---|
| 334 | // this live-range is one of them. | 
|---|
| 335 | uint cnt = mcall->tf()->domain()->cnt(); | 
|---|
| 336 | uint j; | 
|---|
| 337 | for( j = TypeFunc::Parms; j < cnt; j++) | 
|---|
| 338 | if( mcall->in(j) == def ) | 
|---|
| 339 | break;            // reaching def is an argument oop | 
|---|
| 340 | if( j < cnt )         // arg oops dont go in GC map | 
|---|
| 341 | continue;           // Continue on to the next register | 
|---|
| 342 | } | 
|---|
| 343 | omap->set_narrowoop(r); | 
|---|
| 344 | } else if( OptoReg::is_valid(_callees[reg])) { // callee-save? | 
|---|
| 345 | // It's a callee-save value | 
|---|
| 346 | assert( dup_check[_callees[reg]]==0, "trying to callee save same reg twice"); | 
|---|
| 347 | debug_only( dup_check[_callees[reg]]=1; ) | 
|---|
| 348 | VMReg callee = OptoReg::as_VMReg(OptoReg::Name(_callees[reg])); | 
|---|
| 349 | if ( callee->is_concrete() || true ) { | 
|---|
| 350 | omap->set_callee_saved( r, callee); | 
|---|
| 351 | } | 
|---|
| 352 |  | 
|---|
| 353 | } else { | 
|---|
| 354 | // Other - some reaching non-oop value | 
|---|
| 355 | omap->set_value( r); | 
|---|
| 356 | #ifdef ASSERT | 
|---|
| 357 | if( t->isa_rawptr() && C->cfg()->_raw_oops.member(def) ) { | 
|---|
| 358 | def->dump(); | 
|---|
| 359 | n->dump(); | 
|---|
| 360 | assert(false, "there should be a oop in OopMap instead of a live raw oop at safepoint"); | 
|---|
| 361 | } | 
|---|
| 362 | #endif | 
|---|
| 363 | } | 
|---|
| 364 |  | 
|---|
| 365 | } | 
|---|
| 366 |  | 
|---|
| 367 | #ifdef ASSERT | 
|---|
| 368 | /* Nice, Intel-only assert | 
|---|
| 369 | int cnt_callee_saves=0; | 
|---|
| 370 | int reg2 = 0; | 
|---|
| 371 | while (OptoReg::is_reg(reg2)) { | 
|---|
| 372 | if( dup_check[reg2] != 0) cnt_callee_saves++; | 
|---|
| 373 | assert( cnt_callee_saves==3 || cnt_callee_saves==5, "missed some callee-save" ); | 
|---|
| 374 | reg2++; | 
|---|
| 375 | } | 
|---|
| 376 | */ | 
|---|
| 377 | #endif | 
|---|
| 378 |  | 
|---|
| 379 | #ifdef ASSERT | 
|---|
| 380 | for( OopMapStream oms1(omap, OopMapValue::derived_oop_value); !oms1.is_done(); oms1.next()) { | 
|---|
| 381 | OopMapValue omv1 = oms1.current(); | 
|---|
| 382 | bool found = false; | 
|---|
| 383 | for( OopMapStream oms2(omap,OopMapValue::oop_value); !oms2.is_done(); oms2.next()) { | 
|---|
| 384 | if( omv1.content_reg() == oms2.current().reg() ) { | 
|---|
| 385 | found = true; | 
|---|
| 386 | break; | 
|---|
| 387 | } | 
|---|
| 388 | } | 
|---|
| 389 | assert( found, "derived with no base in oopmap"); | 
|---|
| 390 | } | 
|---|
| 391 | #endif | 
|---|
| 392 |  | 
|---|
| 393 | return omap; | 
|---|
| 394 | } | 
|---|
| 395 |  | 
|---|
| 396 | // Compute backwards liveness on registers | 
|---|
| 397 | static void do_liveness(PhaseRegAlloc* regalloc, PhaseCFG* cfg, Block_List* worklist, int max_reg_ints, Arena* A, Dict* safehash) { | 
|---|
| 398 | int* live = NEW_ARENA_ARRAY(A, int, (cfg->number_of_blocks() + 1) * max_reg_ints); | 
|---|
| 399 | int* tmp_live = &live[cfg->number_of_blocks() * max_reg_ints]; | 
|---|
| 400 | Node* root = cfg->get_root_node(); | 
|---|
| 401 | // On CISC platforms, get the node representing the stack pointer  that regalloc | 
|---|
| 402 | // used for spills | 
|---|
| 403 | Node *fp = NodeSentinel; | 
|---|
| 404 | if (UseCISCSpill && root->req() > 1) { | 
|---|
| 405 | fp = root->in(1)->in(TypeFunc::FramePtr); | 
|---|
| 406 | } | 
|---|
| 407 | memset(live, 0, cfg->number_of_blocks() * (max_reg_ints << LogBytesPerInt)); | 
|---|
| 408 | // Push preds onto worklist | 
|---|
| 409 | for (uint i = 1; i < root->req(); i++) { | 
|---|
| 410 | Block* block = cfg->get_block_for_node(root->in(i)); | 
|---|
| 411 | worklist->push(block); | 
|---|
| 412 | } | 
|---|
| 413 |  | 
|---|
| 414 | // ZKM.jar includes tiny infinite loops which are unreached from below. | 
|---|
| 415 | // If we missed any blocks, we'll retry here after pushing all missed | 
|---|
| 416 | // blocks on the worklist.  Normally this outer loop never trips more | 
|---|
| 417 | // than once. | 
|---|
| 418 | while (1) { | 
|---|
| 419 |  | 
|---|
| 420 | while( worklist->size() ) { // Standard worklist algorithm | 
|---|
| 421 | Block *b = worklist->rpop(); | 
|---|
| 422 |  | 
|---|
| 423 | // Copy first successor into my tmp_live space | 
|---|
| 424 | int s0num = b->_succs[0]->_pre_order; | 
|---|
| 425 | int *t = &live[s0num*max_reg_ints]; | 
|---|
| 426 | for( int i=0; i<max_reg_ints; i++ ) | 
|---|
| 427 | tmp_live[i] = t[i]; | 
|---|
| 428 |  | 
|---|
| 429 | // OR in the remaining live registers | 
|---|
| 430 | for( uint j=1; j<b->_num_succs; j++ ) { | 
|---|
| 431 | uint sjnum = b->_succs[j]->_pre_order; | 
|---|
| 432 | int *t = &live[sjnum*max_reg_ints]; | 
|---|
| 433 | for( int i=0; i<max_reg_ints; i++ ) | 
|---|
| 434 | tmp_live[i] |= t[i]; | 
|---|
| 435 | } | 
|---|
| 436 |  | 
|---|
| 437 | // Now walk tmp_live up the block backwards, computing live | 
|---|
| 438 | for( int k=b->number_of_nodes()-1; k>=0; k-- ) { | 
|---|
| 439 | Node *n = b->get_node(k); | 
|---|
| 440 | // KILL def'd bits | 
|---|
| 441 | int first = regalloc->get_reg_first(n); | 
|---|
| 442 | int second = regalloc->get_reg_second(n); | 
|---|
| 443 | if( OptoReg::is_valid(first) ) clr_live_bit(tmp_live,first); | 
|---|
| 444 | if( OptoReg::is_valid(second) ) clr_live_bit(tmp_live,second); | 
|---|
| 445 |  | 
|---|
| 446 | MachNode *m = n->is_Mach() ? n->as_Mach() : NULL; | 
|---|
| 447 |  | 
|---|
| 448 | // Check if m is potentially a CISC alternate instruction (i.e, possibly | 
|---|
| 449 | // synthesized by RegAlloc from a conventional instruction and a | 
|---|
| 450 | // spilled input) | 
|---|
| 451 | bool is_cisc_alternate = false; | 
|---|
| 452 | if (UseCISCSpill && m) { | 
|---|
| 453 | is_cisc_alternate = m->is_cisc_alternate(); | 
|---|
| 454 | } | 
|---|
| 455 |  | 
|---|
| 456 | // GEN use'd bits | 
|---|
| 457 | for( uint l=1; l<n->req(); l++ ) { | 
|---|
| 458 | Node *def = n->in(l); | 
|---|
| 459 | assert(def != 0, "input edge required"); | 
|---|
| 460 | int first = regalloc->get_reg_first(def); | 
|---|
| 461 | int second = regalloc->get_reg_second(def); | 
|---|
| 462 | if( OptoReg::is_valid(first) ) set_live_bit(tmp_live,first); | 
|---|
| 463 | if( OptoReg::is_valid(second) ) set_live_bit(tmp_live,second); | 
|---|
| 464 | // If we use the stack pointer in a cisc-alternative instruction, | 
|---|
| 465 | // check for use as a memory operand.  Then reconstruct the RegName | 
|---|
| 466 | // for this stack location, and set the appropriate bit in the | 
|---|
| 467 | // live vector 4987749. | 
|---|
| 468 | if (is_cisc_alternate && def == fp) { | 
|---|
| 469 | const TypePtr *adr_type = NULL; | 
|---|
| 470 | intptr_t offset; | 
|---|
| 471 | const Node* base = m->get_base_and_disp(offset, adr_type); | 
|---|
| 472 | if (base == NodeSentinel) { | 
|---|
| 473 | // Machnode has multiple memory inputs. We are unable to reason | 
|---|
| 474 | // with these, but are presuming (with trepidation) that not any of | 
|---|
| 475 | // them are oops. This can be fixed by making get_base_and_disp() | 
|---|
| 476 | // look at a specific input instead of all inputs. | 
|---|
| 477 | assert(!def->bottom_type()->isa_oop_ptr(), "expecting non-oop mem input"); | 
|---|
| 478 | } else if (base != fp || offset == Type::OffsetBot) { | 
|---|
| 479 | // Do nothing: the fp operand is either not from a memory use | 
|---|
| 480 | // (base == NULL) OR the fp is used in a non-memory context | 
|---|
| 481 | // (base is some other register) OR the offset is not constant, | 
|---|
| 482 | // so it is not a stack slot. | 
|---|
| 483 | } else { | 
|---|
| 484 | assert(offset >= 0, "unexpected negative offset"); | 
|---|
| 485 | offset -= (offset % jintSize);  // count the whole word | 
|---|
| 486 | int stack_reg = regalloc->offset2reg(offset); | 
|---|
| 487 | if (OptoReg::is_stack(stack_reg)) { | 
|---|
| 488 | set_live_bit(tmp_live, stack_reg); | 
|---|
| 489 | } else { | 
|---|
| 490 | assert(false, "stack_reg not on stack?"); | 
|---|
| 491 | } | 
|---|
| 492 | } | 
|---|
| 493 | } | 
|---|
| 494 | } | 
|---|
| 495 |  | 
|---|
| 496 | if( n->jvms() ) {       // Record liveness at safepoint | 
|---|
| 497 |  | 
|---|
| 498 | // This placement of this stanza means inputs to calls are | 
|---|
| 499 | // considered live at the callsite's OopMap.  Argument oops are | 
|---|
| 500 | // hence live, but NOT included in the oopmap.  See cutout in | 
|---|
| 501 | // build_oop_map.  Debug oops are live (and in OopMap). | 
|---|
| 502 | int *n_live = NEW_ARENA_ARRAY(A, int, max_reg_ints); | 
|---|
| 503 | for( int l=0; l<max_reg_ints; l++ ) | 
|---|
| 504 | n_live[l] = tmp_live[l]; | 
|---|
| 505 | safehash->Insert(n,n_live); | 
|---|
| 506 | } | 
|---|
| 507 |  | 
|---|
| 508 | } | 
|---|
| 509 |  | 
|---|
| 510 | // Now at block top, see if we have any changes.  If so, propagate | 
|---|
| 511 | // to prior blocks. | 
|---|
| 512 | int *old_live = &live[b->_pre_order*max_reg_ints]; | 
|---|
| 513 | int l; | 
|---|
| 514 | for( l=0; l<max_reg_ints; l++ ) | 
|---|
| 515 | if( tmp_live[l] != old_live[l] ) | 
|---|
| 516 | break; | 
|---|
| 517 | if( l<max_reg_ints ) {     // Change! | 
|---|
| 518 | // Copy in new value | 
|---|
| 519 | for( l=0; l<max_reg_ints; l++ ) | 
|---|
| 520 | old_live[l] = tmp_live[l]; | 
|---|
| 521 | // Push preds onto worklist | 
|---|
| 522 | for (l = 1; l < (int)b->num_preds(); l++) { | 
|---|
| 523 | Block* block = cfg->get_block_for_node(b->pred(l)); | 
|---|
| 524 | worklist->push(block); | 
|---|
| 525 | } | 
|---|
| 526 | } | 
|---|
| 527 | } | 
|---|
| 528 |  | 
|---|
| 529 | // Scan for any missing safepoints.  Happens to infinite loops | 
|---|
| 530 | // ala ZKM.jar | 
|---|
| 531 | uint i; | 
|---|
| 532 | for (i = 1; i < cfg->number_of_blocks(); i++) { | 
|---|
| 533 | Block* block = cfg->get_block(i); | 
|---|
| 534 | uint j; | 
|---|
| 535 | for (j = 1; j < block->number_of_nodes(); j++) { | 
|---|
| 536 | if (block->get_node(j)->jvms() && (*safehash)[block->get_node(j)] == NULL) { | 
|---|
| 537 | break; | 
|---|
| 538 | } | 
|---|
| 539 | } | 
|---|
| 540 | if (j < block->number_of_nodes()) { | 
|---|
| 541 | break; | 
|---|
| 542 | } | 
|---|
| 543 | } | 
|---|
| 544 | if (i == cfg->number_of_blocks()) { | 
|---|
| 545 | break;                    // Got 'em all | 
|---|
| 546 | } | 
|---|
| 547 |  | 
|---|
| 548 | if (PrintOpto && Verbose) { | 
|---|
| 549 | tty->print_cr( "retripping live calc"); | 
|---|
| 550 | } | 
|---|
| 551 |  | 
|---|
| 552 | // Force the issue (expensively): recheck everybody | 
|---|
| 553 | for (i = 1; i < cfg->number_of_blocks(); i++) { | 
|---|
| 554 | worklist->push(cfg->get_block(i)); | 
|---|
| 555 | } | 
|---|
| 556 | } | 
|---|
| 557 | } | 
|---|
| 558 |  | 
|---|
| 559 | // Collect GC mask info - where are all the OOPs? | 
|---|
| 560 | void Compile::BuildOopMaps() { | 
|---|
| 561 | TracePhase tp( "bldOopMaps", &timers[_t_buildOopMaps]); | 
|---|
| 562 | // Can't resource-mark because I need to leave all those OopMaps around, | 
|---|
| 563 | // or else I need to resource-mark some arena other than the default. | 
|---|
| 564 | // ResourceMark rm;              // Reclaim all OopFlows when done | 
|---|
| 565 | int max_reg = _regalloc->_max_reg; // Current array extent | 
|---|
| 566 |  | 
|---|
| 567 | Arena *A = Thread::current()->resource_area(); | 
|---|
| 568 | Block_List worklist;          // Worklist of pending blocks | 
|---|
| 569 |  | 
|---|
| 570 | int max_reg_ints = align_up(max_reg, BitsPerInt)>>LogBitsPerInt; | 
|---|
| 571 | Dict *safehash = NULL;        // Used for assert only | 
|---|
| 572 | // Compute a backwards liveness per register.  Needs a bitarray of | 
|---|
| 573 | // #blocks x (#registers, rounded up to ints) | 
|---|
| 574 | safehash = new Dict(cmpkey,hashkey,A); | 
|---|
| 575 | do_liveness( _regalloc, _cfg, &worklist, max_reg_ints, A, safehash ); | 
|---|
| 576 | OopFlow *free_list = NULL;    // Free, unused | 
|---|
| 577 |  | 
|---|
| 578 | // Array mapping blocks to completed oopflows | 
|---|
| 579 | OopFlow **flows = NEW_ARENA_ARRAY(A, OopFlow*, _cfg->number_of_blocks()); | 
|---|
| 580 | memset( flows, 0, _cfg->number_of_blocks() * sizeof(OopFlow*) ); | 
|---|
| 581 |  | 
|---|
| 582 |  | 
|---|
| 583 | // Do the first block 'by hand' to prime the worklist | 
|---|
| 584 | Block *entry = _cfg->get_block(1); | 
|---|
| 585 | OopFlow *rootflow = OopFlow::make(A,max_reg,this); | 
|---|
| 586 | // Initialize to 'bottom' (not 'top') | 
|---|
| 587 | memset( rootflow->_callees, OptoReg::Bad, max_reg*sizeof(short) ); | 
|---|
| 588 | memset( rootflow->_defs   ,            0, max_reg*sizeof(Node*) ); | 
|---|
| 589 | flows[entry->_pre_order] = rootflow; | 
|---|
| 590 |  | 
|---|
| 591 | // Do the first block 'by hand' to prime the worklist | 
|---|
| 592 | rootflow->_b = entry; | 
|---|
| 593 | rootflow->compute_reach( _regalloc, max_reg, safehash ); | 
|---|
| 594 | for( uint i=0; i<entry->_num_succs; i++ ) | 
|---|
| 595 | worklist.push(entry->_succs[i]); | 
|---|
| 596 |  | 
|---|
| 597 | // Now worklist contains blocks which have some, but perhaps not all, | 
|---|
| 598 | // predecessors visited. | 
|---|
| 599 | while( worklist.size() ) { | 
|---|
| 600 | // Scan for a block with all predecessors visited, or any randoms slob | 
|---|
| 601 | // otherwise.  All-preds-visited order allows me to recycle OopFlow | 
|---|
| 602 | // structures rapidly and cut down on the memory footprint. | 
|---|
| 603 | // Note: not all predecessors might be visited yet (must happen for | 
|---|
| 604 | // irreducible loops).  This is OK, since every live value must have the | 
|---|
| 605 | // SAME reaching def for the block, so any reaching def is OK. | 
|---|
| 606 | uint i; | 
|---|
| 607 |  | 
|---|
| 608 | Block *b = worklist.pop(); | 
|---|
| 609 | // Ignore root block | 
|---|
| 610 | if (b == _cfg->get_root_block()) { | 
|---|
| 611 | continue; | 
|---|
| 612 | } | 
|---|
| 613 | // Block is already done?  Happens if block has several predecessors, | 
|---|
| 614 | // he can get on the worklist more than once. | 
|---|
| 615 | if( flows[b->_pre_order] ) continue; | 
|---|
| 616 |  | 
|---|
| 617 | // If this block has a visited predecessor AND that predecessor has this | 
|---|
| 618 | // last block as his only undone child, we can move the OopFlow from the | 
|---|
| 619 | // pred to this block.  Otherwise we have to grab a new OopFlow. | 
|---|
| 620 | OopFlow *flow = NULL;       // Flag for finding optimized flow | 
|---|
| 621 | Block *pred = (Block*)((intptr_t)0xdeadbeef); | 
|---|
| 622 | // Scan this block's preds to find a done predecessor | 
|---|
| 623 | for (uint j = 1; j < b->num_preds(); j++) { | 
|---|
| 624 | Block* p = _cfg->get_block_for_node(b->pred(j)); | 
|---|
| 625 | OopFlow *p_flow = flows[p->_pre_order]; | 
|---|
| 626 | if( p_flow ) {            // Predecessor is done | 
|---|
| 627 | assert( p_flow->_b == p, "cross check"); | 
|---|
| 628 | pred = p;               // Record some predecessor | 
|---|
| 629 | // If all successors of p are done except for 'b', then we can carry | 
|---|
| 630 | // p_flow forward to 'b' without copying, otherwise we have to draw | 
|---|
| 631 | // from the free_list and clone data. | 
|---|
| 632 | uint k; | 
|---|
| 633 | for( k=0; k<p->_num_succs; k++ ) | 
|---|
| 634 | if( !flows[p->_succs[k]->_pre_order] && | 
|---|
| 635 | p->_succs[k] != b ) | 
|---|
| 636 | break; | 
|---|
| 637 |  | 
|---|
| 638 | // Either carry-forward the now-unused OopFlow for b's use | 
|---|
| 639 | // or draw a new one from the free list | 
|---|
| 640 | if( k==p->_num_succs ) { | 
|---|
| 641 | flow = p_flow; | 
|---|
| 642 | break;                // Found an ideal pred, use him | 
|---|
| 643 | } | 
|---|
| 644 | } | 
|---|
| 645 | } | 
|---|
| 646 |  | 
|---|
| 647 | if( flow ) { | 
|---|
| 648 | // We have an OopFlow that's the last-use of a predecessor. | 
|---|
| 649 | // Carry it forward. | 
|---|
| 650 | } else {                    // Draw a new OopFlow from the freelist | 
|---|
| 651 | if( !free_list ) | 
|---|
| 652 | free_list = OopFlow::make(A,max_reg,C); | 
|---|
| 653 | flow = free_list; | 
|---|
| 654 | assert( flow->_b == NULL, "oopFlow is not free"); | 
|---|
| 655 | free_list = flow->_next; | 
|---|
| 656 | flow->_next = NULL; | 
|---|
| 657 |  | 
|---|
| 658 | // Copy/clone over the data | 
|---|
| 659 | flow->clone(flows[pred->_pre_order], max_reg); | 
|---|
| 660 | } | 
|---|
| 661 |  | 
|---|
| 662 | // Mark flow for block.  Blocks can only be flowed over once, | 
|---|
| 663 | // because after the first time they are guarded from entering | 
|---|
| 664 | // this code again. | 
|---|
| 665 | assert( flow->_b == pred, "have some prior flow"); | 
|---|
| 666 | flow->_b = NULL; | 
|---|
| 667 |  | 
|---|
| 668 | // Now push flow forward | 
|---|
| 669 | flows[b->_pre_order] = flow;// Mark flow for this block | 
|---|
| 670 | flow->_b = b; | 
|---|
| 671 | flow->compute_reach( _regalloc, max_reg, safehash ); | 
|---|
| 672 |  | 
|---|
| 673 | // Now push children onto worklist | 
|---|
| 674 | for( i=0; i<b->_num_succs; i++ ) | 
|---|
| 675 | worklist.push(b->_succs[i]); | 
|---|
| 676 |  | 
|---|
| 677 | } | 
|---|
| 678 | } | 
|---|
| 679 |  | 
|---|