| 1 | /* | 
|---|
| 2 | * Copyright (c) 2007, 2019, Oracle and/or its affiliates. All rights reserved. | 
|---|
| 3 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. | 
|---|
| 4 | * | 
|---|
| 5 | * This code is free software; you can redistribute it and/or modify it | 
|---|
| 6 | * under the terms of the GNU General Public License version 2 only, as | 
|---|
| 7 | * published by the Free Software Foundation. | 
|---|
| 8 | * | 
|---|
| 9 | * This code is distributed in the hope that it will be useful, but WITHOUT | 
|---|
| 10 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | 
|---|
| 11 | * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License | 
|---|
| 12 | * version 2 for more details (a copy is included in the LICENSE file that | 
|---|
| 13 | * accompanied this code). | 
|---|
| 14 | * | 
|---|
| 15 | * You should have received a copy of the GNU General Public License version | 
|---|
| 16 | * 2 along with this work; if not, write to the Free Software Foundation, | 
|---|
| 17 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. | 
|---|
| 18 | * | 
|---|
| 19 | * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA | 
|---|
| 20 | * or visit www.oracle.com if you need additional information or have any | 
|---|
| 21 | * questions. | 
|---|
| 22 | */ | 
|---|
| 23 |  | 
|---|
| 24 | #ifndef SHARE_OPTO_SUPERWORD_HPP | 
|---|
| 25 | #define SHARE_OPTO_SUPERWORD_HPP | 
|---|
| 26 |  | 
|---|
| 27 | #include "opto/loopnode.hpp" | 
|---|
| 28 | #include "opto/node.hpp" | 
|---|
| 29 | #include "opto/phaseX.hpp" | 
|---|
| 30 | #include "opto/vectornode.hpp" | 
|---|
| 31 | #include "utilities/growableArray.hpp" | 
|---|
| 32 | #include "libadt/dict.hpp" | 
|---|
| 33 |  | 
|---|
| 34 | // | 
|---|
| 35 | //                  S U P E R W O R D   T R A N S F O R M | 
|---|
| 36 | // | 
|---|
| 37 | // SuperWords are short, fixed length vectors. | 
|---|
| 38 | // | 
|---|
| 39 | // Algorithm from: | 
|---|
| 40 | // | 
|---|
| 41 | // Exploiting SuperWord Level Parallelism with | 
|---|
| 42 | //   Multimedia Instruction Sets | 
|---|
| 43 | // by | 
|---|
| 44 | //   Samuel Larsen and Saman Amarasinghe | 
|---|
| 45 | //   MIT Laboratory for Computer Science | 
|---|
| 46 | // date | 
|---|
| 47 | //   May 2000 | 
|---|
| 48 | // published in | 
|---|
| 49 | //   ACM SIGPLAN Notices | 
|---|
| 50 | //   Proceedings of ACM PLDI '00,  Volume 35 Issue 5 | 
|---|
| 51 | // | 
|---|
| 52 | // Definition 3.1 A Pack is an n-tuple, <s1, ...,sn>, where | 
|---|
| 53 | // s1,...,sn are independent isomorphic statements in a basic | 
|---|
| 54 | // block. | 
|---|
| 55 | // | 
|---|
| 56 | // Definition 3.2 A PackSet is a set of Packs. | 
|---|
| 57 | // | 
|---|
| 58 | // Definition 3.3 A Pair is a Pack of size two, where the | 
|---|
| 59 | // first statement is considered the left element, and the | 
|---|
| 60 | // second statement is considered the right element. | 
|---|
| 61 |  | 
|---|
| 62 | class SWPointer; | 
|---|
| 63 | class OrderedPair; | 
|---|
| 64 |  | 
|---|
| 65 | // ========================= Dependence Graph ===================== | 
|---|
| 66 |  | 
|---|
| 67 | class DepMem; | 
|---|
| 68 |  | 
|---|
| 69 | //------------------------------DepEdge--------------------------- | 
|---|
| 70 | // An edge in the dependence graph.  The edges incident to a dependence | 
|---|
| 71 | // node are threaded through _next_in for incoming edges and _next_out | 
|---|
| 72 | // for outgoing edges. | 
|---|
| 73 | class DepEdge : public ResourceObj { | 
|---|
| 74 | protected: | 
|---|
| 75 | DepMem* _pred; | 
|---|
| 76 | DepMem* _succ; | 
|---|
| 77 | DepEdge* _next_in;   // list of in edges, null terminated | 
|---|
| 78 | DepEdge* _next_out;  // list of out edges, null terminated | 
|---|
| 79 |  | 
|---|
| 80 | public: | 
|---|
| 81 | DepEdge(DepMem* pred, DepMem* succ, DepEdge* next_in, DepEdge* next_out) : | 
|---|
| 82 | _pred(pred), _succ(succ), _next_in(next_in), _next_out(next_out) {} | 
|---|
| 83 |  | 
|---|
| 84 | DepEdge* next_in()  { return _next_in; } | 
|---|
| 85 | DepEdge* next_out() { return _next_out; } | 
|---|
| 86 | DepMem*  pred()     { return _pred; } | 
|---|
| 87 | DepMem*  succ()     { return _succ; } | 
|---|
| 88 |  | 
|---|
| 89 | void print(); | 
|---|
| 90 | }; | 
|---|
| 91 |  | 
|---|
| 92 | //------------------------------DepMem--------------------------- | 
|---|
| 93 | // A node in the dependence graph.  _in_head starts the threaded list of | 
|---|
| 94 | // incoming edges, and _out_head starts the list of outgoing edges. | 
|---|
| 95 | class DepMem : public ResourceObj { | 
|---|
| 96 | protected: | 
|---|
| 97 | Node*    _node;     // Corresponding ideal node | 
|---|
| 98 | DepEdge* _in_head;  // Head of list of in edges, null terminated | 
|---|
| 99 | DepEdge* _out_head; // Head of list of out edges, null terminated | 
|---|
| 100 |  | 
|---|
| 101 | public: | 
|---|
| 102 | DepMem(Node* node) : _node(node), _in_head(NULL), _out_head(NULL) {} | 
|---|
| 103 |  | 
|---|
| 104 | Node*    node()                { return _node;     } | 
|---|
| 105 | DepEdge* in_head()             { return _in_head;  } | 
|---|
| 106 | DepEdge* out_head()            { return _out_head; } | 
|---|
| 107 | void set_in_head(DepEdge* hd)  { _in_head = hd;    } | 
|---|
| 108 | void set_out_head(DepEdge* hd) { _out_head = hd;   } | 
|---|
| 109 |  | 
|---|
| 110 | int in_cnt();  // Incoming edge count | 
|---|
| 111 | int out_cnt(); // Outgoing edge count | 
|---|
| 112 |  | 
|---|
| 113 | void print(); | 
|---|
| 114 | }; | 
|---|
| 115 |  | 
|---|
| 116 | //------------------------------DepGraph--------------------------- | 
|---|
| 117 | class DepGraph { | 
|---|
| 118 | protected: | 
|---|
| 119 | Arena* _arena; | 
|---|
| 120 | GrowableArray<DepMem*> _map; | 
|---|
| 121 | DepMem* _root; | 
|---|
| 122 | DepMem* _tail; | 
|---|
| 123 |  | 
|---|
| 124 | public: | 
|---|
| 125 | DepGraph(Arena* a) : _arena(a), _map(a, 8,  0, NULL) { | 
|---|
| 126 | _root = new (_arena) DepMem(NULL); | 
|---|
| 127 | _tail = new (_arena) DepMem(NULL); | 
|---|
| 128 | } | 
|---|
| 129 |  | 
|---|
| 130 | DepMem* root() { return _root; } | 
|---|
| 131 | DepMem* tail() { return _tail; } | 
|---|
| 132 |  | 
|---|
| 133 | // Return dependence node corresponding to an ideal node | 
|---|
| 134 | DepMem* dep(Node* node) { return _map.at(node->_idx); } | 
|---|
| 135 |  | 
|---|
| 136 | // Make a new dependence graph node for an ideal node. | 
|---|
| 137 | DepMem* make_node(Node* node); | 
|---|
| 138 |  | 
|---|
| 139 | // Make a new dependence graph edge dprec->dsucc | 
|---|
| 140 | DepEdge* make_edge(DepMem* dpred, DepMem* dsucc); | 
|---|
| 141 |  | 
|---|
| 142 | DepEdge* make_edge(Node* pred,   Node* succ)   { return make_edge(dep(pred), dep(succ)); } | 
|---|
| 143 | DepEdge* make_edge(DepMem* pred, Node* succ)   { return make_edge(pred,      dep(succ)); } | 
|---|
| 144 | DepEdge* make_edge(Node* pred,   DepMem* succ) { return make_edge(dep(pred), succ);      } | 
|---|
| 145 |  | 
|---|
| 146 | void init() { _map.clear(); } // initialize | 
|---|
| 147 |  | 
|---|
| 148 | void print(Node* n)   { dep(n)->print(); } | 
|---|
| 149 | void print(DepMem* d) { d->print(); } | 
|---|
| 150 | }; | 
|---|
| 151 |  | 
|---|
| 152 | //------------------------------DepPreds--------------------------- | 
|---|
| 153 | // Iterator over predecessors in the dependence graph and | 
|---|
| 154 | // non-memory-graph inputs of ideal nodes. | 
|---|
| 155 | class DepPreds : public StackObj { | 
|---|
| 156 | private: | 
|---|
| 157 | Node*    _n; | 
|---|
| 158 | int      _next_idx, _end_idx; | 
|---|
| 159 | DepEdge* _dep_next; | 
|---|
| 160 | Node*    _current; | 
|---|
| 161 | bool     _done; | 
|---|
| 162 |  | 
|---|
| 163 | public: | 
|---|
| 164 | DepPreds(Node* n, DepGraph& dg); | 
|---|
| 165 | Node* current() { return _current; } | 
|---|
| 166 | bool  done()    { return _done; } | 
|---|
| 167 | void  next(); | 
|---|
| 168 | }; | 
|---|
| 169 |  | 
|---|
| 170 | //------------------------------DepSuccs--------------------------- | 
|---|
| 171 | // Iterator over successors in the dependence graph and | 
|---|
| 172 | // non-memory-graph outputs of ideal nodes. | 
|---|
| 173 | class DepSuccs : public StackObj { | 
|---|
| 174 | private: | 
|---|
| 175 | Node*    _n; | 
|---|
| 176 | int      _next_idx, _end_idx; | 
|---|
| 177 | DepEdge* _dep_next; | 
|---|
| 178 | Node*    _current; | 
|---|
| 179 | bool     _done; | 
|---|
| 180 |  | 
|---|
| 181 | public: | 
|---|
| 182 | DepSuccs(Node* n, DepGraph& dg); | 
|---|
| 183 | Node* current() { return _current; } | 
|---|
| 184 | bool  done()    { return _done; } | 
|---|
| 185 | void  next(); | 
|---|
| 186 | }; | 
|---|
| 187 |  | 
|---|
| 188 |  | 
|---|
| 189 | // ========================= SuperWord ===================== | 
|---|
| 190 |  | 
|---|
| 191 | // -----------------------------SWNodeInfo--------------------------------- | 
|---|
| 192 | // Per node info needed by SuperWord | 
|---|
| 193 | class SWNodeInfo { | 
|---|
| 194 | public: | 
|---|
| 195 | int         _alignment; // memory alignment for a node | 
|---|
| 196 | int         _depth;     // Max expression (DAG) depth from block start | 
|---|
| 197 | const Type* _velt_type; // vector element type | 
|---|
| 198 | Node_List*  _my_pack;   // pack containing this node | 
|---|
| 199 |  | 
|---|
| 200 | SWNodeInfo() : _alignment(-1), _depth(0), _velt_type(NULL), _my_pack(NULL) {} | 
|---|
| 201 | static const SWNodeInfo initial; | 
|---|
| 202 | }; | 
|---|
| 203 |  | 
|---|
| 204 | class SuperWord; | 
|---|
| 205 | class CMoveKit { | 
|---|
| 206 | friend class SuperWord; | 
|---|
| 207 | private: | 
|---|
| 208 | SuperWord* _sw; | 
|---|
| 209 | Dict* _dict; | 
|---|
| 210 | CMoveKit(Arena* a, SuperWord* sw) : _sw(sw)  {_dict = new Dict(cmpkey, hashkey, a);} | 
|---|
| 211 | void*     _2p(Node* key)        const  { return (void*)(intptr_t)key; } // 2 conversion functions to make gcc happy | 
|---|
| 212 | Dict*     dict()                const  { return _dict; } | 
|---|
| 213 | void map(Node* key, Node_List* val)    { assert(_dict->operator[](_2p(key)) == NULL, "key existed"); _dict->Insert(_2p(key), (void*)val); } | 
|---|
| 214 | void unmap(Node* key)                  { _dict->Delete(_2p(key)); } | 
|---|
| 215 | Node_List* pack(Node* key)      const  { return (Node_List*)_dict->operator[](_2p(key)); } | 
|---|
| 216 | Node* is_Bool_candidate(Node* nd) const; // if it is the right candidate return corresponding CMove* , | 
|---|
| 217 | Node* is_CmpD_candidate(Node* nd) const; // otherwise return NULL | 
|---|
| 218 | Node_List* make_cmovevd_pack(Node_List* cmovd_pk); | 
|---|
| 219 | bool test_cmpd_pack(Node_List* cmpd_pk, Node_List* cmovd_pk); | 
|---|
| 220 | };//class CMoveKit | 
|---|
| 221 |  | 
|---|
| 222 | // JVMCI: OrderedPair is moved up to deal with compilation issues on Windows | 
|---|
| 223 | //------------------------------OrderedPair--------------------------- | 
|---|
| 224 | // Ordered pair of Node*. | 
|---|
| 225 | class OrderedPair { | 
|---|
| 226 | protected: | 
|---|
| 227 | Node* _p1; | 
|---|
| 228 | Node* _p2; | 
|---|
| 229 | public: | 
|---|
| 230 | OrderedPair() : _p1(NULL), _p2(NULL) {} | 
|---|
| 231 | OrderedPair(Node* p1, Node* p2) { | 
|---|
| 232 | if (p1->_idx < p2->_idx) { | 
|---|
| 233 | _p1 = p1; _p2 = p2; | 
|---|
| 234 | } else { | 
|---|
| 235 | _p1 = p2; _p2 = p1; | 
|---|
| 236 | } | 
|---|
| 237 | } | 
|---|
| 238 |  | 
|---|
| 239 | bool operator==(const OrderedPair &rhs) { | 
|---|
| 240 | return _p1 == rhs._p1 && _p2 == rhs._p2; | 
|---|
| 241 | } | 
|---|
| 242 | void print() { tty->print( "  (%d, %d)", _p1->_idx, _p2->_idx); } | 
|---|
| 243 |  | 
|---|
| 244 | static const OrderedPair initial; | 
|---|
| 245 | }; | 
|---|
| 246 |  | 
|---|
| 247 | // -----------------------------SuperWord--------------------------------- | 
|---|
| 248 | // Transforms scalar operations into packed (superword) operations. | 
|---|
| 249 | class SuperWord : public ResourceObj { | 
|---|
| 250 | friend class SWPointer; | 
|---|
| 251 | friend class CMoveKit; | 
|---|
| 252 | private: | 
|---|
| 253 | PhaseIdealLoop* _phase; | 
|---|
| 254 | Arena*          _arena; | 
|---|
| 255 | PhaseIterGVN   &_igvn; | 
|---|
| 256 |  | 
|---|
| 257 | enum consts { top_align = -1, bottom_align = -666 }; | 
|---|
| 258 |  | 
|---|
| 259 | GrowableArray<Node_List*> _packset;    // Packs for the current block | 
|---|
| 260 |  | 
|---|
| 261 | GrowableArray<int> _bb_idx;            // Map from Node _idx to index within block | 
|---|
| 262 |  | 
|---|
| 263 | GrowableArray<Node*> _block;           // Nodes in current block | 
|---|
| 264 | GrowableArray<Node*> _post_block;      // Nodes in post loop block | 
|---|
| 265 | GrowableArray<Node*> _data_entry;      // Nodes with all inputs from outside | 
|---|
| 266 | GrowableArray<Node*> _mem_slice_head;  // Memory slice head nodes | 
|---|
| 267 | GrowableArray<Node*> _mem_slice_tail;  // Memory slice tail nodes | 
|---|
| 268 | GrowableArray<Node*> _iteration_first; // nodes in the generation that has deps from phi | 
|---|
| 269 | GrowableArray<Node*> _iteration_last;  // nodes in the generation that has deps to   phi | 
|---|
| 270 | GrowableArray<SWNodeInfo> _node_info;  // Info needed per node | 
|---|
| 271 | CloneMap&            _clone_map;       // map of nodes created in cloning | 
|---|
| 272 | CMoveKit             _cmovev_kit;      // support for vectorization of CMov | 
|---|
| 273 | MemNode* _align_to_ref;                // Memory reference that pre-loop will align to | 
|---|
| 274 |  | 
|---|
| 275 | GrowableArray<OrderedPair> _disjoint_ptrs; // runtime disambiguated pointer pairs | 
|---|
| 276 |  | 
|---|
| 277 | DepGraph _dg; // Dependence graph | 
|---|
| 278 |  | 
|---|
| 279 | // Scratch pads | 
|---|
| 280 | VectorSet    _visited;       // Visited set | 
|---|
| 281 | VectorSet    _post_visited;  // Post-visited set | 
|---|
| 282 | Node_Stack   _n_idx_list;    // List of (node,index) pairs | 
|---|
| 283 | GrowableArray<Node*> _nlist; // List of nodes | 
|---|
| 284 | GrowableArray<Node*> _stk;   // Stack of nodes | 
|---|
| 285 |  | 
|---|
| 286 | public: | 
|---|
| 287 | SuperWord(PhaseIdealLoop* phase); | 
|---|
| 288 |  | 
|---|
| 289 | void transform_loop(IdealLoopTree* lpt, bool do_optimization); | 
|---|
| 290 |  | 
|---|
| 291 | void unrolling_analysis(int &local_loop_unroll_factor); | 
|---|
| 292 |  | 
|---|
| 293 | // Accessors for SWPointer | 
|---|
| 294 | PhaseIdealLoop* phase()          { return _phase; } | 
|---|
| 295 | IdealLoopTree* lpt()             { return _lpt; } | 
|---|
| 296 | PhiNode* iv()                    { return _iv; } | 
|---|
| 297 |  | 
|---|
| 298 | bool early_return()              { return _early_return; } | 
|---|
| 299 |  | 
|---|
| 300 | #ifndef PRODUCT | 
|---|
| 301 | bool     is_debug()              { return _vector_loop_debug > 0; } | 
|---|
| 302 | bool     is_trace_alignment()    { return (_vector_loop_debug & 2) > 0; } | 
|---|
| 303 | bool     is_trace_mem_slice()    { return (_vector_loop_debug & 4) > 0; } | 
|---|
| 304 | bool     is_trace_loop()         { return (_vector_loop_debug & 8) > 0; } | 
|---|
| 305 | bool     is_trace_adjacent()     { return (_vector_loop_debug & 16) > 0; } | 
|---|
| 306 | bool     is_trace_cmov()         { return (_vector_loop_debug & 32) > 0; } | 
|---|
| 307 | bool     is_trace_loop_reverse() { return (_vector_loop_debug & 64) > 0; } | 
|---|
| 308 | #endif | 
|---|
| 309 | bool     do_vector_loop()        { return _do_vector_loop; } | 
|---|
| 310 | bool     do_reserve_copy()       { return _do_reserve_copy; } | 
|---|
| 311 | private: | 
|---|
| 312 | IdealLoopTree* _lpt;             // Current loop tree node | 
|---|
| 313 | LoopNode*      _lp;              // Current LoopNode | 
|---|
| 314 | Node*          _bb;              // Current basic block | 
|---|
| 315 | PhiNode*       _iv;              // Induction var | 
|---|
| 316 | bool           _race_possible;   // In cases where SDMU is true | 
|---|
| 317 | bool           _early_return;    // True if we do not initialize | 
|---|
| 318 | bool           _do_vector_loop;  // whether to do vectorization/simd style | 
|---|
| 319 | bool           _do_reserve_copy; // do reserve copy of the graph(loop) before final modification in output | 
|---|
| 320 | int            _num_work_vecs;   // Number of non memory vector operations | 
|---|
| 321 | int            _num_reductions;  // Number of reduction expressions applied | 
|---|
| 322 | int            _ii_first;        // generation with direct deps from mem phi | 
|---|
| 323 | int            _ii_last;         // generation with direct deps to   mem phi | 
|---|
| 324 | GrowableArray<int> _ii_order; | 
|---|
| 325 | #ifndef PRODUCT | 
|---|
| 326 | uintx          _vector_loop_debug; // provide more printing in debug mode | 
|---|
| 327 | #endif | 
|---|
| 328 |  | 
|---|
| 329 | // Accessors | 
|---|
| 330 | Arena* arena()                   { return _arena; } | 
|---|
| 331 |  | 
|---|
| 332 | Node* bb()                       { return _bb; } | 
|---|
| 333 | void  set_bb(Node* bb)           { _bb = bb; } | 
|---|
| 334 |  | 
|---|
| 335 | void set_lpt(IdealLoopTree* lpt) { _lpt = lpt; } | 
|---|
| 336 |  | 
|---|
| 337 | LoopNode* lp()                   { return _lp; } | 
|---|
| 338 | void      set_lp(LoopNode* lp)   { _lp = lp; | 
|---|
| 339 | _iv = lp->as_CountedLoop()->phi()->as_Phi(); } | 
|---|
| 340 | int      iv_stride()             { return lp()->as_CountedLoop()->stride_con(); } | 
|---|
| 341 |  | 
|---|
| 342 | int vector_width(Node* n) { | 
|---|
| 343 | BasicType bt = velt_basic_type(n); | 
|---|
| 344 | return MIN2(ABS(iv_stride()), Matcher::max_vector_size(bt)); | 
|---|
| 345 | } | 
|---|
| 346 | int vector_width_in_bytes(Node* n) { | 
|---|
| 347 | BasicType bt = velt_basic_type(n); | 
|---|
| 348 | return vector_width(n)*type2aelembytes(bt); | 
|---|
| 349 | } | 
|---|
| 350 | int get_vw_bytes_special(MemNode* s); | 
|---|
| 351 | MemNode* align_to_ref()            { return _align_to_ref; } | 
|---|
| 352 | void  set_align_to_ref(MemNode* m) { _align_to_ref = m; } | 
|---|
| 353 |  | 
|---|
| 354 | Node* ctrl(Node* n) const { return _phase->has_ctrl(n) ? _phase->get_ctrl(n) : n; } | 
|---|
| 355 |  | 
|---|
| 356 | // block accessors | 
|---|
| 357 | bool in_bb(Node* n)      { return n != NULL && n->outcnt() > 0 && ctrl(n) == _bb; } | 
|---|
| 358 | int  bb_idx(Node* n)     { assert(in_bb(n), "must be"); return _bb_idx.at(n->_idx); } | 
|---|
| 359 | void set_bb_idx(Node* n, int i) { _bb_idx.at_put_grow(n->_idx, i); } | 
|---|
| 360 |  | 
|---|
| 361 | // visited set accessors | 
|---|
| 362 | void visited_clear()           { _visited.Clear(); } | 
|---|
| 363 | void visited_set(Node* n)      { return _visited.set(bb_idx(n)); } | 
|---|
| 364 | int visited_test(Node* n)      { return _visited.test(bb_idx(n)); } | 
|---|
| 365 | int visited_test_set(Node* n)  { return _visited.test_set(bb_idx(n)); } | 
|---|
| 366 | void post_visited_clear()      { _post_visited.Clear(); } | 
|---|
| 367 | void post_visited_set(Node* n) { return _post_visited.set(bb_idx(n)); } | 
|---|
| 368 | int post_visited_test(Node* n) { return _post_visited.test(bb_idx(n)); } | 
|---|
| 369 |  | 
|---|
| 370 | // Ensure node_info contains element "i" | 
|---|
| 371 | void grow_node_info(int i) { if (i >= _node_info.length()) _node_info.at_put_grow(i, SWNodeInfo::initial); } | 
|---|
| 372 |  | 
|---|
| 373 | // memory alignment for a node | 
|---|
| 374 | int alignment(Node* n)                     { return _node_info.adr_at(bb_idx(n))->_alignment; } | 
|---|
| 375 | void set_alignment(Node* n, int a)         { int i = bb_idx(n); grow_node_info(i); _node_info.adr_at(i)->_alignment = a; } | 
|---|
| 376 |  | 
|---|
| 377 | // Max expression (DAG) depth from beginning of the block for each node | 
|---|
| 378 | int depth(Node* n)                         { return _node_info.adr_at(bb_idx(n))->_depth; } | 
|---|
| 379 | void set_depth(Node* n, int d)             { int i = bb_idx(n); grow_node_info(i); _node_info.adr_at(i)->_depth = d; } | 
|---|
| 380 |  | 
|---|
| 381 | // vector element type | 
|---|
| 382 | const Type* velt_type(Node* n)             { return _node_info.adr_at(bb_idx(n))->_velt_type; } | 
|---|
| 383 | BasicType velt_basic_type(Node* n)         { return velt_type(n)->array_element_basic_type(); } | 
|---|
| 384 | void set_velt_type(Node* n, const Type* t) { int i = bb_idx(n); grow_node_info(i); _node_info.adr_at(i)->_velt_type = t; } | 
|---|
| 385 | bool same_velt_type(Node* n1, Node* n2); | 
|---|
| 386 |  | 
|---|
| 387 | // my_pack | 
|---|
| 388 | Node_List* my_pack(Node* n)                 { return !in_bb(n) ? NULL : _node_info.adr_at(bb_idx(n))->_my_pack; } | 
|---|
| 389 | void set_my_pack(Node* n, Node_List* p)     { int i = bb_idx(n); grow_node_info(i); _node_info.adr_at(i)->_my_pack = p; } | 
|---|
| 390 | // is pack good for converting into one vector node replacing 12 nodes of Cmp, Bool, CMov | 
|---|
| 391 | bool is_cmov_pack(Node_List* p); | 
|---|
| 392 | bool is_cmov_pack_internal_node(Node_List* p, Node* nd) { return is_cmov_pack(p) && !nd->is_CMove(); } | 
|---|
| 393 | // For pack p, are all idx operands the same? | 
|---|
| 394 | bool same_inputs(Node_List* p, int idx); | 
|---|
| 395 | // CloneMap utilities | 
|---|
| 396 | bool same_origin_idx(Node* a, Node* b) const; | 
|---|
| 397 | bool same_generation(Node* a, Node* b) const; | 
|---|
| 398 |  | 
|---|
| 399 | // methods | 
|---|
| 400 |  | 
|---|
| 401 | // Extract the superword level parallelism | 
|---|
| 402 | void (); | 
|---|
| 403 | // Find the adjacent memory references and create pack pairs for them. | 
|---|
| 404 | void find_adjacent_refs(); | 
|---|
| 405 | // Tracing support | 
|---|
| 406 | #ifndef PRODUCT | 
|---|
| 407 | void find_adjacent_refs_trace_1(Node* best_align_to_mem_ref, int best_iv_adjustment); | 
|---|
| 408 | void print_loop(bool whole); | 
|---|
| 409 | #endif | 
|---|
| 410 | // Find a memory reference to align the loop induction variable to. | 
|---|
| 411 | MemNode* find_align_to_ref(Node_List &memops); | 
|---|
| 412 | // Calculate loop's iv adjustment for this memory ops. | 
|---|
| 413 | int get_iv_adjustment(MemNode* mem); | 
|---|
| 414 | // Can the preloop align the reference to position zero in the vector? | 
|---|
| 415 | bool ref_is_alignable(SWPointer& p); | 
|---|
| 416 | // rebuild the graph so all loads in different iterations of cloned loop become dependant on phi node (in _do_vector_loop only) | 
|---|
| 417 | bool hoist_loads_in_graph(); | 
|---|
| 418 | // Test whether MemNode::Memory dependency to the same load but in the first iteration of this loop is coming from memory phi | 
|---|
| 419 | // Return false if failed | 
|---|
| 420 | Node* find_phi_for_mem_dep(LoadNode* ld); | 
|---|
| 421 | // Return same node but from the first generation. Return 0, if not found | 
|---|
| 422 | Node* first_node(Node* nd); | 
|---|
| 423 | // Return same node as this but from the last generation. Return 0, if not found | 
|---|
| 424 | Node* last_node(Node* n); | 
|---|
| 425 | // Mark nodes belonging to first and last generation | 
|---|
| 426 | // returns first generation index or -1 if vectorization/simd is impossible | 
|---|
| 427 | int mark_generations(); | 
|---|
| 428 | // swapping inputs of commutative instruction (Add or Mul) | 
|---|
| 429 | bool fix_commutative_inputs(Node* gold, Node* fix); | 
|---|
| 430 | // make packs forcefully (in _do_vector_loop only) | 
|---|
| 431 | bool pack_parallel(); | 
|---|
| 432 | // Construct dependency graph. | 
|---|
| 433 | void dependence_graph(); | 
|---|
| 434 | // Return a memory slice (node list) in predecessor order starting at "start" | 
|---|
| 435 | void mem_slice_preds(Node* start, Node* stop, GrowableArray<Node*> &preds); | 
|---|
| 436 | // Can s1 and s2 be in a pack with s1 immediately preceding s2 and  s1 aligned at "align" | 
|---|
| 437 | bool stmts_can_pack(Node* s1, Node* s2, int align); | 
|---|
| 438 | // Does s exist in a pack at position pos? | 
|---|
| 439 | bool exists_at(Node* s, uint pos); | 
|---|
| 440 | // Is s1 immediately before s2 in memory? | 
|---|
| 441 | bool are_adjacent_refs(Node* s1, Node* s2); | 
|---|
| 442 | // Are s1 and s2 similar? | 
|---|
| 443 | bool isomorphic(Node* s1, Node* s2); | 
|---|
| 444 | // Is there no data path from s1 to s2 or s2 to s1? | 
|---|
| 445 | bool independent(Node* s1, Node* s2); | 
|---|
| 446 | // For a node pair (s1, s2) which is isomorphic and independent, | 
|---|
| 447 | // do s1 and s2 have similar input edges? | 
|---|
| 448 | bool have_similar_inputs(Node* s1, Node* s2); | 
|---|
| 449 | // Is there a data path between s1 and s2 and both are reductions? | 
|---|
| 450 | bool reduction(Node* s1, Node* s2); | 
|---|
| 451 | // Helper for independent | 
|---|
| 452 | bool independent_path(Node* shallow, Node* deep, uint dp=0); | 
|---|
| 453 | void set_alignment(Node* s1, Node* s2, int align); | 
|---|
| 454 | int data_size(Node* s); | 
|---|
| 455 | // Extend packset by following use->def and def->use links from pack members. | 
|---|
| 456 | void extend_packlist(); | 
|---|
| 457 | // Extend the packset by visiting operand definitions of nodes in pack p | 
|---|
| 458 | bool follow_use_defs(Node_List* p); | 
|---|
| 459 | // Extend the packset by visiting uses of nodes in pack p | 
|---|
| 460 | bool follow_def_uses(Node_List* p); | 
|---|
| 461 | // For extended packsets, ordinally arrange uses packset by major component | 
|---|
| 462 | void order_def_uses(Node_List* p); | 
|---|
| 463 | // Estimate the savings from executing s1 and s2 as a pack | 
|---|
| 464 | int est_savings(Node* s1, Node* s2); | 
|---|
| 465 | int adjacent_profit(Node* s1, Node* s2); | 
|---|
| 466 | int pack_cost(int ct); | 
|---|
| 467 | int unpack_cost(int ct); | 
|---|
| 468 | // Combine packs A and B with A.last == B.first into A.first..,A.last,B.second,..B.last | 
|---|
| 469 | void combine_packs(); | 
|---|
| 470 | // Construct the map from nodes to packs. | 
|---|
| 471 | void construct_my_pack_map(); | 
|---|
| 472 | // Remove packs that are not implemented or not profitable. | 
|---|
| 473 | void filter_packs(); | 
|---|
| 474 | // Merge CMoveD into new vector-nodes | 
|---|
| 475 | void merge_packs_to_cmovd(); | 
|---|
| 476 | // Adjust the memory graph for the packed operations | 
|---|
| 477 | void schedule(); | 
|---|
| 478 | // Remove "current" from its current position in the memory graph and insert | 
|---|
| 479 | // it after the appropriate insert points (lip or uip); | 
|---|
| 480 | void remove_and_insert(MemNode *current, MemNode *prev, MemNode *lip, Node *uip, Unique_Node_List &schd_before); | 
|---|
| 481 | // Within a store pack, schedule stores together by moving out the sandwiched memory ops according | 
|---|
| 482 | // to dependence info; and within a load pack, move loads down to the last executed load. | 
|---|
| 483 | void co_locate_pack(Node_List* p); | 
|---|
| 484 | // Convert packs into vector node operations | 
|---|
| 485 | void output(); | 
|---|
| 486 | // Create a vector operand for the nodes in pack p for operand: in(opd_idx) | 
|---|
| 487 | Node* vector_opd(Node_List* p, int opd_idx); | 
|---|
| 488 | // Can code be generated for pack p? | 
|---|
| 489 | bool implemented(Node_List* p); | 
|---|
| 490 | // For pack p, are all operands and all uses (with in the block) vector? | 
|---|
| 491 | bool profitable(Node_List* p); | 
|---|
| 492 | // If a use of pack p is not a vector use, then replace the use with an extract operation. | 
|---|
| 493 | void (Node_List* p); | 
|---|
| 494 | // Is use->in(u_idx) a vector use? | 
|---|
| 495 | bool is_vector_use(Node* use, int u_idx); | 
|---|
| 496 | // Construct reverse postorder list of block members | 
|---|
| 497 | bool construct_bb(); | 
|---|
| 498 | // Initialize per node info | 
|---|
| 499 | void initialize_bb(); | 
|---|
| 500 | // Insert n into block after pos | 
|---|
| 501 | void bb_insert_after(Node* n, int pos); | 
|---|
| 502 | // Compute max depth for expressions from beginning of block | 
|---|
| 503 | void compute_max_depth(); | 
|---|
| 504 | // Compute necessary vector element type for expressions | 
|---|
| 505 | void compute_vector_element_type(); | 
|---|
| 506 | // Are s1 and s2 in a pack pair and ordered as s1,s2? | 
|---|
| 507 | bool in_packset(Node* s1, Node* s2); | 
|---|
| 508 | // Is s in pack p? | 
|---|
| 509 | Node_List* in_pack(Node* s, Node_List* p); | 
|---|
| 510 | // Remove the pack at position pos in the packset | 
|---|
| 511 | void remove_pack_at(int pos); | 
|---|
| 512 | // Return the node executed first in pack p. | 
|---|
| 513 | Node* executed_first(Node_List* p); | 
|---|
| 514 | // Return the node executed last in pack p. | 
|---|
| 515 | Node* executed_last(Node_List* p); | 
|---|
| 516 | static LoadNode::ControlDependency control_dependency(Node_List* p); | 
|---|
| 517 | // Alignment within a vector memory reference | 
|---|
| 518 | int memory_alignment(MemNode* s, int iv_adjust); | 
|---|
| 519 | // (Start, end] half-open range defining which operands are vector | 
|---|
| 520 | void vector_opd_range(Node* n, uint* start, uint* end); | 
|---|
| 521 | // Smallest type containing range of values | 
|---|
| 522 | const Type* container_type(Node* n); | 
|---|
| 523 | // Adjust pre-loop limit so that in main loop, a load/store reference | 
|---|
| 524 | // to align_to_ref will be a position zero in the vector. | 
|---|
| 525 | void align_initial_loop_index(MemNode* align_to_ref); | 
|---|
| 526 | // Find pre loop end from main loop.  Returns null if none. | 
|---|
| 527 | CountedLoopEndNode* get_pre_loop_end(CountedLoopNode *cl); | 
|---|
| 528 | // Is the use of d1 in u1 at the same operand position as d2 in u2? | 
|---|
| 529 | bool opnd_positions_match(Node* d1, Node* u1, Node* d2, Node* u2); | 
|---|
| 530 | void init(); | 
|---|
| 531 | // clean up some basic structures - used if the ideal graph was rebuilt | 
|---|
| 532 | void restart(); | 
|---|
| 533 |  | 
|---|
| 534 | // print methods | 
|---|
| 535 | void print_packset(); | 
|---|
| 536 | void print_pack(Node_List* p); | 
|---|
| 537 | void print_bb(); | 
|---|
| 538 | void print_stmt(Node* s); | 
|---|
| 539 | char* blank(uint depth); | 
|---|
| 540 |  | 
|---|
| 541 | void packset_sort(int n); | 
|---|
| 542 | }; | 
|---|
| 543 |  | 
|---|
| 544 |  | 
|---|
| 545 |  | 
|---|
| 546 | //------------------------------SWPointer--------------------------- | 
|---|
| 547 | // Information about an address for dependence checking and vector alignment | 
|---|
| 548 | class SWPointer { | 
|---|
| 549 | protected: | 
|---|
| 550 | MemNode*   _mem;           // My memory reference node | 
|---|
| 551 | SuperWord* _slp;           // SuperWord class | 
|---|
| 552 |  | 
|---|
| 553 | Node* _base;               // NULL if unsafe nonheap reference | 
|---|
| 554 | Node* _adr;                // address pointer | 
|---|
| 555 | jint  _scale;              // multiplier for iv (in bytes), 0 if no loop iv | 
|---|
| 556 | jint  _offset;             // constant offset (in bytes) | 
|---|
| 557 | Node* _invar;              // invariant offset (in bytes), NULL if none | 
|---|
| 558 | bool  _negate_invar;       // if true then use: (0 - _invar) | 
|---|
| 559 | Node_Stack* _nstack;       // stack used to record a swpointer trace of variants | 
|---|
| 560 | bool        _analyze_only; // Used in loop unrolling only for swpointer trace | 
|---|
| 561 | uint        _stack_idx;    // Used in loop unrolling only for swpointer trace | 
|---|
| 562 |  | 
|---|
| 563 | PhaseIdealLoop* phase() { return _slp->phase(); } | 
|---|
| 564 | IdealLoopTree*  lpt()   { return _slp->lpt(); } | 
|---|
| 565 | PhiNode*        iv()    { return _slp->iv();  } // Induction var | 
|---|
| 566 |  | 
|---|
| 567 | bool invariant(Node* n); | 
|---|
| 568 |  | 
|---|
| 569 | // Match: k*iv + offset | 
|---|
| 570 | bool scaled_iv_plus_offset(Node* n); | 
|---|
| 571 | // Match: k*iv where k is a constant that's not zero | 
|---|
| 572 | bool scaled_iv(Node* n); | 
|---|
| 573 | // Match: offset is (k [+/- invariant]) | 
|---|
| 574 | bool offset_plus_k(Node* n, bool negate = false); | 
|---|
| 575 |  | 
|---|
| 576 | public: | 
|---|
| 577 | enum CMP { | 
|---|
| 578 | Less          = 1, | 
|---|
| 579 | Greater       = 2, | 
|---|
| 580 | Equal         = 4, | 
|---|
| 581 | NotEqual      = (Less | Greater), | 
|---|
| 582 | NotComparable = (Less | Greater | Equal) | 
|---|
| 583 | }; | 
|---|
| 584 |  | 
|---|
| 585 | SWPointer(MemNode* mem, SuperWord* slp, Node_Stack *nstack, bool analyze_only); | 
|---|
| 586 | // Following is used to create a temporary object during | 
|---|
| 587 | // the pattern match of an address expression. | 
|---|
| 588 | SWPointer(SWPointer* p); | 
|---|
| 589 |  | 
|---|
| 590 | bool valid()  { return _adr != NULL; } | 
|---|
| 591 | bool has_iv() { return _scale != 0; } | 
|---|
| 592 |  | 
|---|
| 593 | Node* base()             { return _base; } | 
|---|
| 594 | Node* adr()              { return _adr; } | 
|---|
| 595 | MemNode* mem()           { return _mem; } | 
|---|
| 596 | int   scale_in_bytes()   { return _scale; } | 
|---|
| 597 | Node* invar()            { return _invar; } | 
|---|
| 598 | bool  negate_invar()     { return _negate_invar; } | 
|---|
| 599 | int   offset_in_bytes()  { return _offset; } | 
|---|
| 600 | int   memory_size()      { return _mem->memory_size(); } | 
|---|
| 601 | Node_Stack* node_stack() { return _nstack; } | 
|---|
| 602 |  | 
|---|
| 603 | // Comparable? | 
|---|
| 604 | int cmp(SWPointer& q) { | 
|---|
| 605 | if (valid() && q.valid() && | 
|---|
| 606 | (_adr == q._adr || (_base == _adr && q._base == q._adr)) && | 
|---|
| 607 | _scale == q._scale   && | 
|---|
| 608 | _invar == q._invar   && | 
|---|
| 609 | _negate_invar == q._negate_invar) { | 
|---|
| 610 | bool overlap = q._offset <   _offset +   memory_size() && | 
|---|
| 611 | _offset < q._offset + q.memory_size(); | 
|---|
| 612 | return overlap ? Equal : (_offset < q._offset ? Less : Greater); | 
|---|
| 613 | } else { | 
|---|
| 614 | return NotComparable; | 
|---|
| 615 | } | 
|---|
| 616 | } | 
|---|
| 617 |  | 
|---|
| 618 | bool not_equal(SWPointer& q)    { return not_equal(cmp(q)); } | 
|---|
| 619 | bool equal(SWPointer& q)        { return equal(cmp(q)); } | 
|---|
| 620 | bool comparable(SWPointer& q)   { return comparable(cmp(q)); } | 
|---|
| 621 | static bool not_equal(int cmp)  { return cmp <= NotEqual; } | 
|---|
| 622 | static bool equal(int cmp)      { return cmp == Equal; } | 
|---|
| 623 | static bool comparable(int cmp) { return cmp < NotComparable; } | 
|---|
| 624 |  | 
|---|
| 625 | void print(); | 
|---|
| 626 |  | 
|---|
| 627 | #ifndef PRODUCT | 
|---|
| 628 | class Tracer { | 
|---|
| 629 | friend class SuperWord; | 
|---|
| 630 | friend class SWPointer; | 
|---|
| 631 | SuperWord*   _slp; | 
|---|
| 632 | static int   _depth; | 
|---|
| 633 | int _depth_save; | 
|---|
| 634 | void print_depth(); | 
|---|
| 635 | int  depth() const    { return _depth; } | 
|---|
| 636 | void set_depth(int d) { _depth = d; } | 
|---|
| 637 | void inc_depth()      { _depth++;} | 
|---|
| 638 | void dec_depth()      { if (_depth > 0) _depth--;} | 
|---|
| 639 | void store_depth()    {_depth_save = _depth;} | 
|---|
| 640 | void restore_depth()  {_depth = _depth_save;} | 
|---|
| 641 |  | 
|---|
| 642 | class Depth { | 
|---|
| 643 | friend class Tracer; | 
|---|
| 644 | friend class SWPointer; | 
|---|
| 645 | friend class SuperWord; | 
|---|
| 646 | Depth()  { ++_depth; } | 
|---|
| 647 | Depth(int x)  { _depth = 0; } | 
|---|
| 648 | ~Depth() { if (_depth > 0) --_depth;} | 
|---|
| 649 | }; | 
|---|
| 650 | Tracer (SuperWord* slp) : _slp(slp) {} | 
|---|
| 651 |  | 
|---|
| 652 | // tracing functions | 
|---|
| 653 | void ctor_1(Node* mem); | 
|---|
| 654 | void ctor_2(Node* adr); | 
|---|
| 655 | void ctor_3(Node* adr, int i); | 
|---|
| 656 | void ctor_4(Node* adr, int i); | 
|---|
| 657 | void ctor_5(Node* adr, Node* base,  int i); | 
|---|
| 658 | void ctor_6(Node* mem); | 
|---|
| 659 |  | 
|---|
| 660 | void invariant_1(Node *n, Node *n_c); | 
|---|
| 661 |  | 
|---|
| 662 | void scaled_iv_plus_offset_1(Node* n); | 
|---|
| 663 | void scaled_iv_plus_offset_2(Node* n); | 
|---|
| 664 | void scaled_iv_plus_offset_3(Node* n); | 
|---|
| 665 | void scaled_iv_plus_offset_4(Node* n); | 
|---|
| 666 | void scaled_iv_plus_offset_5(Node* n); | 
|---|
| 667 | void scaled_iv_plus_offset_6(Node* n); | 
|---|
| 668 | void scaled_iv_plus_offset_7(Node* n); | 
|---|
| 669 | void scaled_iv_plus_offset_8(Node* n); | 
|---|
| 670 |  | 
|---|
| 671 | void scaled_iv_1(Node* n); | 
|---|
| 672 | void scaled_iv_2(Node* n, int scale); | 
|---|
| 673 | void scaled_iv_3(Node* n, int scale); | 
|---|
| 674 | void scaled_iv_4(Node* n, int scale); | 
|---|
| 675 | void scaled_iv_5(Node* n, int scale); | 
|---|
| 676 | void scaled_iv_6(Node* n, int scale); | 
|---|
| 677 | void scaled_iv_7(Node* n); | 
|---|
| 678 | void scaled_iv_8(Node* n, SWPointer* tmp); | 
|---|
| 679 | void scaled_iv_9(Node* n, int _scale, int _offset, int mult); | 
|---|
| 680 | void scaled_iv_10(Node* n); | 
|---|
| 681 |  | 
|---|
| 682 | void offset_plus_k_1(Node* n); | 
|---|
| 683 | void offset_plus_k_2(Node* n, int _offset); | 
|---|
| 684 | void offset_plus_k_3(Node* n, int _offset); | 
|---|
| 685 | void offset_plus_k_4(Node* n); | 
|---|
| 686 | void offset_plus_k_5(Node* n, Node* _invar); | 
|---|
| 687 | void offset_plus_k_6(Node* n, Node* _invar, bool _negate_invar, int _offset); | 
|---|
| 688 | void offset_plus_k_7(Node* n, Node* _invar, bool _negate_invar, int _offset); | 
|---|
| 689 | void offset_plus_k_8(Node* n, Node* _invar, bool _negate_invar, int _offset); | 
|---|
| 690 | void offset_plus_k_9(Node* n, Node* _invar, bool _negate_invar, int _offset); | 
|---|
| 691 | void offset_plus_k_10(Node* n, Node* _invar, bool _negate_invar, int _offset); | 
|---|
| 692 | void offset_plus_k_11(Node* n); | 
|---|
| 693 |  | 
|---|
| 694 | } _tracer;//TRacer; | 
|---|
| 695 | #endif | 
|---|
| 696 | }; | 
|---|
| 697 |  | 
|---|
| 698 | #endif // SHARE_OPTO_SUPERWORD_HPP | 
|---|
| 699 |  | 
|---|