1// Licensed to the .NET Foundation under one or more agreements.
2// The .NET Foundation licenses this file to you under the MIT license.
3// See the LICENSE file in the project root for more information.
4
5/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
6XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
7XX XX
8XX LoopCloning XX
9XX XX
10XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
11XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
12
13 Loop cloning optimizations comprise of the following steps:
14 - Loop detection logic which is existing logic in the JIT that records
15 loop information with loop flags.
16 - The next step is to identify loop optimization candidates. This is done
17 by optObtainLoopCloningOpts. The loop context variable is updated with
18 all the necessary information (for ex: block, stmt, tree information)
19 to do the optimization later.
20 a) This involves checking if the loop is well-formed with respect to
21 the optimization being performed.
22 b) In array bounds check case, reconstructing the morphed GT_INDEX
23 nodes back to their array representation.
24 i) The array index is stored in the "context" variable with
25 additional block, tree, stmt info.
26 - Once the optimization candidates are identified, we derive cloning conditions
27 For ex: to clone a simple "for (i=0; i<n; ++i) { a[i] }" loop, we need the
28 following conditions:
29 (a != null) && ((n >= 0) & (n <= a.length) & (stride > 0))
30 a) Note the short circuit AND for (a != null). These are called block
31 conditions or deref-conditions since these conditions need to be in their
32 own blocks to be able to short-circuit.
33 i) For a doubly nested loop on i, j, we would then have
34 conditions like
35 (a != null) && (i < a.len) && (a[i] != null) && (j < a[i].len)
36 all short-circuiting creating blocks.
37
38 Advantage:
39 All conditions are checked before we enter the fast path. So fast
40 path gets as fast as it can be.
41
42 Disadvantage:
43 Creation of blocks.
44
45 Heuristic:
46 Therefore we will not clone if we exceed creating 4 blocks.
47
48 b) The other conditions called cloning conditions are transformed into LC_Condition
49 structs which are then optimized.
50 i) Optimization of conditions involves removing redundant condition checks.
51 ii) If some conditions evaluate to true statically, then they are removed.
52 iii) If any condition evaluates to false statically, then loop cloning is
53 aborted for that loop.
54 - Then the block splitting occurs and loop cloning conditions is transformed into
55 GenTree and added to the loop cloning choice block.
56
57 Preconditions
58 - Loop detection should have completed and the loop table should be
59 populated with the loop dscs.
60 - The loops that will be considered are the ones with the LPFLG_ITER
61 marked on them.
62
63 Limitations
64 - For array based optimizations the loop choice condition is checked
65 before the loop body. This implies that the loop initializer statement
66 has not executed at the time of the check. So any loop cloning condition
67 involving the initial value of the loop counter cannot be condition checked
68 as it hasn't been assigned yet at the time of condition checking. Therefore
69 the initial value has to be statically known. This can be fixed with further
70 effort.
71
72 Assumption
73 - The assumption is that the optimization candidates collected during the
74 identification phase will be the ones that will be optimized. In other words,
75 the loop that is present originally will be the fast path. Explicitly, the cloned
76 path will be the slow path and will be unoptimized. This allows us to
77 collect additional information at the same time of identifying the optimization
78 candidates. This later helps us to perform the optimizations during actual cloning.
79 - All loop cloning choice conditions will automatically be "AND"-ed. These are
80 bitwise AND operations.
81 - Perform short circuit AND for (array != null) side effect check
82 before hoisting (limit <= a.length) check.
83 For ex: to clone a simple "for (i=0; i<n; ++i) { a[i] }" loop, we need the
84 following conditions:
85 (a != null) && ((n >= 0) & (n <= a.length) & (stride > 0))
86
87*/
88#pragma once
89
90class Compiler;
91
92/**
93 *
94 * Represents an array access and associated bounds checks.
95 * Array access is required have the array and indices in local variables.
96 * This struct is constructed using a GT_INDEX node that is broken into
97 * its sub trees.
98 *
99 */
100struct ArrIndex
101{
102 unsigned arrLcl; // The array base local num
103 JitExpandArrayStack<unsigned> indLcls; // The indices local nums
104 JitExpandArrayStack<GenTree*> bndsChks; // The bounds checks nodes along each dimension.
105 unsigned rank; // Rank of the array
106 BasicBlock* useBlock; // Block where the [] occurs
107
108 ArrIndex(CompAllocator alloc) : arrLcl(BAD_VAR_NUM), indLcls(alloc), bndsChks(alloc), rank(0), useBlock(nullptr)
109 {
110 }
111
112#ifdef DEBUG
113 void Print(unsigned dim = -1)
114 {
115 printf("V%02d", arrLcl);
116 for (unsigned i = 0; i < ((dim == -1) ? rank : dim); ++i)
117 {
118 printf("[V%02d]", indLcls.GetRef(i));
119 }
120 }
121#endif
122};
123
124// Forward declarations
125#define LC_OPT(en) struct en##OptInfo;
126#include "loopcloningopts.h"
127
128/**
129 *
130 * LcOptInfo represents the optimization information for loop cloning,
131 * other classes are supposed to derive from this base class.
132 *
133 * Example usage:
134 * LcMdArrayOptInfo is multi-dimensional array optimization for which the
135 * loop can be cloned.
136 * LcArrIndexOptInfo is a jagged array optimization for which the loop
137 * can be cloned.
138 *
139 * So LcOptInfo represents any type of optimization opportunity that
140 * occurs in a loop and the metadata for the optimization is stored in
141 * this class.
142 */
143struct LcOptInfo
144{
145 enum OptType
146 {
147#undef LC_OPT
148#define LC_OPT(en) en,
149#include "loopcloningopts.h"
150 };
151
152 void* optInfo;
153 OptType optType;
154 LcOptInfo(void* optInfo, OptType optType) : optInfo(optInfo), optType(optType)
155 {
156 }
157
158 OptType GetOptType()
159 {
160 return optType;
161 }
162#undef LC_OPT
163#define LC_OPT(en) \
164 en##OptInfo* As##en##OptInfo() \
165 { \
166 assert(optType == en); \
167 return reinterpret_cast<en##OptInfo*>(this); \
168 }
169#include "loopcloningopts.h"
170};
171
172/**
173 *
174 * Optimization info for a multi-dimensional array.
175 */
176struct LcMdArrayOptInfo : public LcOptInfo
177{
178 GenTreeArrElem* arrElem; // "arrElem" node of an MD array.
179 unsigned dim; // "dim" represents upto what level of the rank this optimization applies to.
180 // For example, a[i,j,k] could be the MD array "arrElem" but if "dim" is 2,
181 // then this node is treated as though it were a[i,j]
182 ArrIndex* index; // "index" cached computation in the form of an ArrIndex representation.
183
184 LcMdArrayOptInfo(GenTreeArrElem* arrElem, unsigned dim)
185 : LcOptInfo(this, LcMdArray), arrElem(arrElem), dim(dim), index(nullptr)
186 {
187 }
188
189 ArrIndex* GetArrIndexForDim(CompAllocator alloc)
190 {
191 if (index == nullptr)
192 {
193 index = new (alloc) ArrIndex(alloc);
194 index->rank = arrElem->gtArrRank;
195 for (unsigned i = 0; i < dim; ++i)
196 {
197 index->indLcls.Push(arrElem->gtArrInds[i]->gtLclVarCommon.gtLclNum);
198 }
199 index->arrLcl = arrElem->gtArrObj->gtLclVarCommon.gtLclNum;
200 }
201 return index;
202 }
203};
204
205/**
206 *
207 * Optimization info for a jagged array.
208 */
209struct LcJaggedArrayOptInfo : public LcOptInfo
210{
211 unsigned dim; // "dim" represents upto what level of the rank this optimization applies to.
212 // For example, a[i][j][k] could be the jagged array but if "dim" is 2,
213 // then this node is treated as though it were a[i][j]
214 ArrIndex arrIndex; // ArrIndex representation of the array.
215 GenTree* stmt; // "stmt" where the optimization opportunity occurs.
216
217 LcJaggedArrayOptInfo(ArrIndex& arrIndex, unsigned dim, GenTree* stmt)
218 : LcOptInfo(this, LcJaggedArray), dim(dim), arrIndex(arrIndex), stmt(stmt)
219 {
220 }
221};
222
223/**
224 *
225 * Symbolic representation of a.length, or a[i][j].length or a[i,j].length and so on.
226 * OperType decides whether "arrLength" is invoked on the array or if it is just an array.
227 */
228struct LC_Array
229{
230 enum ArrType
231 {
232 Invalid,
233 Jagged,
234 MdArray
235 };
236
237 enum OperType
238 {
239 None,
240 ArrLen,
241 };
242
243 ArrType type; // The type of the array on which to invoke length operator.
244 ArrIndex* arrIndex; // ArrIndex representation of this array.
245
246 OperType oper;
247
248#ifdef DEBUG
249 void Print()
250 {
251 arrIndex->Print(dim);
252 if (oper == ArrLen)
253 {
254 printf(".Length");
255 }
256 }
257#endif
258
259 int dim; // "dim" = which index to invoke arrLen on, if -1 invoke on the whole array
260 // Example 1: a[0][1][2] and dim = 2 implies a[0][1].length
261 // Example 2: a[0][1][2] and dim = -1 implies a[0][1][2].length
262 LC_Array() : type(Invalid), dim(-1)
263 {
264 }
265 LC_Array(ArrType type, ArrIndex* arrIndex, int dim, OperType oper)
266 : type(type), arrIndex(arrIndex), oper(oper), dim(dim)
267 {
268 }
269
270 LC_Array(ArrType type, ArrIndex* arrIndex, OperType oper) : type(type), arrIndex(arrIndex), oper(oper), dim(-1)
271 {
272 }
273
274 // Equality operator
275 bool operator==(const LC_Array& that) const
276 {
277 assert(type != Invalid && that.type != Invalid);
278
279 // Types match and the array base matches.
280 if (type != that.type || arrIndex->arrLcl != that.arrIndex->arrLcl || oper != that.oper)
281 {
282 return false;
283 }
284
285 // If the dim ranks are not matching, quit.
286 int rank1 = GetDimRank();
287 int rank2 = that.GetDimRank();
288 if (rank1 != rank2)
289 {
290 return false;
291 }
292
293 // Check for the indices.
294 for (int i = 0; i < rank1; ++i)
295 {
296 if (arrIndex->indLcls[i] != that.arrIndex->indLcls[i])
297 {
298 return false;
299 }
300 }
301 return true;
302 }
303
304 // The max dim on which length is invoked.
305 int GetDimRank() const
306 {
307 return (dim < 0) ? (int)arrIndex->rank : dim;
308 }
309
310 // Get a tree representation for this symbolic a.length
311 GenTree* ToGenTree(Compiler* comp);
312};
313
314/**
315 *
316 * Symbolic representation of either a constant like 1, 2 or a variable V02, V03 etc. or an "LC_Array" or the null
317 * constant.
318 */
319struct LC_Ident
320{
321 enum IdentType
322 {
323 Invalid,
324 Const,
325 Var,
326 ArrLen,
327 Null,
328 };
329
330 unsigned constant; // The constant value if this node is of type "Const", or the lcl num if "Var"
331 LC_Array arrLen; // The LC_Array if the type is "ArrLen"
332 IdentType type; // The type of this object
333
334 // Equality operator
335 bool operator==(const LC_Ident& that) const
336 {
337 switch (type)
338 {
339 case Const:
340 case Var:
341 return (type == that.type) && constant == that.constant;
342 case ArrLen:
343 return (type == that.type) && (arrLen == that.arrLen);
344 case Null:
345 return (type == that.type);
346 default:
347 assert(!"Unknown LC_Ident type");
348 unreached();
349 }
350 }
351
352#ifdef DEBUG
353 void Print()
354 {
355 switch (type)
356 {
357 case Const:
358 printf("%u", constant);
359 break;
360 case Var:
361 printf("V%02d", constant);
362 break;
363 case ArrLen:
364 arrLen.Print();
365 break;
366 case Null:
367 printf("null");
368 break;
369 default:
370 assert(false);
371 break;
372 }
373 }
374#endif
375
376 LC_Ident() : type(Invalid)
377 {
378 }
379 LC_Ident(unsigned constant, IdentType type) : constant(constant), type(type)
380 {
381 }
382 explicit LC_Ident(IdentType type) : type(type)
383 {
384 }
385 explicit LC_Ident(const LC_Array& arrLen) : arrLen(arrLen), type(ArrLen)
386 {
387 }
388
389 // Convert this symbolic representation into a tree node.
390 GenTree* ToGenTree(Compiler* comp);
391};
392
393/**
394 *
395 * Symbolic representation of an expr that involves an "LC_Ident"
396 */
397struct LC_Expr
398{
399 enum ExprType
400 {
401 Invalid,
402 Ident,
403 };
404
405 LC_Ident ident;
406 ExprType type;
407
408 // Equality operator
409 bool operator==(const LC_Expr& that) const
410 {
411 assert(type != Invalid && that.type != Invalid);
412
413 // If the types don't match quit.
414 if (type != that.type)
415 {
416 return false;
417 }
418
419 // Check if the ident match.
420 return (ident == that.ident);
421 }
422
423#ifdef DEBUG
424 void Print()
425 {
426 if (type == Ident)
427 {
428 ident.Print();
429 }
430 }
431#endif
432
433 LC_Expr() : type(Invalid)
434 {
435 }
436 explicit LC_Expr(const LC_Ident& ident) : ident(ident), type(Ident)
437 {
438 }
439
440 // Convert LC_Expr into a tree node.
441 GenTree* ToGenTree(Compiler* comp);
442};
443
444/**
445 *
446 * Symbolic representation of a conditional operation involving two "LC_Expr":
447 * LC_Expr < LC_Expr, for example: i > 0, i < a.length
448 */
449struct LC_Condition
450{
451 LC_Expr op1;
452 LC_Expr op2;
453 genTreeOps oper;
454
455#ifdef DEBUG
456 void Print()
457 {
458 op1.Print();
459 printf(" %s ", GenTree::OpName(oper));
460 op2.Print();
461 }
462#endif
463
464 // Check if the condition evaluates statically to true or false, i < i => false, a.length > 0 => true
465 // The result is put in "pResult" parameter and is valid if the method returns "true". Otherwise, the
466 // condition could not be evaluated.
467 bool Evaluates(bool* pResult);
468
469 // Check if two conditions can be combined to yield one condition.
470 bool Combines(const LC_Condition& cond, LC_Condition* newCond);
471
472 LC_Condition()
473 {
474 }
475 LC_Condition(genTreeOps oper, const LC_Expr& op1, const LC_Expr& op2) : op1(op1), op2(op2), oper(oper)
476 {
477 }
478
479 // Convert this conditional operation into a GenTree.
480 GenTree* ToGenTree(Compiler* comp);
481};
482
483/**
484 * A deref tree of an array expression.
485 * a[i][j][k], b[i] and a[i][y][k] are the occurrences in the loop, then, the tree would be:
486 * a => {
487 * i => {
488 * j => {
489 * k => {}
490 * },
491 * y => {
492 * k => {}
493 * },
494 * }
495 * },
496 * b => {
497 * i => {}
498 * }
499 */
500struct LC_Deref
501{
502 const LC_Array array;
503 JitExpandArrayStack<LC_Deref*>* children;
504
505 unsigned level;
506
507 LC_Deref(const LC_Array& array, unsigned level) : array(array), children(nullptr), level(level)
508 {
509 }
510
511 LC_Deref* Find(unsigned lcl);
512
513 unsigned Lcl();
514
515 bool HasChildren();
516 void EnsureChildren(CompAllocator alloc);
517 static LC_Deref* Find(JitExpandArrayStack<LC_Deref*>* children, unsigned lcl);
518
519 void DeriveLevelConditions(JitExpandArrayStack<JitExpandArrayStack<LC_Condition>*>* len);
520#ifdef DEBUG
521 void Print(unsigned indent = 0)
522 {
523 unsigned tab = 4 * indent;
524 printf("%*s%d,%d => {", tab, "", Lcl(), level);
525 if (children != nullptr)
526 {
527 for (unsigned i = 0; i < children->Size(); ++i)
528 {
529 if (i > 0)
530 {
531 printf(",");
532 }
533 printf("\n");
534#ifdef _MSC_VER
535 (*children)[i]->Print(indent + 1);
536#else // _MSC_VER
537 (*((JitExpandArray<LC_Deref*>*)children))[i]->Print(indent + 1);
538#endif // _MSC_VER
539 }
540 }
541 printf("\n%*s}", tab, "");
542 }
543#endif
544};
545
546/**
547 *
548 * The "context" represents data that is used for making loop-cloning decisions.
549 * - The data is the collection of optimization opportunities
550 * - and the conditions (LC_Condition) that decide between the fast
551 * path or the slow path.
552 *
553 * BNF for LC_Condition:
554 * LC_Condition : LC_Expr genTreeOps LC_Expr
555 * LC_Expr : LC_Ident | LC_Ident + Constant
556 * LC_Ident : Constant | Var | LC_Array
557 * LC_Array : .
558 * genTreeOps : GT_GE | GT_LE | GT_GT | GT_LT
559 *
560 */
561struct LoopCloneContext
562{
563 CompAllocator alloc; // The allocator
564 JitExpandArrayStack<LcOptInfo*>** optInfo; // The array of optimization opportunities found in each loop. (loop x
565 // optimization-opportunities)
566 JitExpandArrayStack<LC_Condition>** conditions; // The array of conditions that influence which path to take for
567 // each
568 // loop. (loop x cloning-conditions)
569 JitExpandArrayStack<LC_Array>** derefs; // The array of dereference conditions found in each loop. (loop x
570 // deref-conditions)
571 JitExpandArrayStack<JitExpandArrayStack<LC_Condition>*>** blockConditions; // The array of block levels of
572 // conditions for
573 // each loop. (loop x level x conditions)
574
575 LoopCloneContext(unsigned loopCount, CompAllocator alloc) : alloc(alloc)
576 {
577 optInfo = new (alloc) JitExpandArrayStack<LcOptInfo*>*[loopCount];
578 conditions = new (alloc) JitExpandArrayStack<LC_Condition>*[loopCount];
579 derefs = new (alloc) JitExpandArrayStack<LC_Array>*[loopCount];
580 blockConditions = new (alloc) JitExpandArrayStack<JitExpandArrayStack<LC_Condition>*>*[loopCount];
581 for (unsigned i = 0; i < loopCount; ++i)
582 {
583 optInfo[i] = nullptr;
584 conditions[i] = nullptr;
585 derefs[i] = nullptr;
586 blockConditions[i] = nullptr;
587 }
588 }
589
590 // Evaluate conditions into a JTRUE stmt and put it in the block. Reverse condition if 'reverse' is true.
591 void CondToStmtInBlock(Compiler* comp, JitExpandArrayStack<LC_Condition>& conds, BasicBlock* block, bool reverse);
592
593 // Get all the optimization information for loop "loopNum"; This information is held in "optInfo" array.
594 // If NULL this allocates the optInfo[loopNum] array for "loopNum"
595 JitExpandArrayStack<LcOptInfo*>* EnsureLoopOptInfo(unsigned loopNum);
596
597 // Get all the optimization information for loop "loopNum"; This information is held in "optInfo" array.
598 // If NULL this does not allocate the optInfo[loopNum] array for "loopNum"
599 JitExpandArrayStack<LcOptInfo*>* GetLoopOptInfo(unsigned loopNum);
600
601 // Cancel all optimizations for loop "loopNum" by clearing out the "conditions" member if non-null
602 // and setting the optInfo to "null.", If "null", then the user of this class is not supposed to
603 // clone this loop.
604 void CancelLoopOptInfo(unsigned loopNum);
605
606 // Get the conditions that decide which loop to take for "loopNum." If NULL allocate an empty array.
607 JitExpandArrayStack<LC_Condition>* EnsureConditions(unsigned loopNum);
608
609 // Get the conditions for loop. No allocation is performed.
610 JitExpandArrayStack<LC_Condition>* GetConditions(unsigned loopNum);
611
612 // Ensure that the "deref" conditions array is allocated.
613 JitExpandArrayStack<LC_Array>* EnsureDerefs(unsigned loopNum);
614
615 // Get block conditions for each loop, no allocation is performed.
616 JitExpandArrayStack<JitExpandArrayStack<LC_Condition>*>* GetBlockConditions(unsigned loopNum);
617
618 // Ensure that the block condition is present, if not allocate space.
619 JitExpandArrayStack<JitExpandArrayStack<LC_Condition>*>* EnsureBlockConditions(unsigned loopNum,
620 unsigned totalBlocks);
621
622 // Print the block conditions for the loop.
623 void PrintBlockConditions(unsigned loopNum);
624
625 // Does the loop have block conditions?
626 bool HasBlockConditions(unsigned loopNum);
627
628 // Evaluate the conditions for "loopNum" and indicate if they are either all true or any of them are false.
629 // "pAllTrue" implies all the conditions are statically known to be true.
630 // "pAnyFalse" implies at least one condition is statically known to be false.
631 // If neither of them are true, then some conditions' evaluations are statically unknown.
632 //
633 // If all conditions yield true, then the caller doesn't need to clone the loop, but it can perform
634 // fast path optimizations.
635 // If any condition yields false, then the caller needs to abort cloning the loop (neither clone nor
636 // fast path optimizations.)
637 //
638 // Assumes the conditions involve an AND join operator.
639 void EvaluateConditions(unsigned loopNum, bool* pAllTrue, bool* pAnyFalse DEBUGARG(bool verbose));
640
641private:
642 void OptimizeConditions(JitExpandArrayStack<LC_Condition>& conds);
643
644public:
645 // Optimize conditions to remove redundant conditions.
646 void OptimizeConditions(unsigned loopNum DEBUGARG(bool verbose));
647
648 void OptimizeBlockConditions(unsigned loopNum DEBUGARG(bool verbose));
649
650#ifdef DEBUG
651 void PrintConditions(unsigned loopNum);
652#endif
653};
654