1// Copyright (c) 2016, the Dart project authors. Please see the AUTHORS file
2// for details. All rights reserved. Use of this source code is governed by a
3// BSD-style license that can be found in the LICENSE file.
4
5#include "vm/compiler/backend/redundancy_elimination.h"
6
7#include "vm/bit_vector.h"
8#include "vm/compiler/backend/flow_graph.h"
9#include "vm/compiler/backend/il.h"
10#include "vm/compiler/backend/il_printer.h"
11#include "vm/compiler/backend/loops.h"
12#include "vm/hash_map.h"
13#include "vm/stack_frame.h"
14
15namespace dart {
16
17DEFINE_FLAG(bool, dead_store_elimination, true, "Eliminate dead stores");
18DEFINE_FLAG(bool, load_cse, true, "Use redundant load elimination.");
19DEFINE_FLAG(bool,
20 optimize_lazy_initializer_calls,
21 true,
22 "Eliminate redundant lazy initializer calls.");
23DEFINE_FLAG(bool,
24 trace_load_optimization,
25 false,
26 "Print live sets for load optimization pass.");
27
28// Quick access to the current zone.
29#define Z (zone())
30
31class CSEInstructionMap : public ValueObject {
32 public:
33 CSEInstructionMap() : map_() {}
34 explicit CSEInstructionMap(const CSEInstructionMap& other)
35 : ValueObject(), map_(other.map_) {}
36
37 Instruction* Lookup(Instruction* other) const {
38 ASSERT(other->AllowsCSE());
39 return map_.LookupValue(other);
40 }
41
42 void Insert(Instruction* instr) {
43 ASSERT(instr->AllowsCSE());
44 return map_.Insert(instr);
45 }
46
47 private:
48 typedef DirectChainedHashMap<PointerKeyValueTrait<Instruction> > Map;
49
50 Map map_;
51};
52
53// Place describes an abstract location (e.g. field) that IR can load
54// from or store to.
55//
// Places are also used to describe wild-card locations, also known as
// aliases, which essentially represent sets of places that alias each other.
// Places A and B are said to alias each other if a store into A can affect
// a load from B.
59//
60// We distinguish the following aliases:
61//
62// - for fields
63// - *.f - field inside some object;
64// - X.f - field inside an allocated object X;
65// - f - static fields
66//
67// - for indexed accesses
68// - *[*] - non-constant index inside some object;
69// - *[C] - constant index inside some object;
70// - X[*] - non-constant index inside an allocated object X;
71// - X[C] - constant index inside an allocated object X.
72//
73// Constant indexed places are divided into two subcategories:
74//
//   - Access to homogeneous array-like objects: Array, ImmutableArray,
//     OneByteString, TwoByteString. These objects can only be accessed
//     on an element-by-element basis with all elements having the same size.
//     This means X[C] aliases X[K] if and only if C == K.
//   - TypedData accesses. TypedData allows reading one of the primitive
//     data types at the given byte offset. When TypedData is accessed through
//     an index operator on a typed array or a typed array view, it is
//     guaranteed that the byte offset is always aligned by the element size.
//     We write these accesses as X[C|S], where C is the constant byte offset
//     and S is the size of the data type. Obviously X[C|S] and X[K|U] alias
//     if and only if either C = RoundDown(K, S) or K = RoundDown(C, U).
//     Note that not all accesses to typed data are aligned: e.g. ByteData
//     allows unaligned access through its get*/set* methods.
//     The check in Place::SetIndex ensures that we never create a place
//     X[C|S] such that C is not aligned by S.
90//
91// Separating allocations from other objects improves precision of the
92// load forwarding pass because of the following two properties:
93//
94// - if X can be proven to have no aliases itself (i.e. there is no other SSA
95// variable that points to X) then no place inside X can be aliased with any
96// wildcard dependent place (*.f, *.@offs, *[*], *[C]);
97// - given allocations X and Y no place inside X can be aliased with any place
98// inside Y even if any of them or both escape.
99//
// It is important to realize that a single place can belong to multiple
// aliases. For example, the place X.f with an aliased allocation X belongs to
// both the X.f and *.f aliases. Likewise X[C] with a non-aliased allocation X
// belongs to both the X[C] and X[*] aliases.
104//
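// As an illustration (a hypothetical example, not taken from a particular
// program): suppose v1 is an allocation (e.g. CreateArray), v2 is a parameter
// and v3 is a non-constant index. Then
//
//   - a LoadIndexed of v1 at constant index 3 is the place v1[3], which
//     belongs to the aliases v1[3] and v1[*] (but to no wildcard alias as
//     long as v1 is known not to be aliased);
//   - a LoadIndexed of v2 at constant index 3 is the place v2[3], whose
//     least generic alias is *[3], because v2 has no known identity;
//   - a LoadIndexed of v2 at index v3 is the place v2[v3], whose least
//     generic alias is *[*].
//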
105class Place : public ValueObject {
106 public:
107 enum Kind {
108 kNone,
109
110 // Static field location. Is represented as a Field object with a
111 // nullptr instance.
112 kStaticField,
113
    // Instance field location. It is represented by a pair of an instance
    // and a Slot.
116 kInstanceField,
117
118 // Indexed location with a non-constant index.
119 kIndexed,
120
121 // Indexed location with a constant index.
122 kConstantIndexed,
123 };
124
125 // Size of the element accessed by constant index. Size is only important
126 // for TypedData because those accesses can alias even when constant indexes
127 // are not the same: X[0|4] aliases X[0|2] and X[2|2].
128 enum ElementSize {
129 // If indexed access is not a TypedData access then element size is not
130 // important because there is only a single possible access size depending
131 // on the receiver - X[C] aliases X[K] if and only if C == K.
132 // This is the size set for Array, ImmutableArray, OneByteString and
133 // TwoByteString accesses.
134 kNoSize,
135
136 // 1 byte (Int8List, Uint8List, Uint8ClampedList).
137 kInt8,
138
139 // 2 bytes (Int16List, Uint16List).
140 kInt16,
141
142 // 4 bytes (Int32List, Uint32List, Float32List).
143 kInt32,
144
145 // 8 bytes (Int64List, Uint64List, Float64List).
146 kInt64,
147
148 // 16 bytes (Int32x4List, Float32x4List, Float64x2List).
149 kInt128,
150
151 kLargestElementSize = kInt128,
152 };
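
  // For illustration (hypothetical offsets, assuming accesses to the same
  // typed data object X): a Float64List-style access at byte offset 8 is the
  // place X[8|kInt64]. It aliases the Int32List-style accesses X[8|kInt32]
  // and X[12|kInt32], because RoundDown(8, 8) == RoundDown(12, 8) == 8, but
  // it does not alias X[16|kInt32].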
153
154 Place(const Place& other)
155 : ValueObject(),
156 flags_(other.flags_),
157 instance_(other.instance_),
158 raw_selector_(other.raw_selector_),
159 id_(other.id_) {}
160
  // Construct a place from the instruction if the instruction accesses any
  // place. Otherwise construct a kNone place.
163 Place(Instruction* instr, bool* is_load, bool* is_store)
164 : flags_(0), instance_(nullptr), raw_selector_(0), id_(0) {
165 switch (instr->tag()) {
166 case Instruction::kLoadField: {
167 LoadFieldInstr* load_field = instr->AsLoadField();
168 set_representation(load_field->representation());
169 instance_ = load_field->instance()->definition()->OriginalDefinition();
170 set_kind(kInstanceField);
171 instance_field_ = &load_field->slot();
172 *is_load = true;
173 break;
174 }
175
176 case Instruction::kStoreInstanceField: {
177 StoreInstanceFieldInstr* store = instr->AsStoreInstanceField();
178 set_representation(store->RequiredInputRepresentation(
179 StoreInstanceFieldInstr::kValuePos));
180 instance_ = store->instance()->definition()->OriginalDefinition();
181 set_kind(kInstanceField);
182 instance_field_ = &store->slot();
183 *is_store = true;
184 break;
185 }
186
187 case Instruction::kLoadStaticField:
188 set_kind(kStaticField);
189 set_representation(instr->AsLoadStaticField()->representation());
190 static_field_ = &instr->AsLoadStaticField()->field();
191 *is_load = true;
192 break;
193
194 case Instruction::kStoreStaticField:
195 set_kind(kStaticField);
196 set_representation(
197 instr->AsStoreStaticField()->RequiredInputRepresentation(
198 StoreStaticFieldInstr::kValuePos));
199 static_field_ = &instr->AsStoreStaticField()->field();
200 *is_store = true;
201 break;
202
203 case Instruction::kLoadIndexed: {
204 LoadIndexedInstr* load_indexed = instr->AsLoadIndexed();
205 set_representation(load_indexed->representation());
206 instance_ = load_indexed->array()->definition()->OriginalDefinition();
207 SetIndex(load_indexed->index()->definition()->OriginalDefinition(),
208 load_indexed->index_scale(), load_indexed->class_id());
209 *is_load = true;
210 break;
211 }
212
213 case Instruction::kStoreIndexed: {
214 StoreIndexedInstr* store_indexed = instr->AsStoreIndexed();
215 set_representation(store_indexed->RequiredInputRepresentation(
216 StoreIndexedInstr::kValuePos));
217 instance_ = store_indexed->array()->definition()->OriginalDefinition();
218 SetIndex(store_indexed->index()->definition()->OriginalDefinition(),
219 store_indexed->index_scale(), store_indexed->class_id());
220 *is_store = true;
221 break;
222 }
223
224 default:
225 break;
226 }
227 }
228
229 bool IsConstant(Object* value) const {
230 switch (kind()) {
231 case kInstanceField:
232 return (instance() != nullptr) && instance()->IsConstant() &&
233 LoadFieldInstr::TryEvaluateLoad(
234 instance()->AsConstant()->constant_value(), instance_field(),
235 value);
236 default:
237 return false;
238 }
239 }
240
241 // Create object representing *[*] alias.
242 static Place* CreateAnyInstanceAnyIndexAlias(Zone* zone, intptr_t id) {
243 return Wrap(
244 zone, Place(EncodeFlags(kIndexed, kNoRepresentation, kNoSize), NULL, 0),
245 id);
246 }
247
  // Return the least generic alias for this place. Given that aliases are
  // essentially sets of places, we define the least generic alias as the
  // smallest alias that contains this place.
  //
  // We obtain such an alias by a simple transformation:
  //
  //    - for places that depend on an instance X.f, X.@offs, X[i], X[C]
  //      we drop X if X is not an allocation, because in this case X does
  //      not possess an identity, obtaining the aliases *.f, *.@offs, *[i]
  //      and *[C] respectively;
  //    - for non-constant indexed places X[i] we drop information about the
  //      index, obtaining the alias X[*];
  //    - we drop information about representation, but keep the element
  //      size, if any.
  //
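  // For illustration (hypothetical SSA values): if v1 is an AllocateObject
  // and v2 is a parameter, then the place v1.f keeps the alias v1.f, the
  // place v2.f becomes the alias *.f, the place v1[v3] (non-constant index)
  // becomes the alias v1[*], and the place v2[v3] becomes the alias *[*].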
263 Place ToAlias() const {
264 return Place(
265 RepresentationBits::update(kNoRepresentation, flags_),
266 (DependsOnInstance() && IsAllocation(instance())) ? instance() : NULL,
267 (kind() == kIndexed) ? 0 : raw_selector_);
268 }
269
270 bool DependsOnInstance() const {
271 switch (kind()) {
272 case kInstanceField:
273 case kIndexed:
274 case kConstantIndexed:
275 return true;
276
277 case kStaticField:
278 case kNone:
279 return false;
280 }
281
282 UNREACHABLE();
283 return false;
284 }
285
286 // Given instance dependent alias X.f, X.@offs, X[C], X[*] return
287 // wild-card dependent alias *.f, *.@offs, *[C] or *[*] respectively.
288 Place CopyWithoutInstance() const {
289 ASSERT(DependsOnInstance());
290 return Place(flags_, NULL, raw_selector_);
291 }
292
293 // Given alias X[C] or *[C] return X[*] and *[*] respectively.
294 Place CopyWithoutIndex() const {
295 ASSERT(kind() == kConstantIndexed);
296 return Place(EncodeFlags(kIndexed, kNoRepresentation, kNoSize), instance_,
297 0);
298 }
299
300 // Given alias X[ByteOffs|S] and a larger element size S', return
301 // alias X[RoundDown(ByteOffs, S')|S'] - this is the byte offset of a larger
302 // typed array element that contains this typed array element.
303 // In other words this method computes the only possible place with the given
304 // size that can alias this place (due to alignment restrictions).
305 // For example for X[9|kInt8] and target size kInt32 we would return
306 // X[8|kInt32].
307 Place ToLargerElement(ElementSize to) const {
308 ASSERT(kind() == kConstantIndexed);
309 ASSERT(element_size() != kNoSize);
310 ASSERT(element_size() < to);
311 return Place(ElementSizeBits::update(to, flags_), instance_,
312 RoundByteOffset(to, index_constant_));
313 }
314
315 // Given alias X[ByteOffs|S], smaller element size S' and index from 0 to
316 // S/S' - 1 return alias X[ByteOffs + S'*index|S'] - this is the byte offset
317 // of a smaller typed array element which is contained within this typed
318 // array element.
319 // For example X[8|kInt32] contains inside X[8|kInt16] (index is 0) and
320 // X[10|kInt16] (index is 1).
321 Place ToSmallerElement(ElementSize to, intptr_t index) const {
322 ASSERT(kind() == kConstantIndexed);
323 ASSERT(element_size() != kNoSize);
324 ASSERT(element_size() > to);
325 ASSERT(index >= 0);
326 ASSERT(index <
327 ElementSizeMultiplier(element_size()) / ElementSizeMultiplier(to));
328 return Place(ElementSizeBits::update(to, flags_), instance_,
329 ByteOffsetToSmallerElement(to, index, index_constant_));
330 }
331
332 intptr_t id() const { return id_; }
333
334 Kind kind() const { return KindBits::decode(flags_); }
335
336 Representation representation() const {
337 return RepresentationBits::decode(flags_);
338 }
339
340 Definition* instance() const {
341 ASSERT(DependsOnInstance());
342 return instance_;
343 }
344
345 void set_instance(Definition* def) {
346 ASSERT(DependsOnInstance());
347 instance_ = def->OriginalDefinition();
348 }
349
350 const Field& static_field() const {
351 ASSERT(kind() == kStaticField);
352 ASSERT(static_field_->is_static());
353 return *static_field_;
354 }
355
356 const Slot& instance_field() const {
357 ASSERT(kind() == kInstanceField);
358 return *instance_field_;
359 }
360
361 Definition* index() const {
362 ASSERT(kind() == kIndexed);
363 return index_;
364 }
365
366 ElementSize element_size() const { return ElementSizeBits::decode(flags_); }
367
368 intptr_t index_constant() const {
369 ASSERT(kind() == kConstantIndexed);
370 return index_constant_;
371 }
372
373 static const char* DefinitionName(Definition* def) {
374 if (def == NULL) {
375 return "*";
376 } else {
377 return Thread::Current()->zone()->PrintToString("v%" Pd,
378 def->ssa_temp_index());
379 }
380 }
381
382 const char* ToCString() const {
383 switch (kind()) {
384 case kNone:
385 return "<none>";
386
387 case kStaticField: {
388 const char* field_name =
389 String::Handle(static_field().name()).ToCString();
390 return Thread::Current()->zone()->PrintToString("<%s>", field_name);
391 }
392
393 case kInstanceField:
394 return Thread::Current()->zone()->PrintToString(
395 "<%s.%s[%p]>", DefinitionName(instance()), instance_field().Name(),
396 &instance_field());
397
398 case kIndexed:
399 return Thread::Current()->zone()->PrintToString(
400 "<%s[%s]>", DefinitionName(instance()), DefinitionName(index()));
401
402 case kConstantIndexed:
403 if (element_size() == kNoSize) {
404 return Thread::Current()->zone()->PrintToString(
405 "<%s[%" Pd "]>", DefinitionName(instance()), index_constant());
406 } else {
407 return Thread::Current()->zone()->PrintToString(
408 "<%s[%" Pd "|%" Pd "]>", DefinitionName(instance()),
409 index_constant(), ElementSizeMultiplier(element_size()));
410 }
411 }
412 UNREACHABLE();
413 return "<?>";
414 }
415
416 // Fields that are considered immutable by load optimization.
417 // Handle static finals as non-final with precompilation because
418 // they may be reset to uninitialized after compilation.
419 bool IsImmutableField() const {
420 switch (kind()) {
421 case kInstanceField:
422 return instance_field().is_immutable();
423 case kStaticField:
424 return static_field().is_final() && !FLAG_fields_may_be_reset;
425 default:
426 return false;
427 }
428 }
429
430 intptr_t Hashcode() const {
431 return (flags_ * 63 + reinterpret_cast<intptr_t>(instance_)) * 31 +
432 FieldHashcode();
433 }
434
435 bool Equals(const Place* other) const {
436 return (flags_ == other->flags_) && (instance_ == other->instance_) &&
437 SameField(other);
438 }
439
440 // Create a zone allocated copy of this place and assign given id to it.
441 static Place* Wrap(Zone* zone, const Place& place, intptr_t id);
442
443 static bool IsAllocation(Definition* defn) {
444 return (defn != NULL) &&
445 (defn->IsAllocateObject() || defn->IsCreateArray() ||
446 defn->IsAllocateUninitializedContext() ||
447 (defn->IsStaticCall() &&
448 defn->AsStaticCall()->IsRecognizedFactory()));
449 }
450
451 private:
452 Place(uword flags, Definition* instance, intptr_t selector)
453 : flags_(flags), instance_(instance), raw_selector_(selector), id_(0) {}
454
455 bool SameField(const Place* other) const {
456 return (kind() == kStaticField)
457 ? (static_field().Original() == other->static_field().Original())
458 : (raw_selector_ == other->raw_selector_);
459 }
460
461 intptr_t FieldHashcode() const {
462 return (kind() == kStaticField)
463 ? String::Handle(Field::Handle(static_field().Original()).name())
464 .Hash()
465 : raw_selector_;
466 }
467
468 void set_representation(Representation rep) {
469 flags_ = RepresentationBits::update(rep, flags_);
470 }
471
472 void set_kind(Kind kind) { flags_ = KindBits::update(kind, flags_); }
473
474 void set_element_size(ElementSize scale) {
475 flags_ = ElementSizeBits::update(scale, flags_);
476 }
477
478 void SetIndex(Definition* index, intptr_t scale, intptr_t class_id) {
479 ConstantInstr* index_constant = index->AsConstant();
480 if ((index_constant != NULL) && index_constant->value().IsSmi()) {
481 const intptr_t index_value = Smi::Cast(index_constant->value()).Value();
482 const ElementSize size = ElementSizeFor(class_id);
483 const bool is_typed_access = (size != kNoSize);
      // Indexing into [RawTypedDataView]/[RawExternalTypedData] happens via
      // an untagged load of the `_data` field (which points to C memory).
      //
      // Indexing into dart:ffi's [RawPointer] happens via loading of the
      // `c_memory_address_`, converting it to an integer, doing some
      // arithmetic and finally using IntConverterInstr to convert to an
      // untagged representation.
491 //
492 // In both cases the array used for load/store has untagged
493 // representation.
494 const bool can_be_view = instance_->representation() == kUntagged;
495
      // If this is a typed data access, scale the index to get the byte
      // offset. Otherwise ignore the scale.
498 if (!is_typed_access) {
499 scale = 1;
500 }
501
502 // Guard against potential multiplication overflow and negative indices.
503 if ((0 <= index_value) && (index_value < (kMaxInt32 / scale))) {
504 const intptr_t scaled_index = index_value * scale;
505
506 // Guard against unaligned byte offsets and access through raw
507 // memory pointer (which can be pointing into another typed data).
508 if (!is_typed_access ||
509 (!can_be_view &&
510 Utils::IsAligned(scaled_index, ElementSizeMultiplier(size)))) {
511 set_kind(kConstantIndexed);
512 set_element_size(size);
513 index_constant_ = scaled_index;
514 return;
515 }
516 }
517
518 // Fallthrough: create generic _[*] place.
519 }
520
521 set_kind(kIndexed);
522 index_ = index;
523 }
524
525 static uword EncodeFlags(Kind kind, Representation rep, ElementSize scale) {
526 ASSERT((kind == kConstantIndexed) || (scale == kNoSize));
527 return KindBits::encode(kind) | RepresentationBits::encode(rep) |
528 ElementSizeBits::encode(scale);
529 }
530
531 static ElementSize ElementSizeFor(intptr_t class_id) {
532 switch (class_id) {
533 case kArrayCid:
534 case kImmutableArrayCid:
535 case kOneByteStringCid:
536 case kTwoByteStringCid:
537 case kExternalOneByteStringCid:
538 case kExternalTwoByteStringCid:
539 // Object arrays and strings do not allow accessing them through
540 // different types. No need to attach scale.
541 return kNoSize;
542
543 case kTypedDataInt8ArrayCid:
544 case kTypedDataUint8ArrayCid:
545 case kTypedDataUint8ClampedArrayCid:
546 case kExternalTypedDataUint8ArrayCid:
547 case kExternalTypedDataUint8ClampedArrayCid:
548 return kInt8;
549
550 case kTypedDataInt16ArrayCid:
551 case kTypedDataUint16ArrayCid:
552 return kInt16;
553
554 case kTypedDataInt32ArrayCid:
555 case kTypedDataUint32ArrayCid:
556 case kTypedDataFloat32ArrayCid:
557 return kInt32;
558
559 case kTypedDataInt64ArrayCid:
560 case kTypedDataUint64ArrayCid:
561 case kTypedDataFloat64ArrayCid:
562 return kInt64;
563
564 case kTypedDataInt32x4ArrayCid:
565 case kTypedDataFloat32x4ArrayCid:
566 case kTypedDataFloat64x2ArrayCid:
567 return kInt128;
568
569 default:
570 UNREACHABLE();
571 return kNoSize;
572 }
573 }
574
575 static intptr_t ElementSizeMultiplier(ElementSize size) {
576 return 1 << (static_cast<intptr_t>(size) - static_cast<intptr_t>(kInt8));
577 }
578
579 static intptr_t RoundByteOffset(ElementSize size, intptr_t offset) {
580 return offset & ~(ElementSizeMultiplier(size) - 1);
581 }
582
583 static intptr_t ByteOffsetToSmallerElement(ElementSize size,
584 intptr_t index,
585 intptr_t base_offset) {
586 return base_offset + index * ElementSizeMultiplier(size);
587 }
588
589 class KindBits : public BitField<uword, Kind, 0, 3> {};
590 class RepresentationBits
591 : public BitField<uword, Representation, KindBits::kNextBit, 11> {};
592 class ElementSizeBits
593 : public BitField<uword, ElementSize, RepresentationBits::kNextBit, 3> {};
594
595 uword flags_;
596 Definition* instance_;
597 union {
598 intptr_t raw_selector_;
599 const Field* static_field_;
600 const Slot* instance_field_;
601 intptr_t index_constant_;
602 Definition* index_;
603 };
604
605 intptr_t id_;
606};
607
608class ZonePlace : public ZoneAllocated {
609 public:
610 explicit ZonePlace(const Place& place) : place_(place) {}
611
612 Place* place() { return &place_; }
613
614 private:
615 Place place_;
616};
617
618Place* Place::Wrap(Zone* zone, const Place& place, intptr_t id) {
619 Place* wrapped = (new (zone) ZonePlace(place))->place();
620 wrapped->id_ = id;
621 return wrapped;
622}
623
// A correspondence between places connected through outgoing phi moves on the
// edge that targets a join.
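// For illustration (hypothetical SSA values): if v5 = phi(v3, v4) at a join
// block J and the place v5.f is used, then ComputePhiMoves records, on the
// edge from J's first predecessor, a move from v3.f to v5.f and, on the edge
// from the second predecessor, a move from v4.f to v5.f.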
626class PhiPlaceMoves : public ZoneAllocated {
627 public:
628 // Record a move from the place with id |from| to the place with id |to| at
629 // the given block.
630 void CreateOutgoingMove(Zone* zone,
631 BlockEntryInstr* block,
632 intptr_t from,
633 intptr_t to) {
634 const intptr_t block_num = block->preorder_number();
635 moves_.EnsureLength(block_num + 1, nullptr);
636
637 if (moves_[block_num] == nullptr) {
638 moves_[block_num] = new (zone) ZoneGrowableArray<Move>(5);
639 }
640
641 moves_[block_num]->Add(Move(from, to));
642 }
643
644 class Move {
645 public:
646 Move(intptr_t from, intptr_t to) : from_(from), to_(to) {}
647
648 intptr_t from() const { return from_; }
649 intptr_t to() const { return to_; }
650
651 private:
652 intptr_t from_;
653 intptr_t to_;
654 };
655
656 typedef const ZoneGrowableArray<Move>* MovesList;
657
658 MovesList GetOutgoingMoves(BlockEntryInstr* block) const {
659 const intptr_t block_num = block->preorder_number();
660 return (block_num < moves_.length()) ? moves_[block_num] : NULL;
661 }
662
663 private:
664 GrowableArray<ZoneGrowableArray<Move>*> moves_;
665};
666
667// A map from aliases to a set of places sharing the alias. Additionally
668// carries a set of places that can be aliased by side-effects, essentially
669// those that are affected by calls.
670class AliasedSet : public ZoneAllocated {
671 public:
672 AliasedSet(Zone* zone,
673 DirectChainedHashMap<PointerKeyValueTrait<Place> >* places_map,
674 ZoneGrowableArray<Place*>* places,
675 PhiPlaceMoves* phi_moves)
676 : zone_(zone),
677 places_map_(places_map),
678 places_(*places),
679 phi_moves_(phi_moves),
680 aliases_(5),
681 aliases_map_(),
682 typed_data_access_sizes_(),
683 representatives_(),
684 killed_(),
685 aliased_by_effects_(new (zone) BitVector(zone, places->length())) {
686 InsertAlias(Place::CreateAnyInstanceAnyIndexAlias(
687 zone_, kAnyInstanceAnyIndexAlias));
688 for (intptr_t i = 0; i < places_.length(); i++) {
689 AddRepresentative(places_[i]);
690 }
691 ComputeKillSets();
692 }
693
694 intptr_t LookupAliasId(const Place& alias) {
695 const Place* result = aliases_map_.LookupValue(&alias);
696 return (result != NULL) ? result->id() : static_cast<intptr_t>(kNoAlias);
697 }
698
699 BitVector* GetKilledSet(intptr_t alias) {
700 return (alias < killed_.length()) ? killed_[alias] : NULL;
701 }
702
703 intptr_t max_place_id() const { return places().length(); }
704 bool IsEmpty() const { return max_place_id() == 0; }
705
706 BitVector* aliased_by_effects() const { return aliased_by_effects_; }
707
708 const ZoneGrowableArray<Place*>& places() const { return places_; }
709
710 Place* LookupCanonical(Place* place) const {
711 return places_map_->LookupValue(place);
712 }
713
714 void PrintSet(BitVector* set) {
715 bool comma = false;
716 for (BitVector::Iterator it(set); !it.Done(); it.Advance()) {
717 if (comma) {
718 THR_Print(", ");
719 }
720 THR_Print("%s", places_[it.Current()]->ToCString());
721 comma = true;
722 }
723 }
724
725 const PhiPlaceMoves* phi_moves() const { return phi_moves_; }
726
727 void RollbackAliasedIdentites() {
728 for (intptr_t i = 0; i < identity_rollback_.length(); ++i) {
729 identity_rollback_[i]->SetIdentity(AliasIdentity::Unknown());
730 }
731 }
732
733 // Returns false if the result of an allocation instruction can't be aliased
734 // by another SSA variable and true otherwise.
735 bool CanBeAliased(Definition* alloc) {
736 if (!Place::IsAllocation(alloc)) {
737 return true;
738 }
739
740 if (alloc->Identity().IsUnknown()) {
741 ComputeAliasing(alloc);
742 }
743
744 return !alloc->Identity().IsNotAliased();
745 }
746
747 enum { kNoAlias = 0 };
748
749 private:
750 enum {
751 // Artificial alias that is used to collect all representatives of the
752 // *[C], X[C] aliases for arbitrary C.
753 kAnyConstantIndexedAlias = 1,
754
755 // Artificial alias that is used to collect all representatives of
756 // *[C] alias for arbitrary C.
757 kUnknownInstanceConstantIndexedAlias = 2,
758
759 // Artificial alias that is used to collect all representatives of
760 // X[*] alias for all X.
761 kAnyAllocationIndexedAlias = 3,
762
763 // *[*] alias.
764 kAnyInstanceAnyIndexAlias = 4
765 };
766
767 // Compute least generic alias for the place and assign alias id to it.
768 void AddRepresentative(Place* place) {
769 if (!place->IsImmutableField()) {
770 const Place* alias = CanonicalizeAlias(place->ToAlias());
771 EnsureSet(&representatives_, alias->id())->Add(place->id());
772
773 // Update cumulative representative sets that are used during
774 // killed sets computation.
775 if (alias->kind() == Place::kConstantIndexed) {
776 if (CanBeAliased(alias->instance())) {
777 EnsureSet(&representatives_, kAnyConstantIndexedAlias)
778 ->Add(place->id());
779 }
780
781 if (alias->instance() == NULL) {
782 EnsureSet(&representatives_, kUnknownInstanceConstantIndexedAlias)
783 ->Add(place->id());
784 }
785
786 // Collect all element sizes used to access TypedData arrays in
787 // the function. This is used to skip sizes without representatives
788 // when computing kill sets.
789 if (alias->element_size() != Place::kNoSize) {
790 typed_data_access_sizes_.Add(alias->element_size());
791 }
792 } else if ((alias->kind() == Place::kIndexed) &&
793 CanBeAliased(place->instance())) {
794 EnsureSet(&representatives_, kAnyAllocationIndexedAlias)
795 ->Add(place->id());
796 }
797
798 if (!IsIndependentFromEffects(place)) {
799 aliased_by_effects_->Add(place->id());
800 }
801 }
802 }
803
804 void ComputeKillSets() {
805 for (intptr_t i = 0; i < aliases_.length(); ++i) {
806 const Place* alias = aliases_[i];
807 // Add all representatives to the kill set.
808 AddAllRepresentatives(alias->id(), alias->id());
809 ComputeKillSet(alias);
810 }
811
812 if (FLAG_trace_load_optimization) {
813 THR_Print("Aliases KILL sets:\n");
814 for (intptr_t i = 0; i < aliases_.length(); ++i) {
815 const Place* alias = aliases_[i];
816 BitVector* kill = GetKilledSet(alias->id());
817
818 THR_Print("%s: ", alias->ToCString());
819 if (kill != NULL) {
820 PrintSet(kill);
821 }
822 THR_Print("\n");
823 }
824 }
825 }
826
827 void InsertAlias(const Place* alias) {
828 aliases_map_.Insert(alias);
829 aliases_.Add(alias);
830 }
831
832 const Place* CanonicalizeAlias(const Place& alias) {
833 const Place* canonical = aliases_map_.LookupValue(&alias);
834 if (canonical == NULL) {
835 canonical = Place::Wrap(zone_, alias,
836 kAnyInstanceAnyIndexAlias + aliases_.length());
837 InsertAlias(canonical);
838 }
839 ASSERT(aliases_map_.LookupValue(&alias) == canonical);
840 return canonical;
841 }
842
843 BitVector* GetRepresentativesSet(intptr_t alias) {
844 return (alias < representatives_.length()) ? representatives_[alias] : NULL;
845 }
846
847 BitVector* EnsureSet(GrowableArray<BitVector*>* sets, intptr_t alias) {
848 while (sets->length() <= alias) {
849 sets->Add(NULL);
850 }
851
852 BitVector* set = (*sets)[alias];
853 if (set == NULL) {
854 (*sets)[alias] = set = new (zone_) BitVector(zone_, max_place_id());
855 }
856 return set;
857 }
858
859 void AddAllRepresentatives(const Place* to, intptr_t from) {
860 AddAllRepresentatives(to->id(), from);
861 }
862
863 void AddAllRepresentatives(intptr_t to, intptr_t from) {
864 BitVector* from_set = GetRepresentativesSet(from);
865 if (from_set != NULL) {
866 EnsureSet(&killed_, to)->AddAll(from_set);
867 }
868 }
869
870 void CrossAlias(const Place* to, const Place& from) {
871 const intptr_t from_id = LookupAliasId(from);
872 if (from_id == kNoAlias) {
873 return;
874 }
875 CrossAlias(to, from_id);
876 }
877
878 void CrossAlias(const Place* to, intptr_t from) {
879 AddAllRepresentatives(to->id(), from);
880 AddAllRepresentatives(from, to->id());
881 }
882
  // When computing kill sets we let a less generic alias insert its
  // representatives into a more generic alias's kill set. For example,
  // when visiting the alias X[*], instead of searching for all aliases X[C]
  // and inserting their representatives into the kill set for X[*], we
  // update the kill set for X[*] each time we visit a new X[C] for some C.
  // There is an exception however: if both aliases are parametric, like *[C]
  // and X[*], which cross-alias when X is an aliased allocation, then we use
  // artificial aliases that contain all possible representatives for the
  // given alias for any value of the parameter to compute the resulting
  // kill set.
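  //
  // For illustration (hypothetical aliases): when the alias v1[3] is visited,
  // its representatives are added to the kill set of the more generic alias
  // v1[*], so v1[*] never needs to enumerate every v1[C] itself. For the
  // parametric pair X[*] / *[C] (with X an aliased allocation), the
  // artificial kUnknownInstanceConstantIndexedAlias set, which collects the
  // representatives of *[C] for every C, is used instead.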
892 void ComputeKillSet(const Place* alias) {
893 switch (alias->kind()) {
894 case Place::kIndexed: // Either *[*] or X[*] alias.
895 if (alias->instance() == NULL) {
896 // *[*] aliases with X[*], X[C], *[C].
897 AddAllRepresentatives(alias, kAnyConstantIndexedAlias);
898 AddAllRepresentatives(alias, kAnyAllocationIndexedAlias);
899 } else if (CanBeAliased(alias->instance())) {
900 // X[*] aliases with X[C].
901 // If X can be aliased then X[*] also aliases with *[C], *[*].
902 CrossAlias(alias, kAnyInstanceAnyIndexAlias);
903 AddAllRepresentatives(alias, kUnknownInstanceConstantIndexedAlias);
904 }
905 break;
906
907 case Place::kConstantIndexed: // Either X[C] or *[C] alias.
908 if (alias->element_size() != Place::kNoSize) {
909 const bool has_aliased_instance =
910 (alias->instance() != NULL) && CanBeAliased(alias->instance());
911
          // If this is a TypedData access then X[C|S] aliases the larger
          // elements covering it, X[RoundDown(C, S')|S'] for all S' > S, and
          // all smaller elements covered by it, X[C'|S'] for all S' < S and
          // all C' such that C = RoundDown(C', S).
          // In the loop below it is enough to only propagate aliasing to
          // larger aliases because propagation is symmetric: smaller aliases
          // (if there are any) would update the kill set for this alias when
          // they are visited.
920 for (intptr_t i = static_cast<intptr_t>(alias->element_size()) + 1;
921 i <= Place::kLargestElementSize; i++) {
            // Skip element sizes that are guaranteed to have no
            // representatives.
            if (!typed_data_access_sizes_.Contains(
                    static_cast<Place::ElementSize>(i))) {
              continue;
            }
926
927 // X[C|S] aliases with X[RoundDown(C, S')|S'] and likewise
928 // *[C|S] aliases with *[RoundDown(C, S')|S'].
929 CrossAlias(alias, alias->ToLargerElement(
930 static_cast<Place::ElementSize>(i)));
931 }
932
933 if (has_aliased_instance) {
934 // If X is an aliased instance then X[C|S] aliases *[C'|S'] for all
935 // related combinations of C' and S'.
936 // Caveat: this propagation is not symmetric (we would not know
937 // to propagate aliasing from *[C'|S'] to X[C|S] when visiting
938 // *[C'|S']) and thus we need to handle both element sizes smaller
939 // and larger than S.
940 const Place no_instance_alias = alias->CopyWithoutInstance();
941 for (intptr_t i = Place::kInt8; i <= Place::kLargestElementSize;
942 i++) {
              const auto other_size = static_cast<Place::ElementSize>(i);

              // Skip element sizes that are guaranteed to have no
              // representatives.
              if (!typed_data_access_sizes_.Contains(other_size)) {
                continue;
              }

950 if (other_size > alias->element_size()) {
951 // X[C|S] aliases all larger elements which cover it:
952 // *[RoundDown(C, S')|S'] for S' > S.
953 CrossAlias(alias,
954 no_instance_alias.ToLargerElement(other_size));
955 } else if (other_size < alias->element_size()) {
956 // X[C|S] aliases all sub-elements of smaller size:
957 // *[C+j*S'|S'] for S' < S and j from 0 to S/S' - 1.
958 const auto num_smaller_elements =
959 1 << (alias->element_size() - other_size);
960 for (intptr_t j = 0; j < num_smaller_elements; j++) {
961 CrossAlias(alias,
962 no_instance_alias.ToSmallerElement(other_size, j));
963 }
964 }
965 }
966 }
967 }
968
969 if (alias->instance() == NULL) {
970 // *[C] aliases with X[C], X[*], *[*].
971 AddAllRepresentatives(alias, kAnyAllocationIndexedAlias);
972 CrossAlias(alias, kAnyInstanceAnyIndexAlias);
973 } else {
974 // X[C] aliases with X[*].
975 // If X can be aliased then X[C] also aliases with *[C], *[*].
976 CrossAlias(alias, alias->CopyWithoutIndex());
977 if (CanBeAliased(alias->instance())) {
978 CrossAlias(alias, alias->CopyWithoutInstance());
979 CrossAlias(alias, kAnyInstanceAnyIndexAlias);
980 }
981 }
982 break;
983
984 case Place::kStaticField:
985 // Nothing to do.
986 break;
987
988 case Place::kInstanceField:
989 if (CanBeAliased(alias->instance())) {
          // X.f aliases with *.f.
991 CrossAlias(alias, alias->CopyWithoutInstance());
992 }
993 break;
994
995 case Place::kNone:
996 UNREACHABLE();
997 }
998 }
999
  // Returns true if a load from the given place is unaffected by external
  // side-effects. This essentially means that no stores to the same location
  // can occur in other functions.
1003 bool IsIndependentFromEffects(Place* place) {
1004 if (place->IsImmutableField()) {
1005 return true;
1006 }
1007
1008 return (place->kind() == Place::kInstanceField) &&
1009 !CanBeAliased(place->instance());
1010 }
1011
1012 // Returns true if there are direct loads from the given place.
1013 bool HasLoadsFromPlace(Definition* defn, const Place* place) {
1014 ASSERT(place->kind() == Place::kInstanceField);
1015
1016 for (Value* use = defn->input_use_list(); use != NULL;
1017 use = use->next_use()) {
1018 Instruction* instr = use->instruction();
1019 if (UseIsARedefinition(use) &&
1020 HasLoadsFromPlace(instr->Cast<Definition>(), place)) {
1021 return true;
1022 }
1023 bool is_load = false, is_store;
1024 Place load_place(instr, &is_load, &is_store);
1025
1026 if (is_load && load_place.Equals(place)) {
1027 return true;
1028 }
1029 }
1030
1031 return false;
1032 }
1033
1034 // Returns true if the given [use] is a redefinition (e.g. RedefinitionInstr,
1035 // CheckNull, CheckArrayBound, etc).
1036 static bool UseIsARedefinition(Value* use) {
1037 Instruction* instr = use->instruction();
1038 return instr->IsDefinition() &&
1039 (instr->Cast<Definition>()->RedefinedValue() == use);
1040 }
1041
1042 // Check if any use of the definition can create an alias.
1043 // Can add more objects into aliasing_worklist_.
1044 bool AnyUseCreatesAlias(Definition* defn) {
1045 for (Value* use = defn->input_use_list(); use != NULL;
1046 use = use->next_use()) {
1047 Instruction* instr = use->instruction();
1048 if (instr->HasUnknownSideEffects() || instr->IsLoadUntagged() ||
1049 (instr->IsStoreIndexed() &&
1050 (use->use_index() == StoreIndexedInstr::kValuePos)) ||
1051 instr->IsStoreStaticField() || instr->IsPhi()) {
1052 return true;
1053 } else if (UseIsARedefinition(use) &&
1054 AnyUseCreatesAlias(instr->Cast<Definition>())) {
1055 return true;
1056 } else if ((instr->IsStoreInstanceField() &&
1057 (use->use_index() !=
1058 StoreInstanceFieldInstr::kInstancePos))) {
1059 ASSERT(use->use_index() == StoreInstanceFieldInstr::kValuePos);
        // If we store this value into an object that is not aliased itself
        // and we never load it again, then the store does not create an
        // alias.
1062 StoreInstanceFieldInstr* store = instr->AsStoreInstanceField();
1063 Definition* instance =
1064 store->instance()->definition()->OriginalDefinition();
1065 if (Place::IsAllocation(instance) &&
1066 !instance->Identity().IsAliased()) {
1067 bool is_load, is_store;
1068 Place store_place(instr, &is_load, &is_store);
1069
1070 if (!HasLoadsFromPlace(instance, &store_place)) {
            // No loads found that match this store. If it is not yet known
            // whether the object is aliased, optimistically assume that it
            // is not, but add it to the worklist to check its uses
            // transitively.
1074 if (instance->Identity().IsUnknown()) {
1075 instance->SetIdentity(AliasIdentity::NotAliased());
1076 aliasing_worklist_.Add(instance);
1077 }
1078 continue;
1079 }
1080 }
1081 return true;
1082 }
1083 }
1084 return false;
1085 }
1086
1087 // Mark any value stored into the given object as potentially aliased.
1088 void MarkStoredValuesEscaping(Definition* defn) {
1089 // Find all stores into this object.
1090 for (Value* use = defn->input_use_list(); use != NULL;
1091 use = use->next_use()) {
1092 auto instr = use->instruction();
1093 if (UseIsARedefinition(use)) {
1094 MarkStoredValuesEscaping(instr->AsDefinition());
1095 continue;
1096 }
1097 if ((use->use_index() == StoreInstanceFieldInstr::kInstancePos) &&
1098 instr->IsStoreInstanceField()) {
1099 StoreInstanceFieldInstr* store = instr->AsStoreInstanceField();
1100 Definition* value = store->value()->definition()->OriginalDefinition();
1101 if (value->Identity().IsNotAliased()) {
1102 value->SetIdentity(AliasIdentity::Aliased());
1103 identity_rollback_.Add(value);
1104
1105 // Add to worklist to propagate the mark transitively.
1106 aliasing_worklist_.Add(value);
1107 }
1108 }
1109 }
1110 }
1111
1112 // Determine if the given definition can't be aliased.
1113 void ComputeAliasing(Definition* alloc) {
1114 ASSERT(Place::IsAllocation(alloc));
1115 ASSERT(alloc->Identity().IsUnknown());
1116 ASSERT(aliasing_worklist_.is_empty());
1117
1118 alloc->SetIdentity(AliasIdentity::NotAliased());
1119 aliasing_worklist_.Add(alloc);
1120
1121 while (!aliasing_worklist_.is_empty()) {
1122 Definition* defn = aliasing_worklist_.RemoveLast();
1123 ASSERT(Place::IsAllocation(defn));
1124 // If the definition in the worklist was optimistically marked as
1125 // not-aliased check that optimistic assumption still holds: check if
1126 // any of its uses can create an alias.
1127 if (!defn->Identity().IsAliased() && AnyUseCreatesAlias(defn)) {
1128 defn->SetIdentity(AliasIdentity::Aliased());
1129 identity_rollback_.Add(defn);
1130 }
1131
1132 // If the allocation site is marked as aliased conservatively mark
1133 // any values stored into the object aliased too.
1134 if (defn->Identity().IsAliased()) {
1135 MarkStoredValuesEscaping(defn);
1136 }
1137 }
1138 }
1139
1140 Zone* zone_;
1141
1142 DirectChainedHashMap<PointerKeyValueTrait<Place> >* places_map_;
1143
1144 const ZoneGrowableArray<Place*>& places_;
1145
1146 const PhiPlaceMoves* phi_moves_;
1147
1148 // A list of all seen aliases and a map that allows looking up canonical
1149 // alias object.
1150 GrowableArray<const Place*> aliases_;
1151 DirectChainedHashMap<PointerKeyValueTrait<const Place> > aliases_map_;
1152
1153 SmallSet<Place::ElementSize> typed_data_access_sizes_;
1154
  // Maps an alias id to the set of ids of places representing the alias.
  // A place represents an alias if this alias is the least generic alias
  // for the place (see ToAlias for the definition of the least generic
  // alias).
1159 GrowableArray<BitVector*> representatives_;
1160
1161 // Maps alias id to set of ids of places aliased.
1162 GrowableArray<BitVector*> killed_;
1163
1164 // Set of ids of places that can be affected by side-effects other than
1165 // explicit stores (i.e. through calls).
1166 BitVector* aliased_by_effects_;
1167
1168 // Worklist used during alias analysis.
1169 GrowableArray<Definition*> aliasing_worklist_;
1170
  // List of definitions that had their identity set to Aliased. At the end
  // of load optimization their identity will be rolled back to Unknown to
  // avoid treating them as Aliased at later stages without checking first,
  // because later optimizations can potentially eliminate the instructions
  // that led to the aliasing.
1176 GrowableArray<Definition*> identity_rollback_;
1177};
1178
1179static Definition* GetStoredValue(Instruction* instr) {
1180 if (instr->IsStoreIndexed()) {
1181 return instr->AsStoreIndexed()->value()->definition();
1182 }
1183
1184 StoreInstanceFieldInstr* store_instance_field = instr->AsStoreInstanceField();
1185 if (store_instance_field != NULL) {
1186 return store_instance_field->value()->definition();
1187 }
1188
1189 StoreStaticFieldInstr* store_static_field = instr->AsStoreStaticField();
1190 if (store_static_field != NULL) {
1191 return store_static_field->value()->definition();
1192 }
1193
1194 UNREACHABLE(); // Should only be called for supported store instructions.
1195 return NULL;
1196}
1197
1198static bool IsPhiDependentPlace(Place* place) {
1199 return (place->kind() == Place::kInstanceField) &&
1200 (place->instance() != NULL) && place->instance()->IsPhi();
1201}
1202
// For each place that depends on a phi, ensure that the equivalent places
// corresponding to the phi inputs are numbered, and record outgoing phi moves
// for each predecessor block that establish the correspondence between the
// phi-dependent place and the incoming place of the phi input.
1207static PhiPlaceMoves* ComputePhiMoves(
1208 DirectChainedHashMap<PointerKeyValueTrait<Place> >* map,
1209 ZoneGrowableArray<Place*>* places) {
1210 Thread* thread = Thread::Current();
1211 Zone* zone = thread->zone();
1212 PhiPlaceMoves* phi_moves = new (zone) PhiPlaceMoves();
1213
1214 for (intptr_t i = 0; i < places->length(); i++) {
1215 Place* place = (*places)[i];
1216
1217 if (IsPhiDependentPlace(place)) {
1218 PhiInstr* phi = place->instance()->AsPhi();
1219 BlockEntryInstr* block = phi->GetBlock();
1220
1221 if (FLAG_trace_optimization) {
1222 THR_Print("phi dependent place %s\n", place->ToCString());
1223 }
1224
1225 Place input_place(*place);
1226 for (intptr_t j = 0; j < phi->InputCount(); j++) {
1227 input_place.set_instance(phi->InputAt(j)->definition());
1228
1229 Place* result = map->LookupValue(&input_place);
1230 if (result == NULL) {
1231 result = Place::Wrap(zone, input_place, places->length());
1232 map->Insert(result);
1233 places->Add(result);
1234 if (FLAG_trace_optimization) {
1235 THR_Print(" adding place %s as %" Pd "\n", result->ToCString(),
1236 result->id());
1237 }
1238 }
1239 phi_moves->CreateOutgoingMove(zone, block->PredecessorAt(j),
1240 result->id(), place->id());
1241 }
1242 }
1243 }
1244
1245 return phi_moves;
1246}
1247
1248DART_FORCE_INLINE static void SetPlaceId(Instruction* instr, intptr_t id) {
1249 instr->SetPassSpecificId(CompilerPass::kCSE, id);
1250}
1251
1252DART_FORCE_INLINE static bool HasPlaceId(const Instruction* instr) {
1253 return instr->HasPassSpecificId(CompilerPass::kCSE);
1254}
1255
1256DART_FORCE_INLINE static intptr_t GetPlaceId(const Instruction* instr) {
1257 ASSERT(HasPlaceId(instr));
1258 return instr->GetPassSpecificId(CompilerPass::kCSE);
1259}
1260
1261enum CSEMode { kOptimizeLoads, kOptimizeStores };
1262
1263static AliasedSet* NumberPlaces(
1264 FlowGraph* graph,
1265 DirectChainedHashMap<PointerKeyValueTrait<Place> >* map,
1266 CSEMode mode) {
1267 // Loads representing different expression ids will be collected and
1268 // used to build per offset kill sets.
1269 Zone* zone = graph->zone();
1270 ZoneGrowableArray<Place*>* places = new (zone) ZoneGrowableArray<Place*>(10);
1271
1272 bool has_loads = false;
1273 bool has_stores = false;
1274 for (BlockIterator it = graph->reverse_postorder_iterator(); !it.Done();
1275 it.Advance()) {
1276 BlockEntryInstr* block = it.Current();
1277
1278 for (ForwardInstructionIterator instr_it(block); !instr_it.Done();
1279 instr_it.Advance()) {
1280 Instruction* instr = instr_it.Current();
1281 Place place(instr, &has_loads, &has_stores);
1282 if (place.kind() == Place::kNone) {
1283 continue;
1284 }
1285
1286 Place* result = map->LookupValue(&place);
1287 if (result == NULL) {
1288 result = Place::Wrap(zone, place, places->length());
1289 map->Insert(result);
1290 places->Add(result);
1291
1292 if (FLAG_trace_optimization) {
1293 THR_Print("numbering %s as %" Pd "\n", result->ToCString(),
1294 result->id());
1295 }
1296 }
1297
1298 SetPlaceId(instr, result->id());
1299 }
1300 }
1301
1302 if ((mode == kOptimizeLoads) && !has_loads) {
1303 return NULL;
1304 }
1305 if ((mode == kOptimizeStores) && !has_stores) {
1306 return NULL;
1307 }
1308
1309 PhiPlaceMoves* phi_moves = ComputePhiMoves(map, places);
1310
1311 // Build aliasing sets mapping aliases to loads.
1312 return new (zone) AliasedSet(zone, map, places, phi_moves);
1313}
1314
1315// Load instructions handled by load elimination.
1316static bool IsLoadEliminationCandidate(Instruction* instr) {
1317 return instr->IsLoadField() || instr->IsLoadIndexed() ||
1318 instr->IsLoadStaticField();
1319}
1320
1321static bool IsLoopInvariantLoad(ZoneGrowableArray<BitVector*>* sets,
1322 intptr_t loop_header_index,
1323 Instruction* instr) {
1324 return IsLoadEliminationCandidate(instr) && (sets != NULL) &&
1325 HasPlaceId(instr) &&
1326 (*sets)[loop_header_index]->Contains(GetPlaceId(instr));
1327}
1328
1329LICM::LICM(FlowGraph* flow_graph) : flow_graph_(flow_graph) {
1330 ASSERT(flow_graph->is_licm_allowed());
1331}
1332
1333void LICM::Hoist(ForwardInstructionIterator* it,
1334 BlockEntryInstr* pre_header,
1335 Instruction* current) {
1336 if (current->IsCheckClass()) {
1337 current->AsCheckClass()->set_licm_hoisted(true);
1338 } else if (current->IsCheckSmi()) {
1339 current->AsCheckSmi()->set_licm_hoisted(true);
1340 } else if (current->IsCheckEitherNonSmi()) {
1341 current->AsCheckEitherNonSmi()->set_licm_hoisted(true);
1342 } else if (current->IsCheckArrayBound()) {
1343 ASSERT(!CompilerState::Current().is_aot()); // speculative in JIT only
1344 current->AsCheckArrayBound()->set_licm_hoisted(true);
1345 } else if (current->IsGenericCheckBound()) {
1346 ASSERT(CompilerState::Current().is_aot()); // non-speculative in AOT only
1347 // Does not deopt, so no need for licm_hoisted flag.
1348 } else if (current->IsTestCids()) {
1349 current->AsTestCids()->set_licm_hoisted(true);
1350 }
1351 if (FLAG_trace_optimization) {
1352 THR_Print("Hoisting instruction %s:%" Pd " from B%" Pd " to B%" Pd "\n",
1353 current->DebugName(), current->GetDeoptId(),
1354 current->GetBlock()->block_id(), pre_header->block_id());
1355 }
1356 // Move the instruction out of the loop.
1357 current->RemoveEnvironment();
1358 if (it != NULL) {
1359 it->RemoveCurrentFromGraph();
1360 } else {
1361 current->RemoveFromGraph();
1362 }
1363 GotoInstr* last = pre_header->last_instruction()->AsGoto();
1364 // Using kind kEffect will not assign a fresh ssa temporary index.
1365 flow_graph()->InsertBefore(last, current, last->env(), FlowGraph::kEffect);
1366 current->CopyDeoptIdFrom(*last);
1367}
1368
1369void LICM::TrySpecializeSmiPhi(PhiInstr* phi,
1370 BlockEntryInstr* header,
1371 BlockEntryInstr* pre_header) {
1372 if (phi->Type()->ToCid() == kSmiCid) {
1373 return;
1374 }
1375
1376 // Check if there is only a single kDynamicCid input to the phi that
1377 // comes from the pre-header.
1378 const intptr_t kNotFound = -1;
1379 intptr_t non_smi_input = kNotFound;
1380 for (intptr_t i = 0; i < phi->InputCount(); ++i) {
1381 Value* input = phi->InputAt(i);
1382 if (input->Type()->ToCid() != kSmiCid) {
1383 if ((non_smi_input != kNotFound) ||
1384 (input->Type()->ToCid() != kDynamicCid)) {
1385 // There are multiple kDynamicCid inputs or there is an input that is
1386 // known to be non-smi.
1387 return;
1388 } else {
1389 non_smi_input = i;
1390 }
1391 }
1392 }
1393
1394 if ((non_smi_input == kNotFound) ||
1395 (phi->block()->PredecessorAt(non_smi_input) != pre_header)) {
1396 return;
1397 }
1398
1399 CheckSmiInstr* check = NULL;
1400 for (Value* use = phi->input_use_list(); (use != NULL) && (check == NULL);
1401 use = use->next_use()) {
1402 check = use->instruction()->AsCheckSmi();
1403 }
1404
1405 if (check == NULL) {
1406 return;
1407 }
1408
  // Hoist the CheckSmi instruction and make this phi a smi one.
1410 Hoist(NULL, pre_header, check);
1411
1412 // Replace value we are checking with phi's input.
1413 check->value()->BindTo(phi->InputAt(non_smi_input)->definition());
1414 check->value()->SetReachingType(phi->InputAt(non_smi_input)->Type());
1415
1416 phi->UpdateType(CompileType::FromCid(kSmiCid));
1417}
1418
1419void LICM::OptimisticallySpecializeSmiPhis() {
1420 if (flow_graph()->function().ProhibitsHoistingCheckClass() ||
1421 CompilerState::Current().is_aot()) {
1422 // Do not hoist any: Either deoptimized on a hoisted check,
1423 // or compiling precompiled code where we can't do optimistic
1424 // hoisting of checks.
1425 return;
1426 }
1427
1428 const ZoneGrowableArray<BlockEntryInstr*>& loop_headers =
1429 flow_graph()->GetLoopHierarchy().headers();
1430
1431 for (intptr_t i = 0; i < loop_headers.length(); ++i) {
1432 JoinEntryInstr* header = loop_headers[i]->AsJoinEntry();
    // Skip loops that don't have a pre-header block.
1434 BlockEntryInstr* pre_header = header->ImmediateDominator();
1435 if (pre_header == NULL) continue;
1436
1437 for (PhiIterator it(header); !it.Done(); it.Advance()) {
1438 TrySpecializeSmiPhi(it.Current(), header, pre_header);
1439 }
1440 }
1441}
1442
// Returns true if the instruction may have a "visible" effect.
1444static bool MayHaveVisibleEffect(Instruction* instr) {
1445 switch (instr->tag()) {
1446 case Instruction::kStoreInstanceField:
1447 case Instruction::kStoreStaticField:
1448 case Instruction::kStoreIndexed:
1449 case Instruction::kStoreIndexedUnsafe:
1450 case Instruction::kStoreUntagged:
1451 return true;
1452 default:
1453 return instr->HasUnknownSideEffects() || instr->MayThrow();
1454 }
1455}
1456
1457void LICM::Optimize() {
1458 if (flow_graph()->function().ProhibitsHoistingCheckClass()) {
1459 // Do not hoist any.
1460 return;
1461 }
1462
1463 // Compute loops and induction in flow graph.
1464 const LoopHierarchy& loop_hierarchy = flow_graph()->GetLoopHierarchy();
1465 const ZoneGrowableArray<BlockEntryInstr*>& loop_headers =
1466 loop_hierarchy.headers();
1467 loop_hierarchy.ComputeInduction();
1468
1469 ZoneGrowableArray<BitVector*>* loop_invariant_loads =
1470 flow_graph()->loop_invariant_loads();
1471
1472 // Iterate over all loops.
1473 for (intptr_t i = 0; i < loop_headers.length(); ++i) {
1474 BlockEntryInstr* header = loop_headers[i];
1475
1476 // Skip loops that don't have a pre-header block.
1477 BlockEntryInstr* pre_header = header->ImmediateDominator();
1478 if (pre_header == nullptr) {
1479 continue;
1480 }
1481
    // Flag that becomes true once the loop has seen an instruction that may
    // have a "visible" effect (write, throw, or other side-effect).
    bool seen_visible_effect = false;
1485
1486 // Iterate over all blocks in the loop.
1487 LoopInfo* loop = header->loop_info();
1488 for (BitVector::Iterator loop_it(loop->blocks()); !loop_it.Done();
1489 loop_it.Advance()) {
1490 BlockEntryInstr* block = flow_graph()->preorder()[loop_it.Current()];
1491
1492 // Preserve the "visible" effect flag as long as the preorder traversal
1493 // sees always-taken blocks. This way, we can only hoist invariant
1494 // may-throw instructions that are always seen during the first iteration.
1495 if (!seen_visible_effect && !loop->IsAlwaysTaken(block)) {
1496 seen_visible_effect = true;
1497 }
1498 // Iterate over all instructions in the block.
1499 for (ForwardInstructionIterator it(block); !it.Done(); it.Advance()) {
1500 Instruction* current = it.Current();
1501
        // Treat loads of static final fields specially: we can CSE them, but
        // we should not move them around unless the field is initialized.
        // Otherwise we might move the load past the initialization.
1505 if (LoadStaticFieldInstr* load = current->AsLoadStaticField()) {
1506 if (load->AllowsCSE() && !load->IsFieldInitialized()) {
1507 seen_visible_effect = true;
1508 continue;
1509 }
1510 }
1511
        // Determine if we can hoist loop invariant code. Even may-throw
        // instructions can be hoisted as long as their exception is still
        // the very first "visible" effect of the loop.
1515 bool is_loop_invariant = false;
1516 if ((current->AllowsCSE() ||
1517 IsLoopInvariantLoad(loop_invariant_loads, i, current)) &&
1518 (!seen_visible_effect || !current->MayThrow())) {
1519 is_loop_invariant = true;
1520 for (intptr_t i = 0; i < current->InputCount(); ++i) {
1521 Definition* input_def = current->InputAt(i)->definition();
1522 if (!input_def->GetBlock()->Dominates(pre_header)) {
1523 is_loop_invariant = false;
1524 break;
1525 }
1526 }
1527 }
1528
1529 // Hoist if all inputs are loop invariant. If not hoisted, any
1530 // instruction that writes, may throw, or has an unknown side
1531 // effect invalidates the first "visible" effect flag.
1532 if (is_loop_invariant) {
1533 Hoist(&it, pre_header, current);
1534 } else if (!seen_visible_effect && MayHaveVisibleEffect(current)) {
1535 seen_visible_effect = true;
1536 }
1537 }
1538 }
1539 }
1540}
1541
1542void DelayAllocations::Optimize(FlowGraph* graph) {
  // Go through all AllocateObject and CreateArray instructions and move them
  // down to their dominant use when doing so is sound.
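  //
  // For illustration (hypothetical flow graph): if an AllocateObject in the
  // entry block is only used by a StoreInstanceField inside one branch of an
  // if-statement, moving the allocation down to just before that store avoids
  // allocating on the paths that never use the object.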
1545 DirectChainedHashMap<IdentitySetKeyValueTrait<Instruction*>> moved;
1546 for (BlockIterator block_it = graph->reverse_postorder_iterator();
1547 !block_it.Done(); block_it.Advance()) {
1548 BlockEntryInstr* block = block_it.Current();
1549
1550 for (ForwardInstructionIterator instr_it(block); !instr_it.Done();
1551 instr_it.Advance()) {
1552 Definition* def = instr_it.Current()->AsDefinition();
1553 if (def != nullptr && (def->IsAllocateObject() || def->IsCreateArray()) &&
1554 def->env() == nullptr && !moved.HasKey(def)) {
1555 Instruction* use = DominantUse(def);
1556 if (use != nullptr && !use->IsPhi() && IsOneTimeUse(use, def)) {
1557 instr_it.RemoveCurrentFromGraph();
1558 def->InsertBefore(use);
1559 moved.Insert(def);
1560 }
1561 }
1562 }
1563 }
1564}
1565
1566Instruction* DelayAllocations::DominantUse(Definition* def) {
1567 // Find the use that dominates all other uses.
1568
1569 // Collect all uses.
1570 DirectChainedHashMap<IdentitySetKeyValueTrait<Instruction*>> uses;
1571 for (Value::Iterator it(def->input_use_list()); !it.Done(); it.Advance()) {
1572 Instruction* use = it.Current()->instruction();
1573 uses.Insert(use);
1574 }
1575 for (Value::Iterator it(def->env_use_list()); !it.Done(); it.Advance()) {
1576 Instruction* use = it.Current()->instruction();
1577 uses.Insert(use);
1578 }
1579
1580 // Find the dominant use.
1581 Instruction* dominant_use = nullptr;
1582 auto use_it = uses.GetIterator();
1583 while (auto use = use_it.Next()) {
1584 // Start with the instruction before the use, then walk backwards through
1585 // blocks in the dominator chain until we hit the definition or another use.
1586 Instruction* instr = nullptr;
1587 if (auto phi = (*use)->AsPhi()) {
1588 // For phi uses, the dominant use only has to dominate the
1589 // predecessor block corresponding to the phi input.
1590 ASSERT(phi->InputCount() == phi->block()->PredecessorCount());
1591 for (intptr_t i = 0; i < phi->InputCount(); i++) {
1592 if (phi->InputAt(i)->definition() == def) {
1593 instr = phi->block()->PredecessorAt(i)->last_instruction();
1594 break;
1595 }
1596 }
1597 ASSERT(instr != nullptr);
1598 } else {
1599 instr = (*use)->previous();
1600 }
1601
1602 bool dominated = false;
1603 while (instr != def) {
1604 if (uses.HasKey(instr)) {
1605 // We hit another use.
1606 dominated = true;
1607 break;
1608 }
1609 if (auto block = instr->AsBlockEntry()) {
1610 instr = block->dominator()->last_instruction();
1611 } else {
1612 instr = instr->previous();
1613 }
1614 }
1615 if (!dominated) {
1616 if (dominant_use != nullptr) {
1617 // More than one use reached the definition, which means no use
1618 // dominates all other uses.
1619 return nullptr;
1620 }
1621 dominant_use = *use;
1622 }
1623 }
1624
1625 return dominant_use;
1626}
1627
1628bool DelayAllocations::IsOneTimeUse(Instruction* use, Definition* def) {
1629 // Check that this use is always executed at most once for each execution of
1630 // the definition, i.e. that there is no path from the use to itself that
1631 // doesn't pass through the definition.
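  //
  // For illustration (hypothetical flow graph): if the definition is created
  // before a loop and the use sits inside the loop body, the backwards walk
  // from the use's block reaches the use's block again via the loop back-edge
  // without passing through the definition's block, so the use may execute
  // more than once per allocation and we return false.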
1632 BlockEntryInstr* use_block = use->GetBlock();
1633 BlockEntryInstr* def_block = def->GetBlock();
1634 if (use_block == def_block) return true;
1635
1636 DirectChainedHashMap<IdentitySetKeyValueTrait<BlockEntryInstr*>> seen;
1637 GrowableArray<BlockEntryInstr*> worklist;
1638 worklist.Add(use_block);
1639
1640 while (!worklist.is_empty()) {
1641 BlockEntryInstr* block = worklist.RemoveLast();
1642 for (intptr_t i = 0; i < block->PredecessorCount(); i++) {
1643 BlockEntryInstr* pred = block->PredecessorAt(i);
1644 if (pred == use_block) return false;
1645 if (pred == def_block) continue;
1646 if (seen.HasKey(pred)) continue;
1647 seen.Insert(pred);
1648 worklist.Add(pred);
1649 }
1650 }
1651 return true;
1652}
1653
1654class LoadOptimizer : public ValueObject {
1655 public:
1656 LoadOptimizer(FlowGraph* graph, AliasedSet* aliased_set)
1657 : graph_(graph),
1658 aliased_set_(aliased_set),
1659 in_(graph_->preorder().length()),
1660 out_(graph_->preorder().length()),
1661 gen_(graph_->preorder().length()),
1662 kill_(graph_->preorder().length()),
1663 exposed_values_(graph_->preorder().length()),
1664 out_values_(graph_->preorder().length()),
1665 phis_(5),
1666 worklist_(5),
1667 congruency_worklist_(6),
1668 in_worklist_(NULL),
1669 forwarded_(false) {
1670 const intptr_t num_blocks = graph_->preorder().length();
1671 for (intptr_t i = 0; i < num_blocks; i++) {
1672 out_.Add(NULL);
1673 gen_.Add(new (Z) BitVector(Z, aliased_set_->max_place_id()));
1674 kill_.Add(new (Z) BitVector(Z, aliased_set_->max_place_id()));
1675 in_.Add(new (Z) BitVector(Z, aliased_set_->max_place_id()));
1676
1677 exposed_values_.Add(NULL);
1678 out_values_.Add(NULL);
1679 }
1680 }
1681
1682 ~LoadOptimizer() { aliased_set_->RollbackAliasedIdentites(); }
1683
1684 Isolate* isolate() const { return graph_->isolate(); }
1685 Zone* zone() const { return graph_->zone(); }
1686
1687 static bool OptimizeGraph(FlowGraph* graph) {
1688 ASSERT(FLAG_load_cse);
1689
1690 // For now, bail out for large functions to avoid OOM situations.
1691 // TODO(fschneider): Fix the memory consumption issue.
1692 intptr_t function_length = graph->function().end_token_pos().Pos() -
1693 graph->function().token_pos().Pos();
1694 if (function_length >= FLAG_huge_method_cutoff_in_tokens) {
1695 return false;
1696 }
1697
1698 DirectChainedHashMap<PointerKeyValueTrait<Place> > map;
1699 AliasedSet* aliased_set = NumberPlaces(graph, &map, kOptimizeLoads);
1700 if ((aliased_set != NULL) && !aliased_set->IsEmpty()) {
      // If any loads were forwarded, return true from Optimize to run load
      // forwarding again. This allows forwarding chains of loads.
      // This is especially important for context variables as they are built
      // as loads from a loaded context.
1705 // TODO(vegorov): renumber newly discovered congruences during the
1706 // forwarding to forward chains without running whole pass twice.
1707 LoadOptimizer load_optimizer(graph, aliased_set);
1708 return load_optimizer.Optimize();
1709 }
1710 return false;
1711 }
1712
1713 private:
1714 bool Optimize() {
1715 // Initializer calls should be eliminated before ComputeInitialSets()
1716 // in order to calculate kill sets more precisely.
1717 OptimizeLazyInitialization();
1718
1719 ComputeInitialSets();
1720 ComputeOutSets();
1721 ComputeOutValues();
1722 if (graph_->is_licm_allowed()) {
1723 MarkLoopInvariantLoads();
1724 }
1725 ForwardLoads();
1726 EmitPhis();
1727 return forwarded_;
1728 }
1729
1730 bool CallsInitializer(Instruction* instr) {
1731 if (auto* load_field = instr->AsLoadField()) {
1732 return load_field->calls_initializer();
1733 } else if (auto* load_static = instr->AsLoadStaticField()) {
1734 return load_static->calls_initializer();
1735 }
1736 return false;
1737 }
1738
1739 void ClearCallsInitializer(Instruction* instr) {
1740 if (auto* load_field = instr->AsLoadField()) {
1741 load_field->set_calls_initializer(false);
1742 } else if (auto* load_static = instr->AsLoadStaticField()) {
1743 load_static->set_calls_initializer(false);
1744 } else {
1745 UNREACHABLE();
1746 }
1747 }
1748
  // Returns true if the given instruction stores the sentinel value.
  // Such a store doesn't initialize the corresponding field.
1751 bool IsSentinelStore(Instruction* instr) {
1752 Value* value = nullptr;
1753 if (auto* store_field = instr->AsStoreInstanceField()) {
1754 value = store_field->value();
1755 } else if (auto* store_static = instr->AsStoreStaticField()) {
1756 value = store_static->value();
1757 }
1758 return value != nullptr && value->BindsToConstant() &&
1759 (value->BoundConstant().raw() == Object::sentinel().raw());
1760 }
1761
1762 // This optimization pass tries to get rid of lazy initializer calls in
1763 // LoadField and LoadStaticField instructions. The "initialized" state of
1764 // places is propagated through the flow graph.
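  // For illustration, a hypothetical Dart sketch (not from this file):
  //
  //   late final int x = computeX();
  //   ...
  //   use(x);   // LoadStaticField with calls_initializer() == true
  //   use(x);   // dominated by the first load, so the initializer call
  //             // here is redundant and is removed by this pass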
1765 void OptimizeLazyInitialization() {
1766 if (!FLAG_optimize_lazy_initializer_calls) {
1767 return;
1768 }
1769
1770 // 1) Populate 'gen' sets with places which are initialized at each basic
    //    block. Optimize lazy initializer calls within the basic block and
    //    figure out if there are lazy initializer calls left to optimize.
1773 bool has_lazy_initializer_calls = false;
1774 for (BlockIterator block_it = graph_->reverse_postorder_iterator();
1775 !block_it.Done(); block_it.Advance()) {
1776 BlockEntryInstr* block = block_it.Current();
1777 BitVector* gen = gen_[block->preorder_number()];
1778
1779 for (ForwardInstructionIterator instr_it(block); !instr_it.Done();
1780 instr_it.Advance()) {
1781 Instruction* instr = instr_it.Current();
1782
1783 bool is_load = false, is_store = false;
1784 Place place(instr, &is_load, &is_store);
1785
1786 if (is_store && !IsSentinelStore(instr)) {
1787 gen->Add(GetPlaceId(instr));
1788 } else if (is_load) {
1789 const auto place_id = GetPlaceId(instr);
1790 if (CallsInitializer(instr)) {
1791 if (gen->Contains(place_id)) {
1792 ClearCallsInitializer(instr);
1793 } else {
1794 has_lazy_initializer_calls = true;
1795 }
1796 }
1797 gen->Add(place_id);
1798 }
1799 }
1800
1801 // Spread initialized state through outgoing phis.
1802 PhiPlaceMoves::MovesList phi_moves =
1803 aliased_set_->phi_moves()->GetOutgoingMoves(block);
1804 if (phi_moves != nullptr) {
1805 for (intptr_t i = 0, n = phi_moves->length(); i < n; ++i) {
1806 const intptr_t from = (*phi_moves)[i].from();
1807 const intptr_t to = (*phi_moves)[i].to();
1808 if ((from != to) && gen->Contains(from)) {
1809 gen->Add(to);
1810 }
1811 }
1812 }
1813 }
1814
1815 if (has_lazy_initializer_calls) {
      // 2) Propagate initialized state between blocks, calculating
      //    incoming initialized state. Iterate until reaching a fixed point.
1818 BitVector* temp = new (Z) BitVector(Z, aliased_set_->max_place_id());
1819 bool changed = true;
1820 while (changed) {
1821 changed = false;
1822
1823 for (BlockIterator block_it = graph_->reverse_postorder_iterator();
1824 !block_it.Done(); block_it.Advance()) {
1825 BlockEntryInstr* block = block_it.Current();
1826 BitVector* block_in = in_[block->preorder_number()];
1827 BitVector* gen = gen_[block->preorder_number()];
1828
1829 // Incoming initialized state is the intersection of all
1830 // outgoing initialized states of predecessors.
1831 if (block->IsGraphEntry()) {
1832 temp->Clear();
1833 } else {
1834 temp->SetAll();
1835 ASSERT(block->PredecessorCount() > 0);
1836 for (intptr_t i = 0, pred_count = block->PredecessorCount();
1837 i < pred_count; ++i) {
1838 BlockEntryInstr* pred = block->PredecessorAt(i);
1839 BitVector* pred_out = gen_[pred->preorder_number()];
1840 temp->Intersect(pred_out);
1841 }
1842 }
1843
1844 if (!temp->Equals(*block_in)) {
1845 ASSERT(block_in->SubsetOf(*temp));
1846 block_in->AddAll(temp);
1847 gen->AddAll(temp);
1848 changed = true;
1849 }
1850 }
1851 }
1852
1853 // 3) Single pass through basic blocks to optimize lazy
1854 // initializer calls using calculated incoming inter-block
1855 // initialized state.
1856 for (BlockIterator block_it = graph_->reverse_postorder_iterator();
1857 !block_it.Done(); block_it.Advance()) {
1858 BlockEntryInstr* block = block_it.Current();
1859 BitVector* block_in = in_[block->preorder_number()];
1860
1861 for (ForwardInstructionIterator instr_it(block); !instr_it.Done();
1862 instr_it.Advance()) {
1863 Instruction* instr = instr_it.Current();
1864 if (CallsInitializer(instr) &&
1865 block_in->Contains(GetPlaceId(instr))) {
1866 ClearCallsInitializer(instr);
1867 }
1868 }
1869 }
1870 }
1871
1872 // Clear sets which are also used in the main part of load forwarding.
1873 for (intptr_t i = 0, n = graph_->preorder().length(); i < n; ++i) {
1874 gen_[i]->Clear();
1875 in_[i]->Clear();
1876 }
1877 }
1878
1879 // Only forward stores to normal arrays, float64, and simd arrays
1880 // to loads because other array stores (intXX/uintXX/float32)
1881 // may implicitly convert the value stored.
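  // For example (hypothetical Dart sketch): after `bytes[0] = 300;` on a
  // Uint8List, a later load of bytes[0] observes 44 (the value truncated to
  // 8 bits), so forwarding the stored 300 to that load would be incorrect.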
1882 bool CanForwardStore(StoreIndexedInstr* array_store) {
1883 return ((array_store == nullptr) ||
1884 (array_store->class_id() == kArrayCid) ||
1885 (array_store->class_id() == kTypedDataFloat64ArrayCid) ||
1887 (array_store->class_id() == kTypedDataFloat32x4ArrayCid));
1888 }
1889
1890 // Compute sets of loads generated and killed by each block.
1891 // Additionally compute upwards exposed and generated loads for each block.
  // Exposed loads are those that can be replaced if a corresponding
  // reaching load is found.
1894 // Loads that are locally redundant will be replaced as we go through
1895 // instructions.
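  // Roughly, for each block:
  //   GEN  - places whose current value is available at the block exit;
  //   KILL - places whose incoming value may be invalidated in the block.
  // A load of place P adds P to GEN; a store (or an instruction with unknown
  // side effects) adds the set of places it may alias to KILL and removes
  // them from GEN.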
1896 void ComputeInitialSets() {
1897 for (BlockIterator block_it = graph_->reverse_postorder_iterator();
1898 !block_it.Done(); block_it.Advance()) {
1899 BlockEntryInstr* block = block_it.Current();
1900 const intptr_t preorder_number = block->preorder_number();
1901
1902 BitVector* kill = kill_[preorder_number];
1903 BitVector* gen = gen_[preorder_number];
1904
1905 ZoneGrowableArray<Definition*>* exposed_values = NULL;
1906 ZoneGrowableArray<Definition*>* out_values = NULL;
1907
1908 for (ForwardInstructionIterator instr_it(block); !instr_it.Done();
1909 instr_it.Advance()) {
1910 Instruction* instr = instr_it.Current();
1911
1912 bool is_load = false, is_store = false;
1913 Place place(instr, &is_load, &is_store);
1914
1915 BitVector* killed = NULL;
1916 if (is_store) {
1917 const intptr_t alias_id =
1918 aliased_set_->LookupAliasId(place.ToAlias());
1919 if (alias_id != AliasedSet::kNoAlias) {
1920 killed = aliased_set_->GetKilledSet(alias_id);
1921 } else if (!place.IsImmutableField()) {
            // We encountered an unknown alias: this means intrablock load
            // forwarding refined a parameter of this store, for example
1924 //
1925 // o <- alloc()
1926 // a.f <- o
1927 // u <- a.f
1928 // u.x <- null ;; this store alias is *.x
1929 //
1930 // after intrablock load forwarding
1931 //
1932 // o <- alloc()
1933 // a.f <- o
1934 // o.x <- null ;; this store alias is o.x
1935 //
            // In this case we fall back to using the place id recorded in the
            // instruction, which still points to the old place with a more
            // generic alias.
1939 const intptr_t old_alias_id = aliased_set_->LookupAliasId(
1940 aliased_set_->places()[GetPlaceId(instr)]->ToAlias());
1941 killed = aliased_set_->GetKilledSet(old_alias_id);
1942 }
1943
1944 // Find canonical place of store.
1945 Place* canonical_place = nullptr;
1946 if (CanForwardStore(instr->AsStoreIndexed())) {
1947 canonical_place = aliased_set_->LookupCanonical(&place);
1948 if (canonical_place != nullptr) {
1949 // Is this a redundant store (stored value already resides
1950 // in this field)?
1951 const intptr_t place_id = canonical_place->id();
1952 if (gen->Contains(place_id)) {
1953 ASSERT((out_values != nullptr) &&
1954 ((*out_values)[place_id] != nullptr));
1955 if ((*out_values)[place_id] == GetStoredValue(instr)) {
1956 if (FLAG_trace_optimization) {
1957 THR_Print("Removing redundant store to place %" Pd
1958 " in block B%" Pd "\n",
1959 GetPlaceId(instr), block->block_id());
1960 }
1961 instr_it.RemoveCurrentFromGraph();
1962 continue;
1963 }
1964 }
1965 }
1966 }
1967
1968 // Update kill/gen/out_values (after inspection of incoming values).
1969 if (killed != nullptr) {
1970 kill->AddAll(killed);
1971 // There is no need to clear out_values when clearing GEN set
1972 // because only those values that are in the GEN set
1973 // will ever be used.
1974 gen->RemoveAll(killed);
1975 }
1976 if (canonical_place != nullptr) {
1977 // Store has a corresponding numbered place that might have a
1978 // load. Try forwarding stored value to it.
1979 gen->Add(canonical_place->id());
1980 if (out_values == nullptr) out_values = CreateBlockOutValues();
1981 (*out_values)[canonical_place->id()] = GetStoredValue(instr);
1982 }
1983
1984 ASSERT(!instr->IsDefinition() ||
1985 !IsLoadEliminationCandidate(instr->AsDefinition()));
1986 continue;
1987 } else if (is_load) {
1988 // Check if this load needs renumbering because of the intrablock
1989 // load forwarding.
1990 const Place* canonical = aliased_set_->LookupCanonical(&place);
1991 if ((canonical != NULL) &&
1992 (canonical->id() != GetPlaceId(instr->AsDefinition()))) {
1993 SetPlaceId(instr->AsDefinition(), canonical->id());
1994 }
1995 }
1996
1997 // If instruction has effects then kill all loads affected.
1998 if (instr->HasUnknownSideEffects()) {
1999 kill->AddAll(aliased_set_->aliased_by_effects());
2000 // There is no need to clear out_values when removing values from GEN
2001 // set because only those values that are in the GEN set
2002 // will ever be used.
2003 gen->RemoveAll(aliased_set_->aliased_by_effects());
2004 }
2005
2006 Definition* defn = instr->AsDefinition();
2007 if (defn == NULL) {
2008 continue;
2009 }
2010
        // For object allocations forward initial values of the fields to
        // subsequent loads (and potential dead stores), except for final
        // fields of escaping objects. Final fields are initialized in the
        // constructor, which potentially was not inlined into the function
        // that we are currently optimizing. However, at the same time we
        // assume that values of final fields can be forwarded across
        // side effects. If we added 'null' as a known value for these fields
        // here, we would incorrectly propagate this null across the
        // constructor invocation.
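        // For illustration (IL sketch): after `v0 <- AllocateObject(C)` a
        // subsequent `LoadField(v0, C.f)` with no intervening store to f can
        // be forwarded the constant null (or the allocation's type argument
        // vector when f is the type arguments slot).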
2020 if (auto alloc = instr->AsAllocateObject()) {
2021 for (Value* use = alloc->input_use_list(); use != NULL;
2022 use = use->next_use()) {
2023 // Look for all immediate loads/stores from this object.
2024 if (use->use_index() != 0) {
2025 continue;
2026 }
2027 const Slot* slot = nullptr;
2028 intptr_t place_id = 0;
2029 if (auto load = use->instruction()->AsLoadField()) {
2030 slot = &load->slot();
2031 place_id = GetPlaceId(load);
2032 } else if (auto store =
2033 use->instruction()->AsStoreInstanceField()) {
2034 slot = &store->slot();
2035 place_id = GetPlaceId(store);
2036 }
2037
2038 if (slot != nullptr) {
2039 // Found a load/store. Initialize current value of the field
2040 // to null for normal fields, or with type arguments.
2041
2042 // If the object escapes then don't forward final fields - see
2043 // the comment above for explanation.
2044 if (aliased_set_->CanBeAliased(alloc) && slot->IsDartField() &&
2045 slot->is_immutable()) {
2046 continue;
2047 }
2048
2049 Definition* forward_def = graph_->constant_null();
2050 if (alloc->type_arguments() != nullptr) {
2051 const Slot& type_args_slot = Slot::GetTypeArgumentsSlotFor(
2052 graph_->thread(), alloc->cls());
2053 if (slot->IsIdentical(type_args_slot)) {
2054 forward_def = alloc->type_arguments()->definition();
2055 }
2056 }
2057 gen->Add(place_id);
2058 if (out_values == nullptr) out_values = CreateBlockOutValues();
2059 (*out_values)[place_id] = forward_def;
2060 }
2061 }
2062 continue;
2063 }
2064
2065 if (!IsLoadEliminationCandidate(defn)) {
2066 continue;
2067 }
2068
2069 const intptr_t place_id = GetPlaceId(defn);
2070 if (gen->Contains(place_id)) {
2071 // This is a locally redundant load.
2072 ASSERT((out_values != NULL) && ((*out_values)[place_id] != NULL));
2073
2074 Definition* replacement = (*out_values)[place_id];
2075 graph_->EnsureSSATempIndex(defn, replacement);
2076 if (FLAG_trace_optimization) {
2077 THR_Print("Replacing load v%" Pd " with v%" Pd "\n",
2078 defn->ssa_temp_index(), replacement->ssa_temp_index());
2079 }
2080
2081 defn->ReplaceUsesWith(replacement);
2082 instr_it.RemoveCurrentFromGraph();
2083 forwarded_ = true;
2084 continue;
2085 } else if (!kill->Contains(place_id)) {
2086 // This is an exposed load: it is the first representative of a
2087 // given expression id and it is not killed on the path from
2088 // the block entry.
2089 if (exposed_values == NULL) {
2090 static const intptr_t kMaxExposedValuesInitialSize = 5;
2091 exposed_values = new (Z) ZoneGrowableArray<Definition*>(
2092 Utils::Minimum(kMaxExposedValuesInitialSize,
2093 aliased_set_->max_place_id()));
2094 }
2095
2096 exposed_values->Add(defn);
2097 }
2098
2099 gen->Add(place_id);
2100
2101 if (out_values == NULL) out_values = CreateBlockOutValues();
2102 (*out_values)[place_id] = defn;
2103 }
2104
2105 exposed_values_[preorder_number] = exposed_values;
2106 out_values_[preorder_number] = out_values;
2107 }
2108 }
2109
2110 static void PerformPhiMoves(PhiPlaceMoves::MovesList phi_moves,
2111 BitVector* out,
2112 BitVector* forwarded_loads) {
2113 forwarded_loads->Clear();
2114
2115 for (intptr_t i = 0; i < phi_moves->length(); i++) {
2116 const intptr_t from = (*phi_moves)[i].from();
2117 const intptr_t to = (*phi_moves)[i].to();
2118 if (from == to) continue;
2119
2120 if (out->Contains(from)) {
2121 forwarded_loads->Add(to);
2122 }
2123 }
2124
2125 for (intptr_t i = 0; i < phi_moves->length(); i++) {
2126 const intptr_t from = (*phi_moves)[i].from();
2127 const intptr_t to = (*phi_moves)[i].to();
2128 if (from == to) continue;
2129
2130 out->Remove(to);
2131 }
2132
2133 out->AddAll(forwarded_loads);
2134 }
2135
  // Compute OUT sets by propagating them iteratively until a fixed point
  // is reached.
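  // This is a standard forward dataflow iteration; roughly, for each block B:
  //
  //   IN(B)  = intersection of OUT(P) over all predecessors P of B
  //            (with P's outgoing phi moves applied to OUT(P) first)
  //   OUT(B) = (IN(B) - KILL(B)) + GEN(B)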
2138 void ComputeOutSets() {
2139 BitVector* temp = new (Z) BitVector(Z, aliased_set_->max_place_id());
2140 BitVector* forwarded_loads =
2141 new (Z) BitVector(Z, aliased_set_->max_place_id());
2142 BitVector* temp_out = new (Z) BitVector(Z, aliased_set_->max_place_id());
2143
2144 bool changed = true;
2145 while (changed) {
2146 changed = false;
2147
2148 for (BlockIterator block_it = graph_->reverse_postorder_iterator();
2149 !block_it.Done(); block_it.Advance()) {
2150 BlockEntryInstr* block = block_it.Current();
2151
2152 const intptr_t preorder_number = block->preorder_number();
2153
2154 BitVector* block_in = in_[preorder_number];
2155 BitVector* block_out = out_[preorder_number];
2156 BitVector* block_kill = kill_[preorder_number];
2157 BitVector* block_gen = gen_[preorder_number];
2158
2159 // Compute block_in as the intersection of all out(p) where p
2160 // is a predecessor of the current block.
2161 if (block->IsGraphEntry()) {
2162 temp->Clear();
2163 } else {
2164 temp->SetAll();
2165 ASSERT(block->PredecessorCount() > 0);
2166 for (intptr_t i = 0; i < block->PredecessorCount(); i++) {
2167 BlockEntryInstr* pred = block->PredecessorAt(i);
2168 BitVector* pred_out = out_[pred->preorder_number()];
2169 if (pred_out == NULL) continue;
2170 PhiPlaceMoves::MovesList phi_moves =
2171 aliased_set_->phi_moves()->GetOutgoingMoves(pred);
2172 if (phi_moves != NULL) {
2173 // If there are phi moves, perform intersection with
2174 // a copy of pred_out where the phi moves are applied.
2175 temp_out->CopyFrom(pred_out);
2176 PerformPhiMoves(phi_moves, temp_out, forwarded_loads);
2177 pred_out = temp_out;
2178 }
2179 temp->Intersect(pred_out);
2180 }
2181 }
2182
2183 if (!temp->Equals(*block_in) || (block_out == NULL)) {
2184 // If IN set has changed propagate the change to OUT set.
2185 block_in->CopyFrom(temp);
2186
2187 temp->RemoveAll(block_kill);
2188 temp->AddAll(block_gen);
2189
2190 if ((block_out == NULL) || !block_out->Equals(*temp)) {
2191 if (block_out == NULL) {
2192 block_out = out_[preorder_number] =
2193 new (Z) BitVector(Z, aliased_set_->max_place_id());
2194 }
2195 block_out->CopyFrom(temp);
2196 changed = true;
2197 }
2198 }
2199 }
2200 }
2201 }
2202
2203 // Compute out_values mappings by propagating them in reverse postorder once
2204 // through the graph. Generate phis on back edges where eager merge is
2205 // impossible.
2206 // No replacement is done at this point and thus any out_value[place_id] is
2207 // changed at most once: from NULL to an actual value.
2208 // When merging incoming loads we might need to create a phi.
  // These phis are not inserted into the graph immediately because some of
  // them might become redundant after load forwarding is done.
2211 void ComputeOutValues() {
2212 GrowableArray<PhiInstr*> pending_phis(5);
2213 ZoneGrowableArray<Definition*>* temp_forwarded_values = NULL;
2214
2215 for (BlockIterator block_it = graph_->reverse_postorder_iterator();
2216 !block_it.Done(); block_it.Advance()) {
2217 BlockEntryInstr* block = block_it.Current();
2218
2219 const bool can_merge_eagerly = CanMergeEagerly(block);
2220
2221 const intptr_t preorder_number = block->preorder_number();
2222
2223 ZoneGrowableArray<Definition*>* block_out_values =
2224 out_values_[preorder_number];
2225
2226 // If OUT set has changed then we have new values available out of
2227 // the block. Compute these values creating phi where necessary.
2228 for (BitVector::Iterator it(out_[preorder_number]); !it.Done();
2229 it.Advance()) {
2230 const intptr_t place_id = it.Current();
2231
2232 if (block_out_values == NULL) {
2233 out_values_[preorder_number] = block_out_values =
2234 CreateBlockOutValues();
2235 }
2236
2237 if ((*block_out_values)[place_id] == NULL) {
2238 ASSERT(block->PredecessorCount() > 0);
2239 Definition* in_value =
2240 can_merge_eagerly ? MergeIncomingValues(block, place_id) : NULL;
2241 if ((in_value == NULL) &&
2242 (in_[preorder_number]->Contains(place_id))) {
2243 PhiInstr* phi = new (Z)
2244 PhiInstr(block->AsJoinEntry(), block->PredecessorCount());
2245 SetPlaceId(phi, place_id);
2246 pending_phis.Add(phi);
2247 in_value = phi;
2248 }
2249 (*block_out_values)[place_id] = in_value;
2250 }
2251 }
2252
2253 // If the block has outgoing phi moves perform them. Use temporary list
2254 // of values to ensure that cyclic moves are performed correctly.
2255 PhiPlaceMoves::MovesList phi_moves =
2256 aliased_set_->phi_moves()->GetOutgoingMoves(block);
2257 if ((phi_moves != NULL) && (block_out_values != NULL)) {
2258 if (temp_forwarded_values == NULL) {
2259 temp_forwarded_values = CreateBlockOutValues();
2260 }
2261
2262 for (intptr_t i = 0; i < phi_moves->length(); i++) {
2263 const intptr_t from = (*phi_moves)[i].from();
2264 const intptr_t to = (*phi_moves)[i].to();
2265 if (from == to) continue;
2266
2267 (*temp_forwarded_values)[to] = (*block_out_values)[from];
2268 }
2269
2270 for (intptr_t i = 0; i < phi_moves->length(); i++) {
2271 const intptr_t from = (*phi_moves)[i].from();
2272 const intptr_t to = (*phi_moves)[i].to();
2273 if (from == to) continue;
2274
2275 (*block_out_values)[to] = (*temp_forwarded_values)[to];
2276 }
2277 }
2278
2279 if (FLAG_trace_load_optimization) {
2280 THR_Print("B%" Pd "\n", block->block_id());
2281 THR_Print(" IN: ");
2282 aliased_set_->PrintSet(in_[preorder_number]);
2283 THR_Print("\n");
2284
2285 THR_Print(" KILL: ");
2286 aliased_set_->PrintSet(kill_[preorder_number]);
2287 THR_Print("\n");
2288
2289 THR_Print(" OUT: ");
2290 aliased_set_->PrintSet(out_[preorder_number]);
2291 THR_Print("\n");
2292 }
2293 }
2294
2295 // All blocks were visited. Fill pending phis with inputs
2296 // that flow on back edges.
2297 for (intptr_t i = 0; i < pending_phis.length(); i++) {
2298 FillPhiInputs(pending_phis[i]);
2299 }
2300 }
2301
2302 bool CanMergeEagerly(BlockEntryInstr* block) {
2303 for (intptr_t i = 0; i < block->PredecessorCount(); i++) {
2304 BlockEntryInstr* pred = block->PredecessorAt(i);
2305 if (pred->postorder_number() < block->postorder_number()) {
2306 return false;
2307 }
2308 }
2309 return true;
2310 }
2311
2312 void MarkLoopInvariantLoads() {
2313 const ZoneGrowableArray<BlockEntryInstr*>& loop_headers =
2314 graph_->GetLoopHierarchy().headers();
2315
2316 ZoneGrowableArray<BitVector*>* invariant_loads =
2317 new (Z) ZoneGrowableArray<BitVector*>(loop_headers.length());
2318
2319 for (intptr_t i = 0; i < loop_headers.length(); i++) {
2320 BlockEntryInstr* header = loop_headers[i];
2321 BlockEntryInstr* pre_header = header->ImmediateDominator();
2322 if (pre_header == NULL) {
2323 invariant_loads->Add(NULL);
2324 continue;
2325 }
2326
2327 BitVector* loop_gen = new (Z) BitVector(Z, aliased_set_->max_place_id());
2328 for (BitVector::Iterator loop_it(header->loop_info()->blocks());
2329 !loop_it.Done(); loop_it.Advance()) {
2330 const intptr_t preorder_number = loop_it.Current();
2331 loop_gen->AddAll(gen_[preorder_number]);
2332 }
2333
2334 for (BitVector::Iterator loop_it(header->loop_info()->blocks());
2335 !loop_it.Done(); loop_it.Advance()) {
2336 const intptr_t preorder_number = loop_it.Current();
2337 loop_gen->RemoveAll(kill_[preorder_number]);
2338 }
2339
2340 if (FLAG_trace_optimization) {
2341 for (BitVector::Iterator it(loop_gen); !it.Done(); it.Advance()) {
2342 THR_Print("place %s is loop invariant for B%" Pd "\n",
2343 aliased_set_->places()[it.Current()]->ToCString(),
2344 header->block_id());
2345 }
2346 }
2347
2348 invariant_loads->Add(loop_gen);
2349 }
2350
2351 graph_->set_loop_invariant_loads(invariant_loads);
2352 }
2353
2354 // Compute incoming value for the given expression id.
2355 // Will create a phi if different values are incoming from multiple
2356 // predecessors.
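  // For example, if both predecessors make v5 available for this place, the
  // result is v5; if one makes v5 and the other v7 available, a new
  // phi(v5, v7) is created to merge them.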
2357 Definition* MergeIncomingValues(BlockEntryInstr* block, intptr_t place_id) {
2358 // First check if the same value is coming in from all predecessors.
2359 static Definition* const kDifferentValuesMarker =
2360 reinterpret_cast<Definition*>(-1);
2361 Definition* incoming = NULL;
2362 for (intptr_t i = 0; i < block->PredecessorCount(); i++) {
2363 BlockEntryInstr* pred = block->PredecessorAt(i);
2364 ZoneGrowableArray<Definition*>* pred_out_values =
2365 out_values_[pred->preorder_number()];
2366 if ((pred_out_values == NULL) || ((*pred_out_values)[place_id] == NULL)) {
2367 return NULL;
2368 } else if (incoming == NULL) {
2369 incoming = (*pred_out_values)[place_id];
2370 } else if (incoming != (*pred_out_values)[place_id]) {
2371 incoming = kDifferentValuesMarker;
2372 }
2373 }
2374
2375 if (incoming != kDifferentValuesMarker) {
2376 ASSERT(incoming != NULL);
2377 return incoming;
2378 }
2379
2380 // Incoming values are different. Phi is required to merge.
2381 PhiInstr* phi =
2382 new (Z) PhiInstr(block->AsJoinEntry(), block->PredecessorCount());
2383 SetPlaceId(phi, place_id);
2384 FillPhiInputs(phi);
2385 return phi;
2386 }
2387
2388 void FillPhiInputs(PhiInstr* phi) {
2389 BlockEntryInstr* block = phi->GetBlock();
2390 const intptr_t place_id = GetPlaceId(phi);
2391
2392 for (intptr_t i = 0; i < block->PredecessorCount(); i++) {
2393 BlockEntryInstr* pred = block->PredecessorAt(i);
2394 ZoneGrowableArray<Definition*>* pred_out_values =
2395 out_values_[pred->preorder_number()];
2396 ASSERT((*pred_out_values)[place_id] != NULL);
2397
2398 // Sets of outgoing values are not linked into use lists so
2399 // they might contain values that were replaced and removed
2400 // from the graph by this iteration.
2401 // To prevent using them we additionally mark definitions themselves
2402 // as replaced and store a pointer to the replacement.
2403 Definition* replacement = (*pred_out_values)[place_id]->Replacement();
2404 Value* input = new (Z) Value(replacement);
2405 phi->SetInputAt(i, input);
2406 replacement->AddInputUse(input);
2407 }
2408
2409 graph_->AllocateSSAIndexes(phi);
2410 phis_.Add(phi); // Postpone phi insertion until after load forwarding.
2411
2412 if (FLAG_support_il_printer && FLAG_trace_load_optimization) {
2413 THR_Print("created pending phi %s for %s at B%" Pd "\n", phi->ToCString(),
2414 aliased_set_->places()[place_id]->ToCString(),
2415 block->block_id());
2416 }
2417 }
2418
2419 // Iterate over basic blocks and replace exposed loads with incoming
2420 // values.
2421 void ForwardLoads() {
2422 for (BlockIterator block_it = graph_->reverse_postorder_iterator();
2423 !block_it.Done(); block_it.Advance()) {
2424 BlockEntryInstr* block = block_it.Current();
2425
2426 ZoneGrowableArray<Definition*>* loads =
2427 exposed_values_[block->preorder_number()];
2428 if (loads == NULL) continue; // No exposed loads.
2429
2430 BitVector* in = in_[block->preorder_number()];
2431
2432 for (intptr_t i = 0; i < loads->length(); i++) {
2433 Definition* load = (*loads)[i];
2434 if (!in->Contains(GetPlaceId(load))) continue; // No incoming value.
2435
2436 Definition* replacement = MergeIncomingValues(block, GetPlaceId(load));
2437 ASSERT(replacement != NULL);
2438
2439 // Sets of outgoing values are not linked into use lists so
        // they might contain values that were replaced and removed
2441 // from the graph by this iteration.
2442 // To prevent using them we additionally mark definitions themselves
2443 // as replaced and store a pointer to the replacement.
2444 replacement = replacement->Replacement();
2445
2446 if (load != replacement) {
2447 graph_->EnsureSSATempIndex(load, replacement);
2448
2449 if (FLAG_trace_optimization) {
2450 THR_Print("Replacing load v%" Pd " with v%" Pd "\n",
2451 load->ssa_temp_index(), replacement->ssa_temp_index());
2452 }
2453
2454 load->ReplaceUsesWith(replacement);
2455 load->RemoveFromGraph();
2456 load->SetReplacement(replacement);
2457 forwarded_ = true;
2458 }
2459 }
2460 }
2461 }
2462
  // Check if the given phi takes the same value on all code paths.
  // Eliminate it as redundant if this is the case.
  // When analyzing phi operands, assume that only phis generated during
  // this load forwarding phase can be redundant. They can be distinguished
  // because they are not marked alive.
2468 // TODO(vegorov): move this into a separate phase over all phis.
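  // For example, a pending phi1 = phi(v1, phi2) with phi2 = phi(v1, phi1)
  // only ever observes v1 on any code path, so both phis are redundant and
  // all their uses are replaced with v1.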
2469 bool EliminateRedundantPhi(PhiInstr* phi) {
2470 Definition* value = NULL; // Possible value of this phi.
2471
2472 worklist_.Clear();
2473 if (in_worklist_ == NULL) {
2474 in_worklist_ = new (Z) BitVector(Z, graph_->current_ssa_temp_index());
2475 } else {
2476 in_worklist_->Clear();
2477 }
2478
2479 worklist_.Add(phi);
2480 in_worklist_->Add(phi->ssa_temp_index());
2481
2482 for (intptr_t i = 0; i < worklist_.length(); i++) {
2483 PhiInstr* phi = worklist_[i];
2484
2485 for (intptr_t i = 0; i < phi->InputCount(); i++) {
2486 Definition* input = phi->InputAt(i)->definition();
2487 if (input == phi) continue;
2488
2489 PhiInstr* phi_input = input->AsPhi();
2490 if ((phi_input != NULL) && !phi_input->is_alive()) {
2491 if (!in_worklist_->Contains(phi_input->ssa_temp_index())) {
2492 worklist_.Add(phi_input);
2493 in_worklist_->Add(phi_input->ssa_temp_index());
2494 }
2495 continue;
2496 }
2497
2498 if (value == NULL) {
2499 value = input;
2500 } else if (value != input) {
2501 return false; // This phi is not redundant.
2502 }
2503 }
2504 }
2505
2506 // All phis in the worklist are redundant and have the same computed
2507 // value on all code paths.
2508 ASSERT(value != NULL);
2509 for (intptr_t i = 0; i < worklist_.length(); i++) {
2510 worklist_[i]->ReplaceUsesWith(value);
2511 }
2512
2513 return true;
2514 }
2515
2516 // Returns true if definitions are congruent assuming their inputs
2517 // are congruent.
2518 bool CanBeCongruent(Definition* a, Definition* b) {
2519 return (a->tag() == b->tag()) &&
2520 ((a->IsPhi() && (a->GetBlock() == b->GetBlock())) ||
2521 (a->AllowsCSE() && a->AttributesEqual(b)));
2522 }
2523
  // Given two definitions check if they are congruent under the assumption
  // that their inputs will be proven congruent. If they are, add them to the
  // worklist to check their inputs' congruency.
  // Returns true if the pair was added to the worklist or is already in the
  // worklist, and false if a and b are not congruent.
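  // For example, proving that phi(a1, a2) and phi(b1, b2) are congruent
  // requires proving a1 congruent to b1 and a2 congruent to b2; those input
  // pairs are queued here and later checked by AreInputsCongruent.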
2529 bool AddPairToCongruencyWorklist(Definition* a, Definition* b) {
2530 if (!CanBeCongruent(a, b)) {
2531 return false;
2532 }
2533
2534 // If a is already in the worklist check if it is being compared to b.
2535 // Give up if it is not.
2536 if (in_worklist_->Contains(a->ssa_temp_index())) {
2537 for (intptr_t i = 0; i < congruency_worklist_.length(); i += 2) {
2538 if (a == congruency_worklist_[i]) {
2539 return (b == congruency_worklist_[i + 1]);
2540 }
2541 }
2542 UNREACHABLE();
2543 } else if (in_worklist_->Contains(b->ssa_temp_index())) {
2544 return AddPairToCongruencyWorklist(b, a);
2545 }
2546
2547 congruency_worklist_.Add(a);
2548 congruency_worklist_.Add(b);
2549 in_worklist_->Add(a->ssa_temp_index());
2550 return true;
2551 }
2552
2553 bool AreInputsCongruent(Definition* a, Definition* b) {
2554 ASSERT(a->tag() == b->tag());
2555 ASSERT(a->InputCount() == b->InputCount());
2556 for (intptr_t j = 0; j < a->InputCount(); j++) {
2557 Definition* inputA = a->InputAt(j)->definition();
2558 Definition* inputB = b->InputAt(j)->definition();
2559
2560 if (inputA != inputB) {
2561 if (!AddPairToCongruencyWorklist(inputA, inputB)) {
2562 return false;
2563 }
2564 }
2565 }
2566 return true;
2567 }
2568
2569 // Returns true if instruction dom dominates instruction other.
2570 static bool Dominates(Instruction* dom, Instruction* other) {
2571 BlockEntryInstr* dom_block = dom->GetBlock();
2572 BlockEntryInstr* other_block = other->GetBlock();
2573
2574 if (dom_block == other_block) {
2575 for (Instruction* current = dom->next(); current != NULL;
2576 current = current->next()) {
2577 if (current == other) {
2578 return true;
2579 }
2580 }
2581 return false;
2582 }
2583
2584 return dom_block->Dominates(other_block);
2585 }
2586
2587 // Replace the given phi with another if they are congruent.
2588 // Returns true if succeeds.
2589 bool ReplacePhiWith(PhiInstr* phi, PhiInstr* replacement) {
2590 ASSERT(phi->InputCount() == replacement->InputCount());
2591 ASSERT(phi->block() == replacement->block());
2592
2593 congruency_worklist_.Clear();
2594 if (in_worklist_ == NULL) {
2595 in_worklist_ = new (Z) BitVector(Z, graph_->current_ssa_temp_index());
2596 } else {
2597 in_worklist_->Clear();
2598 }
2599
2600 // During the comparison worklist contains pairs of definitions to be
2601 // compared.
2602 if (!AddPairToCongruencyWorklist(phi, replacement)) {
2603 return false;
2604 }
2605
2606 // Process the worklist. It might grow during each comparison step.
2607 for (intptr_t i = 0; i < congruency_worklist_.length(); i += 2) {
2608 if (!AreInputsCongruent(congruency_worklist_[i],
2609 congruency_worklist_[i + 1])) {
2610 return false;
2611 }
2612 }
2613
    // At this point the worklist contains pairs of congruent definitions.
    // Replace one member of each pair with the other, maintaining the proper
    // domination relation between definitions and uses.
2617 for (intptr_t i = 0; i < congruency_worklist_.length(); i += 2) {
2618 Definition* a = congruency_worklist_[i];
2619 Definition* b = congruency_worklist_[i + 1];
2620
      // If these definitions are not phis then we need to pick the one
      // that dominates the other as the replacement: if a dominates b, swap
      // them.
      // Note: both a and b are used as a phi input at the same block B, which
      // means a dominates B and b dominates B, which guarantees that either
      // a dominates b or b dominates a.
2626 if (!a->IsPhi()) {
2627 if (Dominates(a, b)) {
2628 Definition* t = a;
2629 a = b;
2630 b = t;
2631 }
2632 ASSERT(Dominates(b, a));
2633 }
2634
2635 if (FLAG_support_il_printer && FLAG_trace_load_optimization) {
2636 THR_Print("Replacing %s with congruent %s\n", a->ToCString(),
2637 b->ToCString());
2638 }
2639
2640 a->ReplaceUsesWith(b);
2641 if (a->IsPhi()) {
2642 // We might be replacing a phi introduced by the load forwarding
2643 // that is not inserted in the graph yet.
2644 ASSERT(b->IsPhi());
2645 PhiInstr* phi_a = a->AsPhi();
2646 if (phi_a->is_alive()) {
2647 phi_a->mark_dead();
2648 phi_a->block()->RemovePhi(phi_a);
2649 phi_a->UnuseAllInputs();
2650 }
2651 } else {
2652 a->RemoveFromGraph();
2653 }
2654 }
2655
2656 return true;
2657 }
2658
2659 // Insert the given phi into the graph. Attempt to find an equal one in the
2660 // target block first.
2661 // Returns true if the phi was inserted and false if it was replaced.
2662 bool EmitPhi(PhiInstr* phi) {
2663 for (PhiIterator it(phi->block()); !it.Done(); it.Advance()) {
2664 if (ReplacePhiWith(phi, it.Current())) {
2665 return false;
2666 }
2667 }
2668
2669 phi->mark_alive();
2670 phi->block()->InsertPhi(phi);
2671 return true;
2672 }
2673
2674 // Phis have not yet been inserted into the graph but they have uses of
2675 // their inputs. Insert the non-redundant ones and clear the input uses
2676 // of the redundant ones.
2677 void EmitPhis() {
2678 // First eliminate all redundant phis.
2679 for (intptr_t i = 0; i < phis_.length(); i++) {
2680 PhiInstr* phi = phis_[i];
2681 if (!phi->HasUses() || EliminateRedundantPhi(phi)) {
2682 phi->UnuseAllInputs();
2683 phis_[i] = NULL;
2684 }
2685 }
2686
2687 // Now emit phis or replace them with equal phis already present in the
2688 // graph.
2689 for (intptr_t i = 0; i < phis_.length(); i++) {
2690 PhiInstr* phi = phis_[i];
2691 if ((phi != NULL) && (!phi->HasUses() || !EmitPhi(phi))) {
2692 phi->UnuseAllInputs();
2693 }
2694 }
2695 }
2696
2697 ZoneGrowableArray<Definition*>* CreateBlockOutValues() {
2698 ZoneGrowableArray<Definition*>* out =
2699 new (Z) ZoneGrowableArray<Definition*>(aliased_set_->max_place_id());
2700 for (intptr_t i = 0; i < aliased_set_->max_place_id(); i++) {
2701 out->Add(NULL);
2702 }
2703 return out;
2704 }
2705
2706 FlowGraph* graph_;
2707 DirectChainedHashMap<PointerKeyValueTrait<Place> >* map_;
2708
2709 // Mapping between field offsets in words and expression ids of loads from
2710 // that offset.
2711 AliasedSet* aliased_set_;
2712
2713 // Per block sets of expression ids for loads that are: incoming (available
2714 // on the entry), outgoing (available on the exit), generated and killed.
2715 GrowableArray<BitVector*> in_;
2716 GrowableArray<BitVector*> out_;
2717 GrowableArray<BitVector*> gen_;
2718 GrowableArray<BitVector*> kill_;
2719
2720 // Per block list of upwards exposed loads.
2721 GrowableArray<ZoneGrowableArray<Definition*>*> exposed_values_;
2722
2723 // Per block mappings between expression ids and outgoing definitions that
2724 // represent those ids.
2725 GrowableArray<ZoneGrowableArray<Definition*>*> out_values_;
2726
2727 // List of phis generated during ComputeOutValues and ForwardLoads.
2728 // Some of these phis might be redundant and thus a separate pass is
2729 // needed to emit only non-redundant ones.
2730 GrowableArray<PhiInstr*> phis_;
2731
2732 // Auxiliary worklist used by redundant phi elimination.
2733 GrowableArray<PhiInstr*> worklist_;
2734 GrowableArray<Definition*> congruency_worklist_;
2735 BitVector* in_worklist_;
2736
2737 // True if any load was eliminated.
2738 bool forwarded_;
2739
2740 DISALLOW_COPY_AND_ASSIGN(LoadOptimizer);
2741};
2742
2743bool DominatorBasedCSE::Optimize(FlowGraph* graph) {
2744 bool changed = false;
2745 if (FLAG_load_cse) {
2746 changed = LoadOptimizer::OptimizeGraph(graph) || changed;
2747 }
2748
2749 CSEInstructionMap map;
2750 changed = OptimizeRecursive(graph, graph->graph_entry(), &map) || changed;
2751
2752 return changed;
2753}
2754
2755bool DominatorBasedCSE::OptimizeRecursive(FlowGraph* graph,
2756 BlockEntryInstr* block,
2757 CSEInstructionMap* map) {
2758 bool changed = false;
2759 for (ForwardInstructionIterator it(block); !it.Done(); it.Advance()) {
2760 Instruction* current = it.Current();
2761 if (current->AllowsCSE()) {
2762 Instruction* replacement = map->Lookup(current);
2763
2764 if (replacement != NULL) {
2765 // Replace current with lookup result.
2766 ASSERT(replacement->AllowsCSE());
2767 graph->ReplaceCurrentInstruction(&it, current, replacement);
2768 changed = true;
2769 continue;
2770 }
2771
2772 map->Insert(current);
2773 }
2774 }
2775
2776 // Process children in the dominator tree recursively.
2777 intptr_t num_children = block->dominated_blocks().length();
2778 if (num_children != 0) {
2779 graph->thread()->CheckForSafepoint();
2780 }
2781 for (intptr_t i = 0; i < num_children; ++i) {
2782 BlockEntryInstr* child = block->dominated_blocks()[i];
2783 if (i < num_children - 1) {
2784 // Copy map.
2785 CSEInstructionMap child_map(*map);
2786 changed = OptimizeRecursive(graph, child, &child_map) || changed;
2787 } else {
2788 // Reuse map for the last child.
2789 changed = OptimizeRecursive(graph, child, map) || changed;
2790 }
2791 }
2792 return changed;
2793}
2794
2795class StoreOptimizer : public LivenessAnalysis {
2796 public:
2797 StoreOptimizer(FlowGraph* graph,
2798 AliasedSet* aliased_set,
2799 DirectChainedHashMap<PointerKeyValueTrait<Place> >* map)
2800 : LivenessAnalysis(aliased_set->max_place_id(), graph->postorder()),
2801 graph_(graph),
2802 map_(map),
2803 aliased_set_(aliased_set),
2804 exposed_stores_(graph_->postorder().length()) {
2805 const intptr_t num_blocks = graph_->postorder().length();
2806 for (intptr_t i = 0; i < num_blocks; i++) {
2807 exposed_stores_.Add(NULL);
2808 }
2809 }
2810
2811 static void OptimizeGraph(FlowGraph* graph) {
2812 ASSERT(FLAG_load_cse);
2813
2814 // For now, bail out for large functions to avoid OOM situations.
2815 // TODO(fschneider): Fix the memory consumption issue.
2816 intptr_t function_length = graph->function().end_token_pos().Pos() -
2817 graph->function().token_pos().Pos();
2818 if (function_length >= FLAG_huge_method_cutoff_in_tokens) {
2819 return;
2820 }
2821
2822 DirectChainedHashMap<PointerKeyValueTrait<Place> > map;
2823 AliasedSet* aliased_set = NumberPlaces(graph, &map, kOptimizeStores);
2824 if ((aliased_set != NULL) && !aliased_set->IsEmpty()) {
2825 StoreOptimizer store_optimizer(graph, aliased_set, &map);
2826 store_optimizer.Optimize();
2827 }
2828 }
2829
2830 private:
2831 void Optimize() {
2832 Analyze();
2833 if (FLAG_trace_load_optimization) {
2834 Dump();
2835 }
2836 EliminateDeadStores();
2837 }
2838
2839 bool CanEliminateStore(Instruction* instr) {
2840 switch (instr->tag()) {
2841 case Instruction::kStoreInstanceField: {
2842 StoreInstanceFieldInstr* store_instance = instr->AsStoreInstanceField();
2843 // Can't eliminate stores that initialize fields.
2844 return !store_instance->is_initialization();
2845 }
2846 case Instruction::kStoreIndexed:
2847 case Instruction::kStoreStaticField:
2848 return true;
2849 default:
2850 UNREACHABLE();
2851 return false;
2852 }
2853 }
2854
2855 virtual void ComputeInitialSets() {
2856 Zone* zone = graph_->zone();
2857 BitVector* all_places =
2858 new (zone) BitVector(zone, aliased_set_->max_place_id());
2859 all_places->SetAll();
2860 for (BlockIterator block_it = graph_->postorder_iterator();
2861 !block_it.Done(); block_it.Advance()) {
2862 BlockEntryInstr* block = block_it.Current();
2863 const intptr_t postorder_number = block->postorder_number();
2864
2865 BitVector* kill = kill_[postorder_number];
2866 BitVector* live_in = live_in_[postorder_number];
2867 BitVector* live_out = live_out_[postorder_number];
2868
2869 ZoneGrowableArray<Instruction*>* exposed_stores = NULL;
2870
2871 // Iterate backwards starting at the last instruction.
2872 for (BackwardInstructionIterator instr_it(block); !instr_it.Done();
2873 instr_it.Advance()) {
2874 Instruction* instr = instr_it.Current();
2875
2876 bool is_load = false;
2877 bool is_store = false;
2878 Place place(instr, &is_load, &is_store);
2879 if (place.IsImmutableField()) {
2880 // Loads/stores of final fields do not participate.
2881 continue;
2882 }
2883
2884 // Handle stores.
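        // For illustration (hypothetical Dart sketch): in
        //
        //   o.f = 1;   // dead: overwritten before any load of o.f
        //   o.f = 2;
        //
        // the second store is visited first by this backward walk and adds
        // the place to `kill`, so the first store is removed right below.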
2885 if (is_store) {
2886 if (kill->Contains(GetPlaceId(instr))) {
2887 if (!live_in->Contains(GetPlaceId(instr)) &&
2888 CanEliminateStore(instr)) {
2889 if (FLAG_trace_optimization) {
2890 THR_Print("Removing dead store to place %" Pd " in block B%" Pd
2891 "\n",
2892 GetPlaceId(instr), block->block_id());
2893 }
2894 instr_it.RemoveCurrentFromGraph();
2895 }
2896 } else if (!live_in->Contains(GetPlaceId(instr))) {
            // Mark this store as downward exposed: such stores are the only
            // candidates for global store elimination.
2899 if (exposed_stores == NULL) {
2900 const intptr_t kMaxExposedStoresInitialSize = 5;
2901 exposed_stores = new (zone) ZoneGrowableArray<Instruction*>(
2902 Utils::Minimum(kMaxExposedStoresInitialSize,
2903 aliased_set_->max_place_id()));
2904 }
2905 exposed_stores->Add(instr);
2906 }
2907 // Interfering stores kill only loads from the same place.
2908 kill->Add(GetPlaceId(instr));
2909 live_in->Remove(GetPlaceId(instr));
2910 continue;
2911 }
2912
2913 // Handle side effects, deoptimization and function return.
2914 if (instr->HasUnknownSideEffects() || instr->CanDeoptimize() ||
2915 instr->MayThrow() || instr->IsReturn()) {
2916 // Instructions that return from the function, instructions with side
2917 // effects and instructions that can deoptimize are considered as
2918 // loads from all places.
2919 live_in->CopyFrom(all_places);
2920 if (instr->IsThrow() || instr->IsReThrow() || instr->IsReturn()) {
2921 // Initialize live-out for exit blocks since it won't be computed
2922 // otherwise during the fixed point iteration.
2923 live_out->CopyFrom(all_places);
2924 }
2925 continue;
2926 }
2927
2928 // Handle loads.
2929 Definition* defn = instr->AsDefinition();
2930 if ((defn != NULL) && IsLoadEliminationCandidate(defn)) {
2931 const intptr_t alias = aliased_set_->LookupAliasId(place.ToAlias());
2932 live_in->AddAll(aliased_set_->GetKilledSet(alias));
2933 continue;
2934 }
2935 }
2936 exposed_stores_[postorder_number] = exposed_stores;
2937 }
2938 if (FLAG_trace_load_optimization) {
2939 Dump();
2940 THR_Print("---\n");
2941 }
2942 }
2943
2944 void EliminateDeadStores() {
2945 // Iteration order does not matter here.
2946 for (BlockIterator block_it = graph_->postorder_iterator();
2947 !block_it.Done(); block_it.Advance()) {
2948 BlockEntryInstr* block = block_it.Current();
2949 const intptr_t postorder_number = block->postorder_number();
2950
2951 BitVector* live_out = live_out_[postorder_number];
2952
2953 ZoneGrowableArray<Instruction*>* exposed_stores =
2954 exposed_stores_[postorder_number];
2955 if (exposed_stores == NULL) continue; // No exposed stores.
2956
2957 // Iterate over candidate stores.
2958 for (intptr_t i = 0; i < exposed_stores->length(); ++i) {
2959 Instruction* instr = (*exposed_stores)[i];
2960 bool is_load = false;
2961 bool is_store = false;
2962 Place place(instr, &is_load, &is_store);
2963 ASSERT(!is_load && is_store);
2964 if (place.IsImmutableField()) {
          // Final fields do not participate in dead store elimination.
2966 continue;
2967 }
2968 // Eliminate a downward exposed store if the corresponding place is not
2969 // in live-out.
2970 if (!live_out->Contains(GetPlaceId(instr)) &&
2971 CanEliminateStore(instr)) {
2972 if (FLAG_trace_optimization) {
2973 THR_Print("Removing dead store to place %" Pd " block B%" Pd "\n",
2974 GetPlaceId(instr), block->block_id());
2975 }
2976 instr->RemoveFromGraph(/* ignored */ false);
2977 }
2978 }
2979 }
2980 }
2981
2982 FlowGraph* graph_;
2983 DirectChainedHashMap<PointerKeyValueTrait<Place> >* map_;
2984
2985 // Mapping between field offsets in words and expression ids of loads from
2986 // that offset.
2987 AliasedSet* aliased_set_;
2988
2989 // Per block list of downward exposed stores.
2990 GrowableArray<ZoneGrowableArray<Instruction*>*> exposed_stores_;
2991
2992 DISALLOW_COPY_AND_ASSIGN(StoreOptimizer);
2993};
2994
2995void DeadStoreElimination::Optimize(FlowGraph* graph) {
2996 if (FLAG_dead_store_elimination) {
2997 StoreOptimizer::OptimizeGraph(graph);
2998 }
2999}
3000
3001//
3002// Allocation Sinking
3003//
3004
3005// Returns true if the given instruction is an allocation that
3006// can be sunk by the Allocation Sinking pass.
3007static bool IsSupportedAllocation(Instruction* instr) {
3008 return instr->IsAllocateObject() || instr->IsAllocateUninitializedContext();
3009}
3010
3011enum SafeUseCheck { kOptimisticCheck, kStrictCheck };
3012
3013// Check if the use is safe for allocation sinking. Allocation sinking
3014// candidates can only be used at store instructions:
3015//
3016// - any store into the allocation candidate itself is unconditionally safe
3017// as it just changes the rematerialization state of this candidate;
// - a store into another object is only safe if that object is itself an
//   allocation sinking candidate.
3020//
3021// We use a simple fix-point algorithm to discover the set of valid candidates
3022// (see CollectCandidates method), that's why this IsSafeUse can operate in two
3023// modes:
3024//
3025// - optimistic, when every allocation is assumed to be an allocation
3026// sinking candidate;
3027// - strict, when only marked allocations are assumed to be allocation
3028// sinking candidates.
3029//
// Fix-point algorithm in CollectCandidates first collects a set of allocations
3031// optimistically and then checks each collected candidate strictly and unmarks
3032// invalid candidates transitively until only strictly valid ones remain.
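// For illustration (hypothetical Dart sketch):
//
//   var p = Point(x, y);    // candidate: only stored into its own fields
//   other.field = p;        // safe only if `other` is itself a candidate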
3033static bool IsSafeUse(Value* use, SafeUseCheck check_type) {
3034 if (use->instruction()->IsMaterializeObject()) {
3035 return true;
3036 }
3037
3038 StoreInstanceFieldInstr* store = use->instruction()->AsStoreInstanceField();
3039 if (store != NULL) {
3040 if (use == store->value()) {
3041 Definition* instance = store->instance()->definition();
3042 return IsSupportedAllocation(instance) &&
3043 ((check_type == kOptimisticCheck) ||
3044 instance->Identity().IsAllocationSinkingCandidate());
3045 }
3046 return true;
3047 }
3048
3049 return false;
3050}
3051
// Right now we are attempting to sink allocations only into deoptimization
// exits. So a candidate should only be used in StoreInstanceField
// instructions that write into fields of the allocated object.
3055static bool IsAllocationSinkingCandidate(Definition* alloc,
3056 SafeUseCheck check_type) {
3057 for (Value* use = alloc->input_use_list(); use != NULL;
3058 use = use->next_use()) {
3059 if (!IsSafeUse(use, check_type)) {
3060 if (FLAG_support_il_printer && FLAG_trace_optimization) {
3061 THR_Print("use of %s at %s is unsafe for allocation sinking\n",
3062 alloc->ToCString(), use->instruction()->ToCString());
3063 }
3064 return false;
3065 }
3066 }
3067
3068 return true;
3069}
3070
// If the given use is a store into an object then return the object we are
// storing into.
3073static Definition* StoreInto(Value* use) {
3074 StoreInstanceFieldInstr* store = use->instruction()->AsStoreInstanceField();
3075 if (store != NULL) {
3076 return store->instance()->definition();
3077 }
3078
3079 return NULL;
3080}
3081
3082// Remove the given allocation from the graph. It is not observable.
3083// If deoptimization occurs the object will be materialized.
3084void AllocationSinking::EliminateAllocation(Definition* alloc) {
3085 ASSERT(IsAllocationSinkingCandidate(alloc, kStrictCheck));
3086
3087 if (FLAG_trace_optimization) {
3088 THR_Print("removing allocation from the graph: v%" Pd "\n",
3089 alloc->ssa_temp_index());
3090 }
3091
3092 // As an allocation sinking candidate it is only used in stores to its own
3093 // fields. Remove these stores.
3094 for (Value* use = alloc->input_use_list(); use != NULL;
3095 use = alloc->input_use_list()) {
3096 use->instruction()->RemoveFromGraph();
3097 }
3098
3099// There should be no environment uses. The pass replaced them with
3100// MaterializeObject instructions.
3101#ifdef DEBUG
3102 for (Value* use = alloc->env_use_list(); use != NULL; use = use->next_use()) {
3103 ASSERT(use->instruction()->IsMaterializeObject());
3104 }
3105#endif
3106 ASSERT(alloc->input_use_list() == NULL);
3107 alloc->RemoveFromGraph();
3108 if (alloc->ArgumentCount() > 0) {
3109 ASSERT(alloc->ArgumentCount() == 1);
3110 ASSERT(!alloc->HasPushArguments());
3111 }
3112}
3113
3114// Find allocation instructions that can be potentially eliminated and
3115// rematerialized at deoptimization exits if needed. See IsSafeUse
3116// for the description of algorithm used below.
3117void AllocationSinking::CollectCandidates() {
3118 // Optimistically collect all potential candidates.
3119 for (BlockIterator block_it = flow_graph_->reverse_postorder_iterator();
3120 !block_it.Done(); block_it.Advance()) {
3121 BlockEntryInstr* block = block_it.Current();
3122 for (ForwardInstructionIterator it(block); !it.Done(); it.Advance()) {
3123 Instruction* current = it.Current();
3124 if (!IsSupportedAllocation(current)) {
3125 continue;
3126 }
3127
3128 Definition* alloc = current->Cast<Definition>();
3129 if (IsAllocationSinkingCandidate(alloc, kOptimisticCheck)) {
3130 alloc->SetIdentity(AliasIdentity::AllocationSinkingCandidate());
3131 candidates_.Add(alloc);
3132 }
3133 }
3134 }
3135
3136 // Transitively unmark all candidates that are not strictly valid.
3137 bool changed;
3138 do {
3139 changed = false;
3140 for (intptr_t i = 0; i < candidates_.length(); i++) {
3141 Definition* alloc = candidates_[i];
3142 if (alloc->Identity().IsAllocationSinkingCandidate()) {
3143 if (!IsAllocationSinkingCandidate(alloc, kStrictCheck)) {
3144 alloc->SetIdentity(AliasIdentity::Unknown());
3145 changed = true;
3146 }
3147 }
3148 }
3149 } while (changed);
3150
3151 // Shrink the list of candidates removing all unmarked ones.
3152 intptr_t j = 0;
3153 for (intptr_t i = 0; i < candidates_.length(); i++) {
3154 Definition* alloc = candidates_[i];
3155 if (alloc->Identity().IsAllocationSinkingCandidate()) {
3156 if (FLAG_trace_optimization) {
3157 THR_Print("discovered allocation sinking candidate: v%" Pd "\n",
3158 alloc->ssa_temp_index());
3159 }
3160
3161 if (j != i) {
3162 candidates_[j] = alloc;
3163 }
3164 j++;
3165 }
3166 }
3167 candidates_.TruncateTo(j);
3168}
3169
3170// If materialization references an allocation sinking candidate then replace
3171// this reference with a materialization which should have been computed for
3172// this side-exit. CollectAllExits should have collected this exit.
3173void AllocationSinking::NormalizeMaterializations() {
3174 for (intptr_t i = 0; i < candidates_.length(); i++) {
3175 Definition* alloc = candidates_[i];
3176
3177 Value* next_use;
3178 for (Value* use = alloc->input_use_list(); use != NULL; use = next_use) {
3179 next_use = use->next_use();
3180 if (use->instruction()->IsMaterializeObject()) {
3181 use->BindTo(MaterializationFor(alloc, use->instruction()));
3182 }
3183 }
3184 }
3185}
3186
// We transitively insert materializations at each deoptimization exit that
// might see the given allocation (see ExitsCollector). Some of these
// materializations are not actually used and some fail to compute because
// they are inserted in a block that is not dominated by the allocation.
// Remove such unused materializations from the graph.
3192void AllocationSinking::RemoveUnusedMaterializations() {
3193 intptr_t j = 0;
3194 for (intptr_t i = 0; i < materializations_.length(); i++) {
3195 MaterializeObjectInstr* mat = materializations_[i];
3196 if ((mat->input_use_list() == NULL) && (mat->env_use_list() == NULL)) {
3197 // Check if this materialization failed to compute and remove any
3198 // unforwarded loads. There were no loads from any allocation sinking
3199 // candidate in the beginning so it is safe to assume that any encountered
3200 // load was inserted by CreateMaterializationAt.
3201 for (intptr_t i = 0; i < mat->InputCount(); i++) {
3202 LoadFieldInstr* load = mat->InputAt(i)->definition()->AsLoadField();
3203 if ((load != NULL) &&
3204 (load->instance()->definition() == mat->allocation())) {
3205 load->ReplaceUsesWith(flow_graph_->constant_null());
3206 load->RemoveFromGraph();
3207 }
3208 }
3209 mat->RemoveFromGraph();
3210 } else {
3211 if (j != i) {
3212 materializations_[j] = mat;
3213 }
3214 j++;
3215 }
3216 }
3217 materializations_.TruncateTo(j);
3218}
3219
3220// Some candidates might stop being eligible for allocation sinking after
3221// the load forwarding because they flow into phis that load forwarding
3222// inserts. Discover such allocations and remove them from the list
3223// of allocation sinking candidates undoing all changes that we did
3224// in preparation for sinking these allocations.
3225void AllocationSinking::DiscoverFailedCandidates() {
3226 // Transitively unmark all candidates that are not strictly valid.
3227 bool changed;
3228 do {
3229 changed = false;
3230 for (intptr_t i = 0; i < candidates_.length(); i++) {
3231 Definition* alloc = candidates_[i];
3232 if (alloc->Identity().IsAllocationSinkingCandidate()) {
3233 if (!IsAllocationSinkingCandidate(alloc, kStrictCheck)) {
3234 alloc->SetIdentity(AliasIdentity::Unknown());
3235 changed = true;
3236 }
3237 }
3238 }
3239 } while (changed);
3240
3241 // Remove all failed candidates from the candidates list.
3242 intptr_t j = 0;
3243 for (intptr_t i = 0; i < candidates_.length(); i++) {
3244 Definition* alloc = candidates_[i];
3245 if (!alloc->Identity().IsAllocationSinkingCandidate()) {
3246 if (FLAG_trace_optimization) {
3247 THR_Print("allocation v%" Pd " can't be eliminated\n",
3248 alloc->ssa_temp_index());
3249 }
3250
3251#ifdef DEBUG
3252 for (Value* use = alloc->env_use_list(); use != NULL;
3253 use = use->next_use()) {
3254 ASSERT(use->instruction()->IsMaterializeObject());
3255 }
3256#endif
3257
      // All materializations will be removed from the graph. Remove inserted
      // loads first and detach materializations from the allocation's
      // environment use list: we will reconstruct it when we start removing
      // materializations.
3262 alloc->set_env_use_list(NULL);
3263 for (Value* use = alloc->input_use_list(); use != NULL;
3264 use = use->next_use()) {
3265 if (use->instruction()->IsLoadField()) {
3266 LoadFieldInstr* load = use->instruction()->AsLoadField();
3267 load->ReplaceUsesWith(flow_graph_->constant_null());
3268 load->RemoveFromGraph();
3269 } else {
3270 ASSERT(use->instruction()->IsMaterializeObject() ||
3271 use->instruction()->IsPhi() ||
3272 use->instruction()->IsStoreInstanceField());
3273 }
3274 }
3275 } else {
3276 if (j != i) {
3277 candidates_[j] = alloc;
3278 }
3279 j++;
3280 }
3281 }
3282
3283 if (j != candidates_.length()) { // Something was removed from candidates.
3284 intptr_t k = 0;
3285 for (intptr_t i = 0; i < materializations_.length(); i++) {
3286 MaterializeObjectInstr* mat = materializations_[i];
3287 if (!mat->allocation()->Identity().IsAllocationSinkingCandidate()) {
3288 // Restore environment uses of the allocation that were replaced
3289 // by this materialization and drop materialization.
3290 mat->ReplaceUsesWith(mat->allocation());
3291 mat->RemoveFromGraph();
3292 } else {
3293 if (k != i) {
3294 materializations_[k] = mat;
3295 }
3296 k++;
3297 }
3298 }
3299 materializations_.TruncateTo(k);
3300 }
3301
3302 candidates_.TruncateTo(j);
3303}
3304
3305void AllocationSinking::Optimize() {
3306 CollectCandidates();
3307
  // Insert MaterializeObject instructions that will describe the state of the
  // object at all deoptimization points. Each inserted materialization looks
  // like this (where v_0 is the allocation that we are going to eliminate):
3311 // v_1 <- LoadField(v_0, field_1)
3312 // ...
3313 // v_N <- LoadField(v_0, field_N)
3314 // v_{N+1} <- MaterializeObject(field_1 = v_1, ..., field_N = v_{N})
3315 for (intptr_t i = 0; i < candidates_.length(); i++) {
3316 InsertMaterializations(candidates_[i]);
3317 }
3318
  // Run load forwarding to eliminate LoadField instructions inserted above.
  // All loads will be successfully eliminated because:
  //   a) they use fields (not offsets) and thus provide precise aliasing
  //      information;
  //   b) the candidate does not escape, so its fields are not affected by
  //      external effects from calls.
3325 LoadOptimizer::OptimizeGraph(flow_graph_);
3326
3327 NormalizeMaterializations();
3328
3329 RemoveUnusedMaterializations();
3330
  // If any candidates are no longer eligible for allocation sinking, abort
  // the optimization for them and undo any changes made in preparation.
3333 DiscoverFailedCandidates();
3334
  // At this point we have computed the state of the object at each
  // deoptimization point and we can eliminate it. Loads inserted above were
  // forwarded so there are no uses of the allocation, just as at the
  // beginning of the pass.
3338 for (intptr_t i = 0; i < candidates_.length(); i++) {
3339 EliminateAllocation(candidates_[i]);
3340 }
3341
3342 // Process materializations and unbox their arguments: materializations
3343 // are part of the environment and can materialize boxes for double/mint/simd
3344 // values when needed.
3345 // TODO(vegorov): handle all box types here.
3346 for (intptr_t i = 0; i < materializations_.length(); i++) {
3347 MaterializeObjectInstr* mat = materializations_[i];
3348 for (intptr_t j = 0; j < mat->InputCount(); j++) {
3349 Definition* defn = mat->InputAt(j)->definition();
3350 if (defn->IsBox()) {
3351 mat->InputAt(j)->BindTo(defn->InputAt(0)->definition());
3352 }
3353 }
3354 }
3355}
3356
// Remove materializations from the graph. The register allocator will treat
// them as part of the environment, not as real instructions.
3359void AllocationSinking::DetachMaterializations() {
3360 for (intptr_t i = 0; i < materializations_.length(); i++) {
3361 materializations_[i]->previous()->LinkTo(materializations_[i]->next());
3362 }
3363}
3364
// Add a slot to the list of slots if it is not yet present there.
3366static bool AddSlot(ZoneGrowableArray<const Slot*>* slots, const Slot& slot) {
3367 for (auto s : *slots) {
3368 if (s == &slot) {
3369 return false;
3370 }
3371 }
3372 slots->Add(&slot);
3373 return true;
3374}
3375
3376// Find deoptimization exit for the given materialization assuming that all
3377// materializations are emitted right before the instruction which is a
3378// deoptimization exit.
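//
// An illustrative sketch (hypothetical IL; m_1, m_2 are placeholder names):
//
//   m_1 <- MaterializeObject(...)
//   m_2 <- MaterializeObject(...)
//   instr                          <- the deoptimizing instruction
//
// ExitForMaterialization(m_1) and ExitForMaterialization(m_2) both return
// instr.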
3379static Instruction* ExitForMaterialization(MaterializeObjectInstr* mat) {
3380 while (mat->next()->IsMaterializeObject()) {
3381 mat = mat->next()->AsMaterializeObject();
3382 }
3383 return mat->next();
3384}
3385
// Given a deoptimization exit, find the first materialization that was
// inserted before it.
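//
// In the sketch above (before ExitForMaterialization), FirstMaterializationAt
// of the deoptimizing instruction returns m_1.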
3388static Instruction* FirstMaterializationAt(Instruction* exit) {
3389 while (exit->previous()->IsMaterializeObject()) {
3390 exit = exit->previous();
3391 }
3392 return exit;
3393}
3394
// Given an allocation and a deoptimization exit, try to find the
// MaterializeObject instruction corresponding to this allocation at this
// exit.
3397MaterializeObjectInstr* AllocationSinking::MaterializationFor(
3398 Definition* alloc,
3399 Instruction* exit) {
3400 if (exit->IsMaterializeObject()) {
3401 exit = ExitForMaterialization(exit->AsMaterializeObject());
3402 }
3403
3404 for (MaterializeObjectInstr* mat = exit->previous()->AsMaterializeObject();
3405 mat != NULL; mat = mat->previous()->AsMaterializeObject()) {
3406 if (mat->allocation() == alloc) {
3407 return mat;
3408 }
3409 }
3410
3411 return NULL;
3412}
3413
3414// Insert MaterializeObject instruction for the given allocation before
3415// the given instruction that can deoptimize.
3416void AllocationSinking::CreateMaterializationAt(
3417 Instruction* exit,
3418 Definition* alloc,
3419 const ZoneGrowableArray<const Slot*>& slots) {
3420 ZoneGrowableArray<Value*>* values =
3421 new (Z) ZoneGrowableArray<Value*>(slots.length());
3422
  // All loads should be inserted before the first materialization so that the
  // IR follows the pattern: loads, materializations, deoptimizing
  // instruction.
3426 Instruction* load_point = FirstMaterializationAt(exit);
3427
3428 // Insert load instruction for every field.
3429 for (auto slot : slots) {
3430 LoadFieldInstr* load =
3431 new (Z) LoadFieldInstr(new (Z) Value(alloc), *slot, alloc->token_pos());
3432 flow_graph_->InsertBefore(load_point, load, nullptr, FlowGraph::kValue);
3433 values->Add(new (Z) Value(load));
3434 }
3435
3436 MaterializeObjectInstr* mat = nullptr;
3437 if (alloc->IsAllocateObject()) {
3438 mat = new (Z)
3439 MaterializeObjectInstr(alloc->AsAllocateObject(), slots, values);
3440 } else {
3441 ASSERT(alloc->IsAllocateUninitializedContext());
3442 mat = new (Z) MaterializeObjectInstr(
3443 alloc->AsAllocateUninitializedContext(), slots, values);
3444 }
3445
3446 flow_graph_->InsertBefore(exit, mat, nullptr, FlowGraph::kValue);
3447
3448 // Replace all mentions of this allocation with a newly inserted
3449 // MaterializeObject instruction.
3450 // We must preserve the identity: all mentions are replaced by the same
3451 // materialization.
3452 exit->ReplaceInEnvironment(alloc, mat);
3453
3454 // Mark MaterializeObject as an environment use of this allocation.
3455 // This will allow us to discover it when we are looking for deoptimization
3456 // exits for another allocation that potentially flows into this one.
3457 Value* val = new (Z) Value(alloc);
3458 val->set_instruction(mat);
3459 alloc->AddEnvUse(val);
3460
3461 // Record inserted materialization.
3462 materializations_.Add(mat);
3463}
3464
// Add the given instruction to the list of instructions if it is not yet
// present there.
3467template <typename T>
3468void AddInstruction(GrowableArray<T*>* list, T* value) {
3469 ASSERT(!value->IsGraphEntry() && !value->IsFunctionEntry());
3470 for (intptr_t i = 0; i < list->length(); i++) {
3471 if ((*list)[i] == value) {
3472 return;
3473 }
3474 }
3475 list->Add(value);
3476}
3477
3478// Transitively collect all deoptimization exits that might need this allocation
3479// rematerialized. It is not enough to collect only environment uses of this
3480// allocation because it can flow into other objects that will be
3481// dematerialized and that are referenced by deopt environments that
3482// don't contain this allocation explicitly.
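//
// An illustrative sketch (hypothetical IL; A, B, f are placeholder names):
// candidate A is stored into candidate B and only B is mentioned by the
// deopt environment:
//
//   B <- AllocateObject(...)
//   A <- AllocateObject(...)
//   StoreInstanceField(B.f, A)
//   instr, env: [B]                <- deoptimization exit
//
// Here instr must be treated as an exit for A as well: when B is
// dematerialized at this exit, its field f needs A to be rematerialized too.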
3483void AllocationSinking::ExitsCollector::Collect(Definition* alloc) {
3484 for (Value* use = alloc->env_use_list(); use != NULL; use = use->next_use()) {
3485 if (use->instruction()->IsMaterializeObject()) {
3486 AddInstruction(&exits_, ExitForMaterialization(
3487 use->instruction()->AsMaterializeObject()));
3488 } else {
3489 AddInstruction(&exits_, use->instruction());
3490 }
3491 }
3492
  // Check if this allocation is stored into any other allocation sinking
  // candidate and, if so, put that candidate on the worklist so that we
  // conservatively collect all exits for it as well, because they might
  // potentially see this object.
3497 for (Value* use = alloc->input_use_list(); use != NULL;
3498 use = use->next_use()) {
3499 Definition* obj = StoreInto(use);
3500 if ((obj != NULL) && (obj != alloc)) {
3501 AddInstruction(&worklist_, obj);
3502 }
3503 }
3504}
3505
3506void AllocationSinking::ExitsCollector::CollectTransitively(Definition* alloc) {
3507 exits_.TruncateTo(0);
3508 worklist_.TruncateTo(0);
3509
3510 worklist_.Add(alloc);
3511
  // Note: the worklist can grow while we are iterating over it. We do not
  // remove allocations from the worklist so that we don't have to maintain a
  // side BitVector of already processed allocations: the worklist is expected
  // to be very small, thus a linear search in it is just as efficient as a
  // bit vector.
3517 for (intptr_t i = 0; i < worklist_.length(); i++) {
3518 Collect(worklist_[i]);
3519 }
3520}
3521
3522void AllocationSinking::InsertMaterializations(Definition* alloc) {
3523 // Collect all fields that are written for this instance.
3524 auto slots = new (Z) ZoneGrowableArray<const Slot*>(5);
3525
3526 for (Value* use = alloc->input_use_list(); use != NULL;
3527 use = use->next_use()) {
3528 StoreInstanceFieldInstr* store = use->instruction()->AsStoreInstanceField();
3529 if ((store != NULL) && (store->instance()->definition() == alloc)) {
3530 AddSlot(slots, store->slot());
3531 }
3532 }
3533
3534 if (auto alloc_object = alloc->AsAllocateObject()) {
3535 if (alloc_object->type_arguments() != nullptr) {
3536 AddSlot(slots, Slot::GetTypeArgumentsSlotFor(flow_graph_->thread(),
3537 alloc_object->cls()));
3538 }
3539 }
3540
3541 // Collect all instructions that mention this object in the environment.
3542 exits_collector_.CollectTransitively(alloc);
3543
3544 // Insert materializations at environment uses.
3545 for (intptr_t i = 0; i < exits_collector_.exits().length(); i++) {
3546 CreateMaterializationAt(exits_collector_.exits()[i], alloc, *slots);
3547 }
3548}
3549
3550// TryCatchAnalyzer tries to reduce the state that needs to be synchronized
3551// on entry to the catch by discovering Parameter-s which are never used
3552// or which are always constant.
3553//
3554// This analysis is similar to dead/redundant phi elimination because
3555// Parameter instructions serve as "implicit" phis.
3556//
// Caveat: when analyzing which Parameter-s are redundant we limit ourselves
// to constant values because CatchBlockEntry-s hang directly off GraphEntry
// and thus are only dominated by constants from GraphEntry. This means we
// can't replace a Parameter with an arbitrary Definition which is not a
// Constant, even if we know that this Parameter is redundant and would always
// evaluate to that Definition.
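//
// An illustrative sketch (hypothetical IL; c_1, v_7, v_9, p_0, p_1 are
// placeholder names): every instruction that may throw inside the try-block
// binds the same constant at environment index 0:
//
//   try-block:
//     InstanceCall(...), env: [c_1, v_7]
//     InstanceCall(...), env: [c_1, v_9]
//
//   catch-entry initial definitions: [p_0, p_1]
//
// Here p_0 can be replaced with a copy of the constant c_1, while p_1 sees
// different values at the two calls and remains a Parameter.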
3563class TryCatchAnalyzer : public ValueObject {
3564 public:
3565 explicit TryCatchAnalyzer(FlowGraph* flow_graph, bool is_aot)
3566 : flow_graph_(flow_graph),
3567 is_aot_(is_aot),
3568 // Initial capacity is selected based on trivial examples.
3569 worklist_(flow_graph, /*initial_capacity=*/10) {}
3570
3571 // Run analysis and eliminate dead/redundant Parameter-s.
3572 void Optimize();
3573
3574 private:
  // In precompiled mode we can eliminate all parameters that have no real uses
  // and subsequently clear out the corresponding slots in the environments
  // assigned to instructions that can throw an exception which would be caught
  // by the corresponding CatchBlockEntry.
  //
  // Computing "dead" parameters is essentially a fixed point algorithm because
  // a Parameter value can flow into another Parameter via an environment
  // attached to an instruction that can throw.
  //
  // Note: this optimization pass assumes that environment values are only
  // used during catching, which is why it should only be used in AOT mode.
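  //
  // For example, a Parameter p of one catch entry can appear in the
  // environment of a MayThrow instruction inside another try-block and thus
  // become an incoming value of that catch's Parameter q. If q turns out to
  // be dead, p may become dead as well, which is why a single pass over the
  // parameters is not enough.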
3586 void OptimizeDeadParameters() {
3587 ASSERT(is_aot_);
3588
3589 NumberCatchEntryParameters();
3590 ComputeIncomingValues();
3591 CollectAliveParametersOrPhis();
3592 PropagateLivenessToInputs();
3593 EliminateDeadParameters();
3594 }
3595
3596 static intptr_t GetParameterId(const Instruction* instr) {
3597 return instr->GetPassSpecificId(CompilerPass::kTryCatchOptimization);
3598 }
3599
3600 static void SetParameterId(Instruction* instr, intptr_t id) {
3601 instr->SetPassSpecificId(CompilerPass::kTryCatchOptimization, id);
3602 }
3603
3604 static bool HasParameterId(Instruction* instr) {
3605 return instr->HasPassSpecificId(CompilerPass::kTryCatchOptimization);
3606 }
3607
  // Assign sequential ids to each ParameterInstr in each CatchBlockEntry.
  // Collect a reverse mapping from try indexes to the corresponding catches.
3610 void NumberCatchEntryParameters() {
3611 for (auto catch_entry : flow_graph_->graph_entry()->catch_entries()) {
3612 const GrowableArray<Definition*>& idefs =
3613 *catch_entry->initial_definitions();
3614 for (auto idef : idefs) {
3615 if (idef->IsParameter()) {
3616 SetParameterId(idef, parameter_info_.length());
3617 parameter_info_.Add(new ParameterInfo(idef->AsParameter()));
3618 }
3619 }
3620
3621 catch_by_index_.EnsureLength(catch_entry->catch_try_index() + 1, nullptr);
3622 catch_by_index_[catch_entry->catch_try_index()] = catch_entry;
3623 }
3624 }
3625
3626 // Compute potential incoming values for each Parameter in each catch block
3627 // by looking into environments assigned to MayThrow instructions within
3628 // blocks covered by the corresponding catch.
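  //
  // An illustrative sketch (hypothetical IL; v_2, v_7, p_0, p_1 are
  // placeholder names): for a MayThrow instruction inside a block covered by
  // try_index t
  //
  //   InstanceCall(...), env: [v_2, v_7]
  //
  // and the catch entry for t with initial definitions [p_0, p_1], v_2 is
  // recorded as an incoming value of p_0 and v_7 as an incoming value of p_1
  // (environment slots and initial definitions correspond by index).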
3629 void ComputeIncomingValues() {
3630 for (auto block : flow_graph_->reverse_postorder()) {
3631 if (block->try_index() == kInvalidTryIndex) {
3632 continue;
3633 }
3634
3635 ASSERT(block->try_index() < catch_by_index_.length());
3636 auto catch_entry = catch_by_index_[block->try_index()];
3637 const auto& idefs = *catch_entry->initial_definitions();
3638
3639 for (ForwardInstructionIterator instr_it(block); !instr_it.Done();
3640 instr_it.Advance()) {
3641 Instruction* current = instr_it.Current();
3642 if (!current->MayThrow()) continue;
3643
3644 Environment* env = current->env()->Outermost();
3645 ASSERT(env != nullptr);
3646
3647 for (intptr_t env_idx = 0; env_idx < idefs.length(); ++env_idx) {
3648 if (ParameterInstr* param = idefs[env_idx]->AsParameter()) {
3649 Definition* defn = env->ValueAt(env_idx)->definition();
3650
3651 // Add defn as an incoming value to the parameter if it is not
3652 // already present in the list.
3653 bool found = false;
3654 for (auto other_defn :
3655 parameter_info_[GetParameterId(param)]->incoming) {
3656 if (other_defn == defn) {
3657 found = true;
3658 break;
3659 }
3660 }
3661 if (!found) {
3662 parameter_info_[GetParameterId(param)]->incoming.Add(defn);
3663 }
3664 }
3665 }
3666 }
3667 }
3668 }
3669
  // Find all parameters (and phis) that are definitely alive because they
  // have non-phi uses, and place them into the worklist.
  //
  // Note: phis that only have phi (and environment) uses will be marked as
  // dead.
3675 void CollectAliveParametersOrPhis() {
3676 for (auto block : flow_graph_->reverse_postorder()) {
3677 if (JoinEntryInstr* join = block->AsJoinEntry()) {
3678 if (join->phis() == nullptr) continue;
3679
3680 for (auto phi : *join->phis()) {
3681 phi->mark_dead();
3682 if (HasNonPhiUse(phi)) {
3683 MarkLive(phi);
3684 }
3685 }
3686 }
3687 }
3688
3689 for (auto info : parameter_info_) {
3690 if (HasNonPhiUse(info->instr)) {
3691 MarkLive(info->instr);
3692 }
3693 }
3694 }
3695
3696 // Propagate liveness from live parameters and phis to other parameters and
3697 // phis transitively.
3698 void PropagateLivenessToInputs() {
3699 while (!worklist_.IsEmpty()) {
3700 Definition* defn = worklist_.RemoveLast();
3701 if (ParameterInstr* param = defn->AsParameter()) {
3702 auto s = parameter_info_[GetParameterId(param)];
3703 for (auto input : s->incoming) {
3704 MarkLive(input);
3705 }
3706 } else if (PhiInstr* phi = defn->AsPhi()) {
3707 for (intptr_t i = 0; i < phi->InputCount(); i++) {
3708 MarkLive(phi->InputAt(i)->definition());
3709 }
3710 }
3711 }
3712 }
3713
  // Mark the definition as live if it is a dead Phi or a dead Parameter and
  // place it into the worklist.
3716 void MarkLive(Definition* defn) {
3717 if (PhiInstr* phi = defn->AsPhi()) {
3718 if (!phi->is_alive()) {
3719 phi->mark_alive();
3720 worklist_.Add(phi);
3721 }
3722 } else if (ParameterInstr* param = defn->AsParameter()) {
3723 if (HasParameterId(param)) {
3724 auto input_s = parameter_info_[GetParameterId(param)];
3725 if (!input_s->alive) {
3726 input_s->alive = true;
3727 worklist_.Add(param);
3728 }
3729 }
3730 }
3731 }
3732
  // Replace all dead parameters with the null constant and clear the
  // corresponding slots in environments.
3735 void EliminateDeadParameters() {
3736 for (auto info : parameter_info_) {
3737 if (!info->alive) {
3738 info->instr->ReplaceUsesWith(flow_graph_->constant_null());
3739 }
3740 }
3741
3742 for (auto block : flow_graph_->reverse_postorder()) {
      if (block->try_index() == kInvalidTryIndex) continue;
3744
3745 auto catch_entry = catch_by_index_[block->try_index()];
3746 const auto& idefs = *catch_entry->initial_definitions();
3747
3748 for (ForwardInstructionIterator instr_it(block); !instr_it.Done();
3749 instr_it.Advance()) {
3750 Instruction* current = instr_it.Current();
3751 if (!current->MayThrow()) continue;
3752
3753 Environment* env = current->env()->Outermost();
3754 RELEASE_ASSERT(env != nullptr);
3755
3756 for (intptr_t env_idx = 0; env_idx < idefs.length(); ++env_idx) {
3757 if (ParameterInstr* param = idefs[env_idx]->AsParameter()) {
3758 if (!parameter_info_[GetParameterId(param)]->alive) {
3759 env->ValueAt(env_idx)->BindToEnvironment(
3760 flow_graph_->constant_null());
3761 }
3762 }
3763 }
3764 }
3765 }
3766
3767 DeadCodeElimination::RemoveDeadAndRedundantPhisFromTheGraph(flow_graph_);
3768 }
3769
3770 // Returns true if definition has a use in an instruction which is not a phi.
3771 static bool HasNonPhiUse(Definition* defn) {
3772 for (Value* use = defn->input_use_list(); use != nullptr;
3773 use = use->next_use()) {
3774 if (!use->instruction()->IsPhi()) {
3775 return true;
3776 }
3777 }
3778 return false;
3779 }
3780
3781 struct ParameterInfo : public ZoneAllocated {
3782 explicit ParameterInfo(ParameterInstr* instr) : instr(instr) {}
3783
3784 ParameterInstr* instr;
3785 bool alive = false;
3786 GrowableArray<Definition*> incoming;
3787 };
3788
3789 FlowGraph* const flow_graph_;
3790 const bool is_aot_;
3791
  // Additional information for each Parameter from each CatchBlockEntry.
  // Parameter-s are numbered and their number is stored via the pass-specific
  // id for the kTryCatchOptimization pass (see GetParameterId/SetParameterId).
3796 GrowableArray<ParameterInfo*> parameter_info_;
3797
3798 // Mapping from catch_try_index to corresponding CatchBlockEntry-s.
3799 GrowableArray<CatchBlockEntryInstr*> catch_by_index_;
3800
3801 // Worklist for live Phi and Parameter instructions which need to be
3802 // processed by PropagateLivenessToInputs.
3803 DefinitionWorklist worklist_;
3804};
3805
3806void OptimizeCatchEntryStates(FlowGraph* flow_graph, bool is_aot) {
3807 if (flow_graph->graph_entry()->catch_entries().is_empty()) {
3808 return;
3809 }
3810
3811 TryCatchAnalyzer analyzer(flow_graph, is_aot);
3812 analyzer.Optimize();
3813}
3814
3815void TryCatchAnalyzer::Optimize() {
3816 // Analyze catch entries and remove "dead" Parameter instructions.
3817 if (is_aot_) {
3818 OptimizeDeadParameters();
3819 }
3820
  // For every catch-block: iterate over all instructions that may throw inside
  // the corresponding try-block and figure out for each environment value if
  // it is the same constant at all of them. If yes, replace the initial
  // definition at the catch-entry with this constant.
3825 const GrowableArray<CatchBlockEntryInstr*>& catch_entries =
3826 flow_graph_->graph_entry()->catch_entries();
3827
3828 for (auto catch_entry : catch_entries) {
3829 // Initialize cdefs with the original initial definitions (ParameterInstr).
3830 // The following representation is used:
3831 // ParameterInstr => unknown
3832 // ConstantInstr => known constant
3833 // NULL => non-constant
3834 GrowableArray<Definition*>* idefs = catch_entry->initial_definitions();
3835 GrowableArray<Definition*> cdefs(idefs->length());
3836 cdefs.AddArray(*idefs);
3837
3838 // exception_var and stacktrace_var are never constant. In asynchronous or
3839 // generator functions they may be context-allocated in which case they are
3840 // not tracked in the environment anyway.
3841
3842 cdefs[flow_graph_->EnvIndex(catch_entry->raw_exception_var())] = nullptr;
3843 cdefs[flow_graph_->EnvIndex(catch_entry->raw_stacktrace_var())] = nullptr;
3844
3845 for (BlockIterator block_it = flow_graph_->reverse_postorder_iterator();
3846 !block_it.Done(); block_it.Advance()) {
3847 BlockEntryInstr* block = block_it.Current();
3848 if (block->try_index() == catch_entry->catch_try_index()) {
3849 for (ForwardInstructionIterator instr_it(block); !instr_it.Done();
3850 instr_it.Advance()) {
3851 Instruction* current = instr_it.Current();
3852 if (current->MayThrow()) {
3853 Environment* env = current->env()->Outermost();
3854 ASSERT(env != nullptr);
3855 for (intptr_t env_idx = 0; env_idx < cdefs.length(); ++env_idx) {
3856 if (cdefs[env_idx] != nullptr && !cdefs[env_idx]->IsConstant() &&
3857 env->ValueAt(env_idx)->BindsToConstant()) {
                // The recorded definition is not a constant yet, but the
                // environment value binds to a constant: record it as the
                // candidate constant definition.
3860 cdefs[env_idx] = env->ValueAt(env_idx)->definition();
3861 }
3862 if (cdefs[env_idx] != env->ValueAt(env_idx)->definition()) {
                // The definition at this instruction differs from the one
                // recorded so far, so the value is not a single constant
                // across all throwing instructions: reset to nullptr.
3864 cdefs[env_idx] = nullptr;
3865 }
3866 }
3867 }
3868 }
3869 }
3870 }
3871 for (intptr_t j = 0; j < idefs->length(); ++j) {
3872 if (cdefs[j] != nullptr && cdefs[j]->IsConstant()) {
3873 Definition* old = (*idefs)[j];
3874 ConstantInstr* orig = cdefs[j]->AsConstant();
3875 ConstantInstr* copy =
3876 new (flow_graph_->zone()) ConstantInstr(orig->value());
3877 copy->set_ssa_temp_index(flow_graph_->alloc_ssa_temp_index());
3878 if (FlowGraph::NeedsPairLocation(copy->representation())) {
3879 flow_graph_->alloc_ssa_temp_index();
3880 }
3881 old->ReplaceUsesWith(copy);
3882 copy->set_previous(old->previous()); // partial link
3883 (*idefs)[j] = copy;
3884 }
3885 }
3886 }
3887}
3888
3889// Returns true iff this definition is used in a non-phi instruction.
3890static bool HasRealUse(Definition* def) {
3891 // Environment uses are real (non-phi) uses.
3892 if (def->env_use_list() != NULL) return true;
3893
3894 for (Value::Iterator it(def->input_use_list()); !it.Done(); it.Advance()) {
3895 if (!it.Current()->instruction()->IsPhi()) return true;
3896 }
3897 return false;
3898}
3899
3900void DeadCodeElimination::EliminateDeadPhis(FlowGraph* flow_graph) {
3901 GrowableArray<PhiInstr*> live_phis;
3902 for (BlockIterator b = flow_graph->postorder_iterator(); !b.Done();
3903 b.Advance()) {
3904 JoinEntryInstr* join = b.Current()->AsJoinEntry();
3905 if (join != NULL) {
3906 for (PhiIterator it(join); !it.Done(); it.Advance()) {
3907 PhiInstr* phi = it.Current();
3908 // Phis that have uses and phis inside try blocks are
3909 // marked as live.
3910 if (HasRealUse(phi)) {
3911 live_phis.Add(phi);
3912 phi->mark_alive();
3913 } else {
3914 phi->mark_dead();
3915 }
3916 }
3917 }
3918 }
3919
3920 while (!live_phis.is_empty()) {
3921 PhiInstr* phi = live_phis.RemoveLast();
3922 for (intptr_t i = 0; i < phi->InputCount(); i++) {
3923 Value* val = phi->InputAt(i);
3924 PhiInstr* used_phi = val->definition()->AsPhi();
3925 if ((used_phi != NULL) && !used_phi->is_alive()) {
3926 used_phi->mark_alive();
3927 live_phis.Add(used_phi);
3928 }
3929 }
3930 }
3931
3932 RemoveDeadAndRedundantPhisFromTheGraph(flow_graph);
3933}
3934
3935void DeadCodeElimination::RemoveDeadAndRedundantPhisFromTheGraph(
3936 FlowGraph* flow_graph) {
3937 for (auto block : flow_graph->postorder()) {
3938 if (JoinEntryInstr* join = block->AsJoinEntry()) {
3939 if (join->phis_ == nullptr) continue;
3940
3941 // Eliminate dead phis and compact the phis_ array of the block.
3942 intptr_t to_index = 0;
3943 for (intptr_t i = 0; i < join->phis_->length(); ++i) {
3944 PhiInstr* phi = (*join->phis_)[i];
3945 if (phi != nullptr) {
3946 if (!phi->is_alive()) {
3947 phi->ReplaceUsesWith(flow_graph->constant_null());
3948 phi->UnuseAllInputs();
3949 (*join->phis_)[i] = nullptr;
3950 if (FLAG_trace_optimization) {
3951 THR_Print("Removing dead phi v%" Pd "\n", phi->ssa_temp_index());
3952 }
3953 } else {
3954 (*join->phis_)[to_index++] = phi;
3955 }
3956 }
3957 }
3958 if (to_index == 0) {
3959 join->phis_ = nullptr;
3960 } else {
3961 join->phis_->TruncateTo(to_index);
3962 }
3963 }
3964 }
3965}
3966
// Returns true if the [current] instruction can possibly be eliminated
// (provided its result is not used).
3969static bool CanEliminateInstruction(Instruction* current,
3970 BlockEntryInstr* block) {
3971 ASSERT(current->GetBlock() == block);
3972 if (MayHaveVisibleEffect(current) || current->CanDeoptimize() ||
3973 current == block->last_instruction() || current->IsMaterializeObject() ||
3974 current->IsCheckStackOverflow() || current->IsReachabilityFence() ||
3975 current->IsEnterHandleScope() || current->IsExitHandleScope() ||
3976 current->IsRawStoreField()) {
3977 return false;
3978 }
3979 return true;
3980}
3981
3982void DeadCodeElimination::EliminateDeadCode(FlowGraph* flow_graph) {
3983 GrowableArray<Instruction*> worklist;
3984 BitVector live(flow_graph->zone(), flow_graph->current_ssa_temp_index());
3985
3986 // Mark all instructions with side-effects as live.
3987 for (BlockIterator block_it = flow_graph->reverse_postorder_iterator();
3988 !block_it.Done(); block_it.Advance()) {
3989 BlockEntryInstr* block = block_it.Current();
3990 for (ForwardInstructionIterator it(block); !it.Done(); it.Advance()) {
3991 Instruction* current = it.Current();
3992 ASSERT(!current->IsPushArgument());
3993 // TODO(alexmarkov): take control dependencies into account and
3994 // eliminate dead branches/conditions.
3995 if (!CanEliminateInstruction(current, block)) {
3996 worklist.Add(current);
3997 if (Definition* def = current->AsDefinition()) {
3998 if (def->HasSSATemp()) {
3999 live.Add(def->ssa_temp_index());
4000 }
4001 }
4002 }
4003 }
4004 }
4005
4006 // Iteratively follow inputs of instructions in the work list.
4007 while (!worklist.is_empty()) {
4008 Instruction* current = worklist.RemoveLast();
4009 for (intptr_t i = 0, n = current->InputCount(); i < n; ++i) {
4010 Definition* input = current->InputAt(i)->definition();
4011 ASSERT(input->HasSSATemp());
4012 if (!live.Contains(input->ssa_temp_index())) {
4013 worklist.Add(input);
4014 live.Add(input->ssa_temp_index());
4015 }
4016 }
4017 for (intptr_t i = 0, n = current->ArgumentCount(); i < n; ++i) {
4018 Definition* input = current->ArgumentAt(i);
4019 ASSERT(input->HasSSATemp());
4020 if (!live.Contains(input->ssa_temp_index())) {
4021 worklist.Add(input);
4022 live.Add(input->ssa_temp_index());
4023 }
4024 }
4025 if (current->env() != nullptr) {
4026 for (Environment::DeepIterator it(current->env()); !it.Done();
4027 it.Advance()) {
4028 Definition* input = it.CurrentValue()->definition();
4029 ASSERT(!input->IsPushArgument());
4030 if (input->HasSSATemp() && !live.Contains(input->ssa_temp_index())) {
4031 worklist.Add(input);
4032 live.Add(input->ssa_temp_index());
4033 }
4034 }
4035 }
4036 }
4037
4038 // Remove all instructions which are not marked as live.
4039 for (BlockIterator block_it = flow_graph->reverse_postorder_iterator();
4040 !block_it.Done(); block_it.Advance()) {
4041 BlockEntryInstr* block = block_it.Current();
4042 if (JoinEntryInstr* join = block->AsJoinEntry()) {
4043 for (PhiIterator it(join); !it.Done(); it.Advance()) {
4044 PhiInstr* current = it.Current();
4045 if (!live.Contains(current->ssa_temp_index())) {
4046 it.RemoveCurrentFromGraph();
4047 }
4048 }
4049 }
4050 for (ForwardInstructionIterator it(block); !it.Done(); it.Advance()) {
4051 Instruction* current = it.Current();
4052 if (!CanEliminateInstruction(current, block)) {
4053 continue;
4054 }
4055 ASSERT(!current->IsPushArgument());
4056 ASSERT((current->ArgumentCount() == 0) || !current->HasPushArguments());
4057 if (Definition* def = current->AsDefinition()) {
4058 if (def->HasSSATemp() && live.Contains(def->ssa_temp_index())) {
4059 continue;
4060 }
4061 }
4062 it.RemoveCurrentFromGraph();
4063 }
4064 }
4065}
4066
4067void CheckStackOverflowElimination::EliminateStackOverflow(FlowGraph* graph) {
4068 CheckStackOverflowInstr* first_stack_overflow_instr = NULL;
4069 for (BlockIterator block_it = graph->reverse_postorder_iterator();
4070 !block_it.Done(); block_it.Advance()) {
4071 BlockEntryInstr* entry = block_it.Current();
4072
4073 for (ForwardInstructionIterator it(entry); !it.Done(); it.Advance()) {
4074 Instruction* current = it.Current();
4075
4076 if (CheckStackOverflowInstr* instr = current->AsCheckStackOverflow()) {
4077 if (first_stack_overflow_instr == NULL) {
4078 first_stack_overflow_instr = instr;
4079 ASSERT(!first_stack_overflow_instr->in_loop());
4080 }
4081 continue;
4082 }
4083
4084 if (current->IsBranch()) {
4085 current = current->AsBranch()->comparison();
4086 }
4087
4088 if (current->HasUnknownSideEffects()) {
4089 return;
4090 }
4091 }
4092 }
4093
4094 if (first_stack_overflow_instr != NULL) {
4095 first_stack_overflow_instr->RemoveFromGraph();
4096 }
4097}
4098
4099} // namespace dart
4100