1 | // Copyright (c) 2016, the Dart project authors. Please see the AUTHORS file |
2 | // for details. All rights reserved. Use of this source code is governed by a |
3 | // BSD-style license that can be found in the LICENSE file. |
4 | |
5 | #include "vm/compiler/backend/redundancy_elimination.h" |
6 | |
7 | #include "vm/bit_vector.h" |
8 | #include "vm/compiler/backend/flow_graph.h" |
9 | #include "vm/compiler/backend/il.h" |
10 | #include "vm/compiler/backend/il_printer.h" |
11 | #include "vm/compiler/backend/loops.h" |
12 | #include "vm/hash_map.h" |
13 | #include "vm/stack_frame.h" |
14 | |
15 | namespace dart { |
16 | |
DEFINE_FLAG(bool, dead_store_elimination, true, "Eliminate dead stores");
DEFINE_FLAG(bool, load_cse, true, "Use redundant load elimination.");
DEFINE_FLAG(bool,
optimize_lazy_initializer_calls,
true,
"Eliminate redundant lazy initializer calls.");
DEFINE_FLAG(bool,
trace_load_optimization,
false,
"Print live sets for load optimization pass.");
27 | |
28 | // Quick access to the current zone. |
29 | #define Z (zone()) |
30 | |
31 | class CSEInstructionMap : public ValueObject { |
32 | public: |
33 | CSEInstructionMap() : map_() {} |
34 | explicit CSEInstructionMap(const CSEInstructionMap& other) |
35 | : ValueObject(), map_(other.map_) {} |
36 | |
37 | Instruction* Lookup(Instruction* other) const { |
38 | ASSERT(other->AllowsCSE()); |
39 | return map_.LookupValue(other); |
40 | } |
41 | |
42 | void Insert(Instruction* instr) { |
43 | ASSERT(instr->AllowsCSE()); |
44 | return map_.Insert(instr); |
45 | } |
46 | |
47 | private: |
48 | typedef DirectChainedHashMap<PointerKeyValueTrait<Instruction> > Map; |
49 | |
50 | Map map_; |
51 | }; |
52 | |
53 | // Place describes an abstract location (e.g. field) that IR can load |
54 | // from or store to. |
55 | // |
// Places are also used to describe wild-card locations, also known as
// aliases, which essentially represent sets of places that alias each other.
// Places A and B are said to alias each other if a store into A can affect
// a load from B.
59 | // |
60 | // We distinguish the following aliases: |
61 | // |
62 | // - for fields |
63 | // - *.f - field inside some object; |
64 | // - X.f - field inside an allocated object X; |
65 | // - f - static fields |
66 | // |
67 | // - for indexed accesses |
68 | // - *[*] - non-constant index inside some object; |
69 | // - *[C] - constant index inside some object; |
70 | // - X[*] - non-constant index inside an allocated object X; |
71 | // - X[C] - constant index inside an allocated object X. |
72 | // |
73 | // Constant indexed places are divided into two subcategories: |
74 | // |
75 | // - Access to homogeneous array-like objects: Array, ImmutableArray, |
76 | // OneByteString, TwoByteString. These objects can only be accessed |
// on an element-by-element basis, with all elements having the same size.
// This means X[C] aliases X[K] if and only if C == K.
// - TypedData accesses. TypedData allows reading one of the primitive
// data types at a given byte offset. When TypedData is accessed through
// an index operator on a typed array or a typed array view, it is
// guaranteed that the byte offset is always aligned by the element size.
// We write these accesses as X[C|S], where C is the constant byte offset
// and S is the size of the data type. Obviously X[C|S] and X[K|U] alias
// if and only if either C = RoundDown(K, S) or K = RoundDown(C, U).
// Note that not all accesses to typed data are aligned: e.g. ByteData
// allows unaligned access through its get*/set* methods.
// A check in Place::SetIndex ensures that we never create a place X[C|S]
// such that C is not aligned by S.
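// As an illustrative example of this rule: X[4|kInt16] aliases X[4|kInt32]
// (4 == RoundDown(4, 2)) and X[5|kInt8] (4 == RoundDown(5, 2)), but it does
// not alias X[6|kInt16] because neither byte offset rounds down to the
// other.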
90 | // |
91 | // Separating allocations from other objects improves precision of the |
92 | // load forwarding pass because of the following two properties: |
93 | // |
94 | // - if X can be proven to have no aliases itself (i.e. there is no other SSA |
95 | // variable that points to X) then no place inside X can be aliased with any |
96 | // wildcard dependent place (*.f, *.@offs, *[*], *[C]); |
97 | // - given allocations X and Y no place inside X can be aliased with any place |
98 | // inside Y even if any of them or both escape. |
99 | // |
// It is important to realize that a single place can belong to multiple
// aliases. For example, the place X.f with an aliased allocation X belongs to
// both the X.f and *.f aliases. Likewise X[C] with a non-aliased allocation X
// belongs to the X[C] and X[*] aliases.
104 | // |
105 | class Place : public ValueObject { |
106 | public: |
107 | enum Kind { |
108 | kNone, |
109 | |
110 | // Static field location. Is represented as a Field object with a |
111 | // nullptr instance. |
112 | kStaticField, |
113 | |
// Instance field location. It is represented by a pair of an instance
// and a Slot.
116 | kInstanceField, |
117 | |
118 | // Indexed location with a non-constant index. |
119 | kIndexed, |
120 | |
121 | // Indexed location with a constant index. |
122 | kConstantIndexed, |
123 | }; |
124 | |
125 | // Size of the element accessed by constant index. Size is only important |
126 | // for TypedData because those accesses can alias even when constant indexes |
127 | // are not the same: X[0|4] aliases X[0|2] and X[2|2]. |
128 | enum ElementSize { |
129 | // If indexed access is not a TypedData access then element size is not |
130 | // important because there is only a single possible access size depending |
131 | // on the receiver - X[C] aliases X[K] if and only if C == K. |
132 | // This is the size set for Array, ImmutableArray, OneByteString and |
133 | // TwoByteString accesses. |
134 | kNoSize, |
135 | |
136 | // 1 byte (Int8List, Uint8List, Uint8ClampedList). |
137 | kInt8, |
138 | |
139 | // 2 bytes (Int16List, Uint16List). |
140 | kInt16, |
141 | |
142 | // 4 bytes (Int32List, Uint32List, Float32List). |
143 | kInt32, |
144 | |
145 | // 8 bytes (Int64List, Uint64List, Float64List). |
146 | kInt64, |
147 | |
148 | // 16 bytes (Int32x4List, Float32x4List, Float64x2List). |
149 | kInt128, |
150 | |
151 | kLargestElementSize = kInt128, |
152 | }; |
153 | |
154 | Place(const Place& other) |
155 | : ValueObject(), |
156 | flags_(other.flags_), |
157 | instance_(other.instance_), |
158 | raw_selector_(other.raw_selector_), |
159 | id_(other.id_) {} |
160 | |
// Construct a place from the instruction if the instruction accesses any
// place. Otherwise construct a kNone place.
163 | Place(Instruction* instr, bool* is_load, bool* is_store) |
164 | : flags_(0), instance_(nullptr), raw_selector_(0), id_(0) { |
165 | switch (instr->tag()) { |
166 | case Instruction::kLoadField: { |
167 | LoadFieldInstr* load_field = instr->AsLoadField(); |
168 | set_representation(load_field->representation()); |
169 | instance_ = load_field->instance()->definition()->OriginalDefinition(); |
170 | set_kind(kInstanceField); |
171 | instance_field_ = &load_field->slot(); |
172 | *is_load = true; |
173 | break; |
174 | } |
175 | |
176 | case Instruction::kStoreInstanceField: { |
177 | StoreInstanceFieldInstr* store = instr->AsStoreInstanceField(); |
178 | set_representation(store->RequiredInputRepresentation( |
179 | StoreInstanceFieldInstr::kValuePos)); |
180 | instance_ = store->instance()->definition()->OriginalDefinition(); |
181 | set_kind(kInstanceField); |
182 | instance_field_ = &store->slot(); |
183 | *is_store = true; |
184 | break; |
185 | } |
186 | |
187 | case Instruction::kLoadStaticField: |
188 | set_kind(kStaticField); |
189 | set_representation(instr->AsLoadStaticField()->representation()); |
190 | static_field_ = &instr->AsLoadStaticField()->field(); |
191 | *is_load = true; |
192 | break; |
193 | |
194 | case Instruction::kStoreStaticField: |
195 | set_kind(kStaticField); |
196 | set_representation( |
197 | instr->AsStoreStaticField()->RequiredInputRepresentation( |
198 | StoreStaticFieldInstr::kValuePos)); |
199 | static_field_ = &instr->AsStoreStaticField()->field(); |
200 | *is_store = true; |
201 | break; |
202 | |
203 | case Instruction::kLoadIndexed: { |
204 | LoadIndexedInstr* load_indexed = instr->AsLoadIndexed(); |
205 | set_representation(load_indexed->representation()); |
206 | instance_ = load_indexed->array()->definition()->OriginalDefinition(); |
207 | SetIndex(load_indexed->index()->definition()->OriginalDefinition(), |
208 | load_indexed->index_scale(), load_indexed->class_id()); |
209 | *is_load = true; |
210 | break; |
211 | } |
212 | |
213 | case Instruction::kStoreIndexed: { |
214 | StoreIndexedInstr* store_indexed = instr->AsStoreIndexed(); |
215 | set_representation(store_indexed->RequiredInputRepresentation( |
216 | StoreIndexedInstr::kValuePos)); |
217 | instance_ = store_indexed->array()->definition()->OriginalDefinition(); |
218 | SetIndex(store_indexed->index()->definition()->OriginalDefinition(), |
219 | store_indexed->index_scale(), store_indexed->class_id()); |
220 | *is_store = true; |
221 | break; |
222 | } |
223 | |
224 | default: |
225 | break; |
226 | } |
227 | } |
228 | |
229 | bool IsConstant(Object* value) const { |
230 | switch (kind()) { |
231 | case kInstanceField: |
232 | return (instance() != nullptr) && instance()->IsConstant() && |
233 | LoadFieldInstr::TryEvaluateLoad( |
234 | instance()->AsConstant()->constant_value(), instance_field(), |
235 | value); |
236 | default: |
237 | return false; |
238 | } |
239 | } |
240 | |
241 | // Create object representing *[*] alias. |
242 | static Place* CreateAnyInstanceAnyIndexAlias(Zone* zone, intptr_t id) { |
243 | return Wrap( |
244 | zone, Place(EncodeFlags(kIndexed, kNoRepresentation, kNoSize), NULL, 0), |
245 | id); |
246 | } |
247 | |
// Return the least generic alias for this place. Given that aliases are
// essentially sets of places, we define the least generic alias as the
// smallest alias that contains this place.
//
// We obtain such an alias by a simple transformation:
//
// - for places that depend on an instance X.f, X.@offs, X[i], X[C]
// we drop X if X is not an allocation, because in this case X does not
// possess an identity, obtaining aliases *.f, *.@offs, *[i] and *[C]
// respectively;
// - for non-constant indexed places X[i] we drop information about the
// index, obtaining the alias X[*];
// - we drop information about representation, but keep the element size
// if any.
262 | // |
263 | Place ToAlias() const { |
264 | return Place( |
265 | RepresentationBits::update(kNoRepresentation, flags_), |
266 | (DependsOnInstance() && IsAllocation(instance())) ? instance() : NULL, |
267 | (kind() == kIndexed) ? 0 : raw_selector_); |
268 | } |
269 | |
270 | bool DependsOnInstance() const { |
271 | switch (kind()) { |
272 | case kInstanceField: |
273 | case kIndexed: |
274 | case kConstantIndexed: |
275 | return true; |
276 | |
277 | case kStaticField: |
278 | case kNone: |
279 | return false; |
280 | } |
281 | |
282 | UNREACHABLE(); |
283 | return false; |
284 | } |
285 | |
286 | // Given instance dependent alias X.f, X.@offs, X[C], X[*] return |
287 | // wild-card dependent alias *.f, *.@offs, *[C] or *[*] respectively. |
288 | Place CopyWithoutInstance() const { |
289 | ASSERT(DependsOnInstance()); |
290 | return Place(flags_, NULL, raw_selector_); |
291 | } |
292 | |
293 | // Given alias X[C] or *[C] return X[*] and *[*] respectively. |
294 | Place CopyWithoutIndex() const { |
295 | ASSERT(kind() == kConstantIndexed); |
296 | return Place(EncodeFlags(kIndexed, kNoRepresentation, kNoSize), instance_, |
297 | 0); |
298 | } |
299 | |
300 | // Given alias X[ByteOffs|S] and a larger element size S', return |
301 | // alias X[RoundDown(ByteOffs, S')|S'] - this is the byte offset of a larger |
302 | // typed array element that contains this typed array element. |
303 | // In other words this method computes the only possible place with the given |
304 | // size that can alias this place (due to alignment restrictions). |
305 | // For example for X[9|kInt8] and target size kInt32 we would return |
306 | // X[8|kInt32]. |
307 | Place ToLargerElement(ElementSize to) const { |
308 | ASSERT(kind() == kConstantIndexed); |
309 | ASSERT(element_size() != kNoSize); |
310 | ASSERT(element_size() < to); |
311 | return Place(ElementSizeBits::update(to, flags_), instance_, |
312 | RoundByteOffset(to, index_constant_)); |
313 | } |
314 | |
315 | // Given alias X[ByteOffs|S], smaller element size S' and index from 0 to |
316 | // S/S' - 1 return alias X[ByteOffs + S'*index|S'] - this is the byte offset |
317 | // of a smaller typed array element which is contained within this typed |
318 | // array element. |
// For example, X[8|kInt32] contains X[8|kInt16] (index 0) and
// X[10|kInt16] (index 1).
321 | Place ToSmallerElement(ElementSize to, intptr_t index) const { |
322 | ASSERT(kind() == kConstantIndexed); |
323 | ASSERT(element_size() != kNoSize); |
324 | ASSERT(element_size() > to); |
325 | ASSERT(index >= 0); |
326 | ASSERT(index < |
327 | ElementSizeMultiplier(element_size()) / ElementSizeMultiplier(to)); |
328 | return Place(ElementSizeBits::update(to, flags_), instance_, |
329 | ByteOffsetToSmallerElement(to, index, index_constant_)); |
330 | } |
331 | |
332 | intptr_t id() const { return id_; } |
333 | |
334 | Kind kind() const { return KindBits::decode(flags_); } |
335 | |
336 | Representation representation() const { |
337 | return RepresentationBits::decode(flags_); |
338 | } |
339 | |
340 | Definition* instance() const { |
341 | ASSERT(DependsOnInstance()); |
342 | return instance_; |
343 | } |
344 | |
345 | void set_instance(Definition* def) { |
346 | ASSERT(DependsOnInstance()); |
347 | instance_ = def->OriginalDefinition(); |
348 | } |
349 | |
350 | const Field& static_field() const { |
351 | ASSERT(kind() == kStaticField); |
352 | ASSERT(static_field_->is_static()); |
353 | return *static_field_; |
354 | } |
355 | |
356 | const Slot& instance_field() const { |
357 | ASSERT(kind() == kInstanceField); |
358 | return *instance_field_; |
359 | } |
360 | |
361 | Definition* index() const { |
362 | ASSERT(kind() == kIndexed); |
363 | return index_; |
364 | } |
365 | |
366 | ElementSize element_size() const { return ElementSizeBits::decode(flags_); } |
367 | |
368 | intptr_t index_constant() const { |
369 | ASSERT(kind() == kConstantIndexed); |
370 | return index_constant_; |
371 | } |
372 | |
373 | static const char* DefinitionName(Definition* def) { |
374 | if (def == NULL) { |
375 | return "*" ; |
376 | } else { |
377 | return Thread::Current()->zone()->PrintToString("v%" Pd, |
378 | def->ssa_temp_index()); |
379 | } |
380 | } |
381 | |
382 | const char* ToCString() const { |
383 | switch (kind()) { |
384 | case kNone: |
385 | return "<none>" ; |
386 | |
387 | case kStaticField: { |
388 | const char* field_name = |
389 | String::Handle(static_field().name()).ToCString(); |
return Thread::Current()->zone()->PrintToString("<%s>", field_name);
391 | } |
392 | |
393 | case kInstanceField: |
394 | return Thread::Current()->zone()->PrintToString( |
395 | "<%s.%s[%p]>" , DefinitionName(instance()), instance_field().Name(), |
396 | &instance_field()); |
397 | |
398 | case kIndexed: |
399 | return Thread::Current()->zone()->PrintToString( |
400 | "<%s[%s]>" , DefinitionName(instance()), DefinitionName(index())); |
401 | |
402 | case kConstantIndexed: |
403 | if (element_size() == kNoSize) { |
404 | return Thread::Current()->zone()->PrintToString( |
405 | "<%s[%" Pd "]>" , DefinitionName(instance()), index_constant()); |
406 | } else { |
407 | return Thread::Current()->zone()->PrintToString( |
408 | "<%s[%" Pd "|%" Pd "]>" , DefinitionName(instance()), |
409 | index_constant(), ElementSizeMultiplier(element_size())); |
410 | } |
411 | } |
412 | UNREACHABLE(); |
413 | return "<?>" ; |
414 | } |
415 | |
416 | // Fields that are considered immutable by load optimization. |
417 | // Handle static finals as non-final with precompilation because |
418 | // they may be reset to uninitialized after compilation. |
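// For example (illustrative): the slot of a final instance field is
// immutable, and a static final field is immutable unless fields may be
// reset (FLAG_fields_may_be_reset).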
419 | bool IsImmutableField() const { |
420 | switch (kind()) { |
421 | case kInstanceField: |
422 | return instance_field().is_immutable(); |
423 | case kStaticField: |
424 | return static_field().is_final() && !FLAG_fields_may_be_reset; |
425 | default: |
426 | return false; |
427 | } |
428 | } |
429 | |
430 | intptr_t Hashcode() const { |
431 | return (flags_ * 63 + reinterpret_cast<intptr_t>(instance_)) * 31 + |
432 | FieldHashcode(); |
433 | } |
434 | |
435 | bool Equals(const Place* other) const { |
436 | return (flags_ == other->flags_) && (instance_ == other->instance_) && |
437 | SameField(other); |
438 | } |
439 | |
440 | // Create a zone allocated copy of this place and assign given id to it. |
441 | static Place* Wrap(Zone* zone, const Place& place, intptr_t id); |
442 | |
443 | static bool IsAllocation(Definition* defn) { |
444 | return (defn != NULL) && |
445 | (defn->IsAllocateObject() || defn->IsCreateArray() || |
446 | defn->IsAllocateUninitializedContext() || |
447 | (defn->IsStaticCall() && |
448 | defn->AsStaticCall()->IsRecognizedFactory())); |
449 | } |
450 | |
451 | private: |
452 | Place(uword flags, Definition* instance, intptr_t selector) |
453 | : flags_(flags), instance_(instance), raw_selector_(selector), id_(0) {} |
454 | |
455 | bool SameField(const Place* other) const { |
456 | return (kind() == kStaticField) |
457 | ? (static_field().Original() == other->static_field().Original()) |
458 | : (raw_selector_ == other->raw_selector_); |
459 | } |
460 | |
461 | intptr_t FieldHashcode() const { |
462 | return (kind() == kStaticField) |
463 | ? String::Handle(Field::Handle(static_field().Original()).name()) |
464 | .Hash() |
465 | : raw_selector_; |
466 | } |
467 | |
468 | void set_representation(Representation rep) { |
469 | flags_ = RepresentationBits::update(rep, flags_); |
470 | } |
471 | |
472 | void set_kind(Kind kind) { flags_ = KindBits::update(kind, flags_); } |
473 | |
474 | void set_element_size(ElementSize scale) { |
475 | flags_ = ElementSizeBits::update(scale, flags_); |
476 | } |
477 | |
478 | void SetIndex(Definition* index, intptr_t scale, intptr_t class_id) { |
479 | ConstantInstr* index_constant = index->AsConstant(); |
480 | if ((index_constant != NULL) && index_constant->value().IsSmi()) { |
481 | const intptr_t index_value = Smi::Cast(index_constant->value()).Value(); |
482 | const ElementSize size = ElementSizeFor(class_id); |
483 | const bool is_typed_access = (size != kNoSize); |
// Indexing into [RawTypedDataView]/[RawExternalTypedData] happens via an
485 | // untagged load of the `_data` field (which points to C memory). |
486 | // |
487 | // Indexing into dart:ffi's [RawPointer] happens via loading of the |
488 | // `c_memory_address_`, converting it to an integer, doing some arithmetic |
// and finally using IntConverterInstr to convert to an untagged
490 | // representation. |
491 | // |
492 | // In both cases the array used for load/store has untagged |
493 | // representation. |
494 | const bool can_be_view = instance_->representation() == kUntagged; |
495 | |
// If this is a typed data access, scale the index to get the
// byte offset. Otherwise ignore the scale.
498 | if (!is_typed_access) { |
499 | scale = 1; |
500 | } |
501 | |
502 | // Guard against potential multiplication overflow and negative indices. |
503 | if ((0 <= index_value) && (index_value < (kMaxInt32 / scale))) { |
504 | const intptr_t scaled_index = index_value * scale; |
505 | |
506 | // Guard against unaligned byte offsets and access through raw |
507 | // memory pointer (which can be pointing into another typed data). |
508 | if (!is_typed_access || |
509 | (!can_be_view && |
510 | Utils::IsAligned(scaled_index, ElementSizeMultiplier(size)))) { |
511 | set_kind(kConstantIndexed); |
512 | set_element_size(size); |
513 | index_constant_ = scaled_index; |
514 | return; |
515 | } |
516 | } |
517 | |
518 | // Fallthrough: create generic _[*] place. |
519 | } |
520 | |
521 | set_kind(kIndexed); |
522 | index_ = index; |
523 | } |
524 | |
525 | static uword EncodeFlags(Kind kind, Representation rep, ElementSize scale) { |
526 | ASSERT((kind == kConstantIndexed) || (scale == kNoSize)); |
527 | return KindBits::encode(kind) | RepresentationBits::encode(rep) | |
528 | ElementSizeBits::encode(scale); |
529 | } |
530 | |
531 | static ElementSize ElementSizeFor(intptr_t class_id) { |
532 | switch (class_id) { |
533 | case kArrayCid: |
534 | case kImmutableArrayCid: |
535 | case kOneByteStringCid: |
536 | case kTwoByteStringCid: |
537 | case kExternalOneByteStringCid: |
538 | case kExternalTwoByteStringCid: |
539 | // Object arrays and strings do not allow accessing them through |
540 | // different types. No need to attach scale. |
541 | return kNoSize; |
542 | |
543 | case kTypedDataInt8ArrayCid: |
544 | case kTypedDataUint8ArrayCid: |
545 | case kTypedDataUint8ClampedArrayCid: |
546 | case kExternalTypedDataUint8ArrayCid: |
547 | case kExternalTypedDataUint8ClampedArrayCid: |
548 | return kInt8; |
549 | |
550 | case kTypedDataInt16ArrayCid: |
551 | case kTypedDataUint16ArrayCid: |
552 | return kInt16; |
553 | |
554 | case kTypedDataInt32ArrayCid: |
555 | case kTypedDataUint32ArrayCid: |
556 | case kTypedDataFloat32ArrayCid: |
557 | return kInt32; |
558 | |
559 | case kTypedDataInt64ArrayCid: |
560 | case kTypedDataUint64ArrayCid: |
561 | case kTypedDataFloat64ArrayCid: |
562 | return kInt64; |
563 | |
564 | case kTypedDataInt32x4ArrayCid: |
565 | case kTypedDataFloat32x4ArrayCid: |
566 | case kTypedDataFloat64x2ArrayCid: |
567 | return kInt128; |
568 | |
569 | default: |
570 | UNREACHABLE(); |
571 | return kNoSize; |
572 | } |
573 | } |
574 | |
575 | static intptr_t ElementSizeMultiplier(ElementSize size) { |
576 | return 1 << (static_cast<intptr_t>(size) - static_cast<intptr_t>(kInt8)); |
577 | } |
578 | |
579 | static intptr_t RoundByteOffset(ElementSize size, intptr_t offset) { |
580 | return offset & ~(ElementSizeMultiplier(size) - 1); |
581 | } |
582 | |
583 | static intptr_t ByteOffsetToSmallerElement(ElementSize size, |
584 | intptr_t index, |
585 | intptr_t base_offset) { |
586 | return base_offset + index * ElementSizeMultiplier(size); |
587 | } |
588 | |
589 | class KindBits : public BitField<uword, Kind, 0, 3> {}; |
590 | class RepresentationBits |
591 | : public BitField<uword, Representation, KindBits::kNextBit, 11> {}; |
592 | class ElementSizeBits |
593 | : public BitField<uword, ElementSize, RepresentationBits::kNextBit, 3> {}; |
594 | |
595 | uword flags_; |
596 | Definition* instance_; |
597 | union { |
598 | intptr_t raw_selector_; |
599 | const Field* static_field_; |
600 | const Slot* instance_field_; |
601 | intptr_t index_constant_; |
602 | Definition* index_; |
603 | }; |
604 | |
605 | intptr_t id_; |
606 | }; |
607 | |
608 | class ZonePlace : public ZoneAllocated { |
609 | public: |
610 | explicit ZonePlace(const Place& place) : place_(place) {} |
611 | |
612 | Place* place() { return &place_; } |
613 | |
614 | private: |
615 | Place place_; |
616 | }; |
617 | |
618 | Place* Place::Wrap(Zone* zone, const Place& place, intptr_t id) { |
619 | Place* wrapped = (new (zone) ZonePlace(place))->place(); |
620 | wrapped->id_ = id; |
621 | return wrapped; |
622 | } |
623 | |
// Correspondence between places connected through outgoing phi moves on the
// edge that targets the join.
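// For example (illustrative): given v3 <- phi(v1, v2) and a numbered place
// v3.f, outgoing moves from v1.f and v2.f to v3.f are recorded on the
// corresponding predecessor edges of the join (see ComputePhiMoves below).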
626 | class PhiPlaceMoves : public ZoneAllocated { |
627 | public: |
628 | // Record a move from the place with id |from| to the place with id |to| at |
629 | // the given block. |
630 | void CreateOutgoingMove(Zone* zone, |
631 | BlockEntryInstr* block, |
632 | intptr_t from, |
633 | intptr_t to) { |
634 | const intptr_t block_num = block->preorder_number(); |
635 | moves_.EnsureLength(block_num + 1, nullptr); |
636 | |
637 | if (moves_[block_num] == nullptr) { |
638 | moves_[block_num] = new (zone) ZoneGrowableArray<Move>(5); |
639 | } |
640 | |
641 | moves_[block_num]->Add(Move(from, to)); |
642 | } |
643 | |
644 | class Move { |
645 | public: |
646 | Move(intptr_t from, intptr_t to) : from_(from), to_(to) {} |
647 | |
648 | intptr_t from() const { return from_; } |
649 | intptr_t to() const { return to_; } |
650 | |
651 | private: |
652 | intptr_t from_; |
653 | intptr_t to_; |
654 | }; |
655 | |
656 | typedef const ZoneGrowableArray<Move>* MovesList; |
657 | |
658 | MovesList GetOutgoingMoves(BlockEntryInstr* block) const { |
659 | const intptr_t block_num = block->preorder_number(); |
660 | return (block_num < moves_.length()) ? moves_[block_num] : NULL; |
661 | } |
662 | |
663 | private: |
664 | GrowableArray<ZoneGrowableArray<Move>*> moves_; |
665 | }; |
666 | |
667 | // A map from aliases to a set of places sharing the alias. Additionally |
668 | // carries a set of places that can be aliased by side-effects, essentially |
669 | // those that are affected by calls. |
670 | class AliasedSet : public ZoneAllocated { |
671 | public: |
672 | AliasedSet(Zone* zone, |
673 | DirectChainedHashMap<PointerKeyValueTrait<Place> >* places_map, |
674 | ZoneGrowableArray<Place*>* places, |
675 | PhiPlaceMoves* phi_moves) |
676 | : zone_(zone), |
677 | places_map_(places_map), |
678 | places_(*places), |
679 | phi_moves_(phi_moves), |
680 | aliases_(5), |
681 | aliases_map_(), |
682 | typed_data_access_sizes_(), |
683 | representatives_(), |
684 | killed_(), |
685 | aliased_by_effects_(new (zone) BitVector(zone, places->length())) { |
686 | InsertAlias(Place::CreateAnyInstanceAnyIndexAlias( |
687 | zone_, kAnyInstanceAnyIndexAlias)); |
688 | for (intptr_t i = 0; i < places_.length(); i++) { |
689 | AddRepresentative(places_[i]); |
690 | } |
691 | ComputeKillSets(); |
692 | } |
693 | |
694 | intptr_t LookupAliasId(const Place& alias) { |
695 | const Place* result = aliases_map_.LookupValue(&alias); |
696 | return (result != NULL) ? result->id() : static_cast<intptr_t>(kNoAlias); |
697 | } |
698 | |
699 | BitVector* GetKilledSet(intptr_t alias) { |
700 | return (alias < killed_.length()) ? killed_[alias] : NULL; |
701 | } |
702 | |
703 | intptr_t max_place_id() const { return places().length(); } |
704 | bool IsEmpty() const { return max_place_id() == 0; } |
705 | |
706 | BitVector* aliased_by_effects() const { return aliased_by_effects_; } |
707 | |
708 | const ZoneGrowableArray<Place*>& places() const { return places_; } |
709 | |
710 | Place* LookupCanonical(Place* place) const { |
711 | return places_map_->LookupValue(place); |
712 | } |
713 | |
714 | void PrintSet(BitVector* set) { |
715 | bool comma = false; |
716 | for (BitVector::Iterator it(set); !it.Done(); it.Advance()) { |
717 | if (comma) { |
718 | THR_Print(", " ); |
719 | } |
720 | THR_Print("%s" , places_[it.Current()]->ToCString()); |
721 | comma = true; |
722 | } |
723 | } |
724 | |
725 | const PhiPlaceMoves* phi_moves() const { return phi_moves_; } |
726 | |
727 | void RollbackAliasedIdentites() { |
728 | for (intptr_t i = 0; i < identity_rollback_.length(); ++i) { |
729 | identity_rollback_[i]->SetIdentity(AliasIdentity::Unknown()); |
730 | } |
731 | } |
732 | |
733 | // Returns false if the result of an allocation instruction can't be aliased |
734 | // by another SSA variable and true otherwise. |
735 | bool CanBeAliased(Definition* alloc) { |
736 | if (!Place::IsAllocation(alloc)) { |
737 | return true; |
738 | } |
739 | |
740 | if (alloc->Identity().IsUnknown()) { |
741 | ComputeAliasing(alloc); |
742 | } |
743 | |
744 | return !alloc->Identity().IsNotAliased(); |
745 | } |
746 | |
747 | enum { kNoAlias = 0 }; |
748 | |
749 | private: |
750 | enum { |
751 | // Artificial alias that is used to collect all representatives of the |
752 | // *[C], X[C] aliases for arbitrary C. |
753 | kAnyConstantIndexedAlias = 1, |
754 | |
755 | // Artificial alias that is used to collect all representatives of |
756 | // *[C] alias for arbitrary C. |
757 | kUnknownInstanceConstantIndexedAlias = 2, |
758 | |
759 | // Artificial alias that is used to collect all representatives of |
760 | // X[*] alias for all X. |
761 | kAnyAllocationIndexedAlias = 3, |
762 | |
763 | // *[*] alias. |
764 | kAnyInstanceAnyIndexAlias = 4 |
765 | }; |
766 | |
767 | // Compute least generic alias for the place and assign alias id to it. |
768 | void AddRepresentative(Place* place) { |
769 | if (!place->IsImmutableField()) { |
770 | const Place* alias = CanonicalizeAlias(place->ToAlias()); |
771 | EnsureSet(&representatives_, alias->id())->Add(place->id()); |
772 | |
773 | // Update cumulative representative sets that are used during |
774 | // killed sets computation. |
775 | if (alias->kind() == Place::kConstantIndexed) { |
776 | if (CanBeAliased(alias->instance())) { |
777 | EnsureSet(&representatives_, kAnyConstantIndexedAlias) |
778 | ->Add(place->id()); |
779 | } |
780 | |
781 | if (alias->instance() == NULL) { |
782 | EnsureSet(&representatives_, kUnknownInstanceConstantIndexedAlias) |
783 | ->Add(place->id()); |
784 | } |
785 | |
786 | // Collect all element sizes used to access TypedData arrays in |
787 | // the function. This is used to skip sizes without representatives |
788 | // when computing kill sets. |
789 | if (alias->element_size() != Place::kNoSize) { |
790 | typed_data_access_sizes_.Add(alias->element_size()); |
791 | } |
792 | } else if ((alias->kind() == Place::kIndexed) && |
793 | CanBeAliased(place->instance())) { |
794 | EnsureSet(&representatives_, kAnyAllocationIndexedAlias) |
795 | ->Add(place->id()); |
796 | } |
797 | |
798 | if (!IsIndependentFromEffects(place)) { |
799 | aliased_by_effects_->Add(place->id()); |
800 | } |
801 | } |
802 | } |
803 | |
804 | void ComputeKillSets() { |
805 | for (intptr_t i = 0; i < aliases_.length(); ++i) { |
806 | const Place* alias = aliases_[i]; |
807 | // Add all representatives to the kill set. |
808 | AddAllRepresentatives(alias->id(), alias->id()); |
809 | ComputeKillSet(alias); |
810 | } |
811 | |
812 | if (FLAG_trace_load_optimization) { |
813 | THR_Print("Aliases KILL sets:\n" ); |
814 | for (intptr_t i = 0; i < aliases_.length(); ++i) { |
815 | const Place* alias = aliases_[i]; |
816 | BitVector* kill = GetKilledSet(alias->id()); |
817 | |
818 | THR_Print("%s: " , alias->ToCString()); |
819 | if (kill != NULL) { |
820 | PrintSet(kill); |
821 | } |
822 | THR_Print("\n" ); |
823 | } |
824 | } |
825 | } |
826 | |
827 | void InsertAlias(const Place* alias) { |
828 | aliases_map_.Insert(alias); |
829 | aliases_.Add(alias); |
830 | } |
831 | |
832 | const Place* CanonicalizeAlias(const Place& alias) { |
833 | const Place* canonical = aliases_map_.LookupValue(&alias); |
834 | if (canonical == NULL) { |
835 | canonical = Place::Wrap(zone_, alias, |
836 | kAnyInstanceAnyIndexAlias + aliases_.length()); |
837 | InsertAlias(canonical); |
838 | } |
839 | ASSERT(aliases_map_.LookupValue(&alias) == canonical); |
840 | return canonical; |
841 | } |
842 | |
843 | BitVector* GetRepresentativesSet(intptr_t alias) { |
844 | return (alias < representatives_.length()) ? representatives_[alias] : NULL; |
845 | } |
846 | |
847 | BitVector* EnsureSet(GrowableArray<BitVector*>* sets, intptr_t alias) { |
848 | while (sets->length() <= alias) { |
849 | sets->Add(NULL); |
850 | } |
851 | |
852 | BitVector* set = (*sets)[alias]; |
853 | if (set == NULL) { |
854 | (*sets)[alias] = set = new (zone_) BitVector(zone_, max_place_id()); |
855 | } |
856 | return set; |
857 | } |
858 | |
859 | void AddAllRepresentatives(const Place* to, intptr_t from) { |
860 | AddAllRepresentatives(to->id(), from); |
861 | } |
862 | |
863 | void AddAllRepresentatives(intptr_t to, intptr_t from) { |
864 | BitVector* from_set = GetRepresentativesSet(from); |
865 | if (from_set != NULL) { |
866 | EnsureSet(&killed_, to)->AddAll(from_set); |
867 | } |
868 | } |
869 | |
870 | void CrossAlias(const Place* to, const Place& from) { |
871 | const intptr_t from_id = LookupAliasId(from); |
872 | if (from_id == kNoAlias) { |
873 | return; |
874 | } |
875 | CrossAlias(to, from_id); |
876 | } |
877 | |
878 | void CrossAlias(const Place* to, intptr_t from) { |
879 | AddAllRepresentatives(to->id(), from); |
880 | AddAllRepresentatives(from, to->id()); |
881 | } |
882 | |
// When computing kill sets we let a less generic alias insert its
// representatives into a more generic alias's kill set. For example,
// when visiting alias X[*], instead of searching for all aliases X[C]
// and inserting their representatives into the kill set for X[*], we update
// the kill set for X[*] each time we visit a new X[C] for some C.
// There is an exception, however: if both aliases are parametric, like *[C]
// and X[*], which cross-alias when X is an aliased allocation, then we use
// artificial aliases that contain all possible representatives for the given
// alias, for any value of the parameter, to compute the resulting kill set.
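// For instance (illustrative): when X[2] is visited for an aliased
// allocation X, it is cross-aliased with X[*], *[2] and *[*]; the
// parametric pair X[*] vs *[C] is instead covered by the artificial
// kUnknownInstanceConstantIndexedAlias set when X[*] is visited.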
892 | void ComputeKillSet(const Place* alias) { |
893 | switch (alias->kind()) { |
894 | case Place::kIndexed: // Either *[*] or X[*] alias. |
895 | if (alias->instance() == NULL) { |
896 | // *[*] aliases with X[*], X[C], *[C]. |
897 | AddAllRepresentatives(alias, kAnyConstantIndexedAlias); |
898 | AddAllRepresentatives(alias, kAnyAllocationIndexedAlias); |
899 | } else if (CanBeAliased(alias->instance())) { |
900 | // X[*] aliases with X[C]. |
901 | // If X can be aliased then X[*] also aliases with *[C], *[*]. |
902 | CrossAlias(alias, kAnyInstanceAnyIndexAlias); |
903 | AddAllRepresentatives(alias, kUnknownInstanceConstantIndexedAlias); |
904 | } |
905 | break; |
906 | |
907 | case Place::kConstantIndexed: // Either X[C] or *[C] alias. |
908 | if (alias->element_size() != Place::kNoSize) { |
909 | const bool has_aliased_instance = |
910 | (alias->instance() != NULL) && CanBeAliased(alias->instance()); |
911 | |
// If this is a TypedData access then X[C|S] aliases the larger elements
// covering this one, X[RoundDown(C, S')|S'] for all S' > S, as well as
// all smaller elements covered by this one, X[C'|S'] for
// S' < S and all C' such that C = RoundDown(C', S).
916 | // In the loop below it's enough to only propagate aliasing to |
917 | // larger aliases because propagation is symmetric: smaller aliases |
918 | // (if there are any) would update kill set for this alias when they |
919 | // are visited. |
920 | for (intptr_t i = static_cast<intptr_t>(alias->element_size()) + 1; |
921 | i <= Place::kLargestElementSize; i++) { |
// Skip element sizes that are guaranteed to have no representatives.
923 | if (!typed_data_access_sizes_.Contains(alias->element_size())) { |
924 | continue; |
925 | } |
926 | |
927 | // X[C|S] aliases with X[RoundDown(C, S')|S'] and likewise |
928 | // *[C|S] aliases with *[RoundDown(C, S')|S']. |
929 | CrossAlias(alias, alias->ToLargerElement( |
930 | static_cast<Place::ElementSize>(i))); |
931 | } |
932 | |
933 | if (has_aliased_instance) { |
934 | // If X is an aliased instance then X[C|S] aliases *[C'|S'] for all |
935 | // related combinations of C' and S'. |
936 | // Caveat: this propagation is not symmetric (we would not know |
937 | // to propagate aliasing from *[C'|S'] to X[C|S] when visiting |
938 | // *[C'|S']) and thus we need to handle both element sizes smaller |
939 | // and larger than S. |
940 | const Place no_instance_alias = alias->CopyWithoutInstance(); |
941 | for (intptr_t i = Place::kInt8; i <= Place::kLargestElementSize; |
942 | i++) { |
// Skip element sizes that are guaranteed to have no
// representatives.
945 | if (!typed_data_access_sizes_.Contains(alias->element_size())) { |
946 | continue; |
947 | } |
948 | |
949 | const auto other_size = static_cast<Place::ElementSize>(i); |
950 | if (other_size > alias->element_size()) { |
951 | // X[C|S] aliases all larger elements which cover it: |
952 | // *[RoundDown(C, S')|S'] for S' > S. |
953 | CrossAlias(alias, |
954 | no_instance_alias.ToLargerElement(other_size)); |
955 | } else if (other_size < alias->element_size()) { |
956 | // X[C|S] aliases all sub-elements of smaller size: |
957 | // *[C+j*S'|S'] for S' < S and j from 0 to S/S' - 1. |
958 | const auto num_smaller_elements = |
959 | 1 << (alias->element_size() - other_size); |
960 | for (intptr_t j = 0; j < num_smaller_elements; j++) { |
961 | CrossAlias(alias, |
962 | no_instance_alias.ToSmallerElement(other_size, j)); |
963 | } |
964 | } |
965 | } |
966 | } |
967 | } |
968 | |
969 | if (alias->instance() == NULL) { |
970 | // *[C] aliases with X[C], X[*], *[*]. |
971 | AddAllRepresentatives(alias, kAnyAllocationIndexedAlias); |
972 | CrossAlias(alias, kAnyInstanceAnyIndexAlias); |
973 | } else { |
974 | // X[C] aliases with X[*]. |
975 | // If X can be aliased then X[C] also aliases with *[C], *[*]. |
976 | CrossAlias(alias, alias->CopyWithoutIndex()); |
977 | if (CanBeAliased(alias->instance())) { |
978 | CrossAlias(alias, alias->CopyWithoutInstance()); |
979 | CrossAlias(alias, kAnyInstanceAnyIndexAlias); |
980 | } |
981 | } |
982 | break; |
983 | |
984 | case Place::kStaticField: |
985 | // Nothing to do. |
986 | break; |
987 | |
988 | case Place::kInstanceField: |
989 | if (CanBeAliased(alias->instance())) { |
990 | // X.f alias with *.f. |
991 | CrossAlias(alias, alias->CopyWithoutInstance()); |
992 | } |
993 | break; |
994 | |
995 | case Place::kNone: |
996 | UNREACHABLE(); |
997 | } |
998 | } |
999 | |
1000 | // Returns true if the given load is unaffected by external side-effects. |
1001 | // This essentially means that no stores to the same location can |
1002 | // occur in other functions. |
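// For example (illustrative): a load of an immutable field, or a load of an
// instance field from an allocation that cannot be aliased, is independent
// from effects.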
1003 | bool IsIndependentFromEffects(Place* place) { |
1004 | if (place->IsImmutableField()) { |
1005 | return true; |
1006 | } |
1007 | |
1008 | return (place->kind() == Place::kInstanceField) && |
1009 | !CanBeAliased(place->instance()); |
1010 | } |
1011 | |
1012 | // Returns true if there are direct loads from the given place. |
1013 | bool HasLoadsFromPlace(Definition* defn, const Place* place) { |
1014 | ASSERT(place->kind() == Place::kInstanceField); |
1015 | |
1016 | for (Value* use = defn->input_use_list(); use != NULL; |
1017 | use = use->next_use()) { |
1018 | Instruction* instr = use->instruction(); |
1019 | if (UseIsARedefinition(use) && |
1020 | HasLoadsFromPlace(instr->Cast<Definition>(), place)) { |
1021 | return true; |
1022 | } |
1023 | bool is_load = false, is_store; |
1024 | Place load_place(instr, &is_load, &is_store); |
1025 | |
1026 | if (is_load && load_place.Equals(place)) { |
1027 | return true; |
1028 | } |
1029 | } |
1030 | |
1031 | return false; |
1032 | } |
1033 | |
1034 | // Returns true if the given [use] is a redefinition (e.g. RedefinitionInstr, |
1035 | // CheckNull, CheckArrayBound, etc). |
1036 | static bool UseIsARedefinition(Value* use) { |
1037 | Instruction* instr = use->instruction(); |
1038 | return instr->IsDefinition() && |
1039 | (instr->Cast<Definition>()->RedefinedValue() == use); |
1040 | } |
1041 | |
1042 | // Check if any use of the definition can create an alias. |
1043 | // Can add more objects into aliasing_worklist_. |
1044 | bool AnyUseCreatesAlias(Definition* defn) { |
1045 | for (Value* use = defn->input_use_list(); use != NULL; |
1046 | use = use->next_use()) { |
1047 | Instruction* instr = use->instruction(); |
1048 | if (instr->HasUnknownSideEffects() || instr->IsLoadUntagged() || |
1049 | (instr->IsStoreIndexed() && |
1050 | (use->use_index() == StoreIndexedInstr::kValuePos)) || |
1051 | instr->IsStoreStaticField() || instr->IsPhi()) { |
1052 | return true; |
1053 | } else if (UseIsARedefinition(use) && |
1054 | AnyUseCreatesAlias(instr->Cast<Definition>())) { |
1055 | return true; |
1056 | } else if ((instr->IsStoreInstanceField() && |
1057 | (use->use_index() != |
1058 | StoreInstanceFieldInstr::kInstancePos))) { |
1059 | ASSERT(use->use_index() == StoreInstanceFieldInstr::kValuePos); |
// If we store this value into an object that is not aliased itself
// and the value is never loaded back from it, then the store does not
// create an alias.
1062 | StoreInstanceFieldInstr* store = instr->AsStoreInstanceField(); |
1063 | Definition* instance = |
1064 | store->instance()->definition()->OriginalDefinition(); |
1065 | if (Place::IsAllocation(instance) && |
1066 | !instance->Identity().IsAliased()) { |
1067 | bool is_load, is_store; |
1068 | Place store_place(instr, &is_load, &is_store); |
1069 | |
1070 | if (!HasLoadsFromPlace(instance, &store_place)) { |
// No loads found that match this store. If it is not yet known
// whether the object is aliased, then optimistically assume it is
// not, but add it to the worklist to check its uses transitively.
1074 | if (instance->Identity().IsUnknown()) { |
1075 | instance->SetIdentity(AliasIdentity::NotAliased()); |
1076 | aliasing_worklist_.Add(instance); |
1077 | } |
1078 | continue; |
1079 | } |
1080 | } |
1081 | return true; |
1082 | } |
1083 | } |
1084 | return false; |
1085 | } |
1086 | |
1087 | // Mark any value stored into the given object as potentially aliased. |
1088 | void MarkStoredValuesEscaping(Definition* defn) { |
1089 | // Find all stores into this object. |
1090 | for (Value* use = defn->input_use_list(); use != NULL; |
1091 | use = use->next_use()) { |
1092 | auto instr = use->instruction(); |
1093 | if (UseIsARedefinition(use)) { |
1094 | MarkStoredValuesEscaping(instr->AsDefinition()); |
1095 | continue; |
1096 | } |
1097 | if ((use->use_index() == StoreInstanceFieldInstr::kInstancePos) && |
1098 | instr->IsStoreInstanceField()) { |
1099 | StoreInstanceFieldInstr* store = instr->AsStoreInstanceField(); |
1100 | Definition* value = store->value()->definition()->OriginalDefinition(); |
1101 | if (value->Identity().IsNotAliased()) { |
1102 | value->SetIdentity(AliasIdentity::Aliased()); |
1103 | identity_rollback_.Add(value); |
1104 | |
1105 | // Add to worklist to propagate the mark transitively. |
1106 | aliasing_worklist_.Add(value); |
1107 | } |
1108 | } |
1109 | } |
1110 | } |
1111 | |
1112 | // Determine if the given definition can't be aliased. |
1113 | void ComputeAliasing(Definition* alloc) { |
1114 | ASSERT(Place::IsAllocation(alloc)); |
1115 | ASSERT(alloc->Identity().IsUnknown()); |
1116 | ASSERT(aliasing_worklist_.is_empty()); |
1117 | |
1118 | alloc->SetIdentity(AliasIdentity::NotAliased()); |
1119 | aliasing_worklist_.Add(alloc); |
1120 | |
1121 | while (!aliasing_worklist_.is_empty()) { |
1122 | Definition* defn = aliasing_worklist_.RemoveLast(); |
1123 | ASSERT(Place::IsAllocation(defn)); |
1124 | // If the definition in the worklist was optimistically marked as |
1125 | // not-aliased check that optimistic assumption still holds: check if |
1126 | // any of its uses can create an alias. |
1127 | if (!defn->Identity().IsAliased() && AnyUseCreatesAlias(defn)) { |
1128 | defn->SetIdentity(AliasIdentity::Aliased()); |
1129 | identity_rollback_.Add(defn); |
1130 | } |
1131 | |
1132 | // If the allocation site is marked as aliased conservatively mark |
1133 | // any values stored into the object aliased too. |
1134 | if (defn->Identity().IsAliased()) { |
1135 | MarkStoredValuesEscaping(defn); |
1136 | } |
1137 | } |
1138 | } |
1139 | |
1140 | Zone* zone_; |
1141 | |
1142 | DirectChainedHashMap<PointerKeyValueTrait<Place> >* places_map_; |
1143 | |
1144 | const ZoneGrowableArray<Place*>& places_; |
1145 | |
1146 | const PhiPlaceMoves* phi_moves_; |
1147 | |
1148 | // A list of all seen aliases and a map that allows looking up canonical |
1149 | // alias object. |
1150 | GrowableArray<const Place*> aliases_; |
1151 | DirectChainedHashMap<PointerKeyValueTrait<const Place> > aliases_map_; |
1152 | |
1153 | SmallSet<Place::ElementSize> typed_data_access_sizes_; |
1154 | |
// Maps alias id to the set of ids of places representing the alias.
// A place represents an alias if this alias is the least generic alias for
// the place (see ToAlias for the definition of the least generic alias).
1159 | GrowableArray<BitVector*> representatives_; |
1160 | |
1161 | // Maps alias id to set of ids of places aliased. |
1162 | GrowableArray<BitVector*> killed_; |
1163 | |
1164 | // Set of ids of places that can be affected by side-effects other than |
1165 | // explicit stores (i.e. through calls). |
1166 | BitVector* aliased_by_effects_; |
1167 | |
1168 | // Worklist used during alias analysis. |
1169 | GrowableArray<Definition*> aliasing_worklist_; |
1170 | |
1171 | // List of definitions that had their identity set to Aliased. At the end |
1172 | // of load optimization their identity will be rolled back to Unknown to |
1173 | // avoid treating them as Aliased at later stages without checking first |
1174 | // as optimizations can potentially eliminate instructions leading to |
1175 | // aliasing. |
1176 | GrowableArray<Definition*> identity_rollback_; |
1177 | }; |
1178 | |
1179 | static Definition* GetStoredValue(Instruction* instr) { |
1180 | if (instr->IsStoreIndexed()) { |
1181 | return instr->AsStoreIndexed()->value()->definition(); |
1182 | } |
1183 | |
1184 | StoreInstanceFieldInstr* store_instance_field = instr->AsStoreInstanceField(); |
1185 | if (store_instance_field != NULL) { |
1186 | return store_instance_field->value()->definition(); |
1187 | } |
1188 | |
1189 | StoreStaticFieldInstr* store_static_field = instr->AsStoreStaticField(); |
1190 | if (store_static_field != NULL) { |
1191 | return store_static_field->value()->definition(); |
1192 | } |
1193 | |
1194 | UNREACHABLE(); // Should only be called for supported store instructions. |
1195 | return NULL; |
1196 | } |
1197 | |
1198 | static bool IsPhiDependentPlace(Place* place) { |
1199 | return (place->kind() == Place::kInstanceField) && |
1200 | (place->instance() != NULL) && place->instance()->IsPhi(); |
1201 | } |
1202 | |
// For each place that depends on a phi, ensure that the equivalent places
// corresponding to the phi inputs are numbered, and record outgoing phi
// moves on each predecessor block which establish the correspondence between
// the phi-dependent place and the phi input's place that is flowing in.
1207 | static PhiPlaceMoves* ComputePhiMoves( |
1208 | DirectChainedHashMap<PointerKeyValueTrait<Place> >* map, |
1209 | ZoneGrowableArray<Place*>* places) { |
1210 | Thread* thread = Thread::Current(); |
1211 | Zone* zone = thread->zone(); |
1212 | PhiPlaceMoves* phi_moves = new (zone) PhiPlaceMoves(); |
1213 | |
1214 | for (intptr_t i = 0; i < places->length(); i++) { |
1215 | Place* place = (*places)[i]; |
1216 | |
1217 | if (IsPhiDependentPlace(place)) { |
1218 | PhiInstr* phi = place->instance()->AsPhi(); |
1219 | BlockEntryInstr* block = phi->GetBlock(); |
1220 | |
1221 | if (FLAG_trace_optimization) { |
1222 | THR_Print("phi dependent place %s\n" , place->ToCString()); |
1223 | } |
1224 | |
1225 | Place input_place(*place); |
1226 | for (intptr_t j = 0; j < phi->InputCount(); j++) { |
1227 | input_place.set_instance(phi->InputAt(j)->definition()); |
1228 | |
1229 | Place* result = map->LookupValue(&input_place); |
1230 | if (result == NULL) { |
1231 | result = Place::Wrap(zone, input_place, places->length()); |
1232 | map->Insert(result); |
1233 | places->Add(result); |
1234 | if (FLAG_trace_optimization) { |
1235 | THR_Print(" adding place %s as %" Pd "\n" , result->ToCString(), |
1236 | result->id()); |
1237 | } |
1238 | } |
1239 | phi_moves->CreateOutgoingMove(zone, block->PredecessorAt(j), |
1240 | result->id(), place->id()); |
1241 | } |
1242 | } |
1243 | } |
1244 | |
1245 | return phi_moves; |
1246 | } |
1247 | |
1248 | DART_FORCE_INLINE static void SetPlaceId(Instruction* instr, intptr_t id) { |
1249 | instr->SetPassSpecificId(CompilerPass::kCSE, id); |
1250 | } |
1251 | |
1252 | DART_FORCE_INLINE static bool HasPlaceId(const Instruction* instr) { |
1253 | return instr->HasPassSpecificId(CompilerPass::kCSE); |
1254 | } |
1255 | |
1256 | DART_FORCE_INLINE static intptr_t GetPlaceId(const Instruction* instr) { |
1257 | ASSERT(HasPlaceId(instr)); |
1258 | return instr->GetPassSpecificId(CompilerPass::kCSE); |
1259 | } |
1260 | |
1261 | enum CSEMode { kOptimizeLoads, kOptimizeStores }; |
1262 | |
1263 | static AliasedSet* NumberPlaces( |
1264 | FlowGraph* graph, |
1265 | DirectChainedHashMap<PointerKeyValueTrait<Place> >* map, |
1266 | CSEMode mode) { |
1267 | // Loads representing different expression ids will be collected and |
1268 | // used to build per offset kill sets. |
1269 | Zone* zone = graph->zone(); |
1270 | ZoneGrowableArray<Place*>* places = new (zone) ZoneGrowableArray<Place*>(10); |
1271 | |
1272 | bool has_loads = false; |
1273 | bool has_stores = false; |
1274 | for (BlockIterator it = graph->reverse_postorder_iterator(); !it.Done(); |
1275 | it.Advance()) { |
1276 | BlockEntryInstr* block = it.Current(); |
1277 | |
1278 | for (ForwardInstructionIterator instr_it(block); !instr_it.Done(); |
1279 | instr_it.Advance()) { |
1280 | Instruction* instr = instr_it.Current(); |
1281 | Place place(instr, &has_loads, &has_stores); |
1282 | if (place.kind() == Place::kNone) { |
1283 | continue; |
1284 | } |
1285 | |
1286 | Place* result = map->LookupValue(&place); |
1287 | if (result == NULL) { |
1288 | result = Place::Wrap(zone, place, places->length()); |
1289 | map->Insert(result); |
1290 | places->Add(result); |
1291 | |
1292 | if (FLAG_trace_optimization) { |
1293 | THR_Print("numbering %s as %" Pd "\n" , result->ToCString(), |
1294 | result->id()); |
1295 | } |
1296 | } |
1297 | |
1298 | SetPlaceId(instr, result->id()); |
1299 | } |
1300 | } |
1301 | |
1302 | if ((mode == kOptimizeLoads) && !has_loads) { |
1303 | return NULL; |
1304 | } |
1305 | if ((mode == kOptimizeStores) && !has_stores) { |
1306 | return NULL; |
1307 | } |
1308 | |
1309 | PhiPlaceMoves* phi_moves = ComputePhiMoves(map, places); |
1310 | |
1311 | // Build aliasing sets mapping aliases to loads. |
1312 | return new (zone) AliasedSet(zone, map, places, phi_moves); |
1313 | } |
1314 | |
1315 | // Load instructions handled by load elimination. |
1316 | static bool IsLoadEliminationCandidate(Instruction* instr) { |
1317 | return instr->IsLoadField() || instr->IsLoadIndexed() || |
1318 | instr->IsLoadStaticField(); |
1319 | } |
1320 | |
1321 | static bool IsLoopInvariantLoad(ZoneGrowableArray<BitVector*>* sets, |
intptr_t loop_header_index,
1323 | Instruction* instr) { |
1324 | return IsLoadEliminationCandidate(instr) && (sets != NULL) && |
1325 | HasPlaceId(instr) && |
1326 | (*sets)[loop_header_index]->Contains(GetPlaceId(instr)); |
1327 | } |
1328 | |
1329 | LICM::LICM(FlowGraph* flow_graph) : flow_graph_(flow_graph) { |
1330 | ASSERT(flow_graph->is_licm_allowed()); |
1331 | } |
1332 | |
1333 | void LICM::Hoist(ForwardInstructionIterator* it, |
BlockEntryInstr* pre_header,
1335 | Instruction* current) { |
1336 | if (current->IsCheckClass()) { |
1337 | current->AsCheckClass()->set_licm_hoisted(true); |
1338 | } else if (current->IsCheckSmi()) { |
1339 | current->AsCheckSmi()->set_licm_hoisted(true); |
1340 | } else if (current->IsCheckEitherNonSmi()) { |
1341 | current->AsCheckEitherNonSmi()->set_licm_hoisted(true); |
1342 | } else if (current->IsCheckArrayBound()) { |
1343 | ASSERT(!CompilerState::Current().is_aot()); // speculative in JIT only |
1344 | current->AsCheckArrayBound()->set_licm_hoisted(true); |
1345 | } else if (current->IsGenericCheckBound()) { |
1346 | ASSERT(CompilerState::Current().is_aot()); // non-speculative in AOT only |
1347 | // Does not deopt, so no need for licm_hoisted flag. |
1348 | } else if (current->IsTestCids()) { |
1349 | current->AsTestCids()->set_licm_hoisted(true); |
1350 | } |
1351 | if (FLAG_trace_optimization) { |
1352 | THR_Print("Hoisting instruction %s:%" Pd " from B%" Pd " to B%" Pd "\n" , |
1353 | current->DebugName(), current->GetDeoptId(), |
1354 | current->GetBlock()->block_id(), pre_header->block_id()); |
1355 | } |
1356 | // Move the instruction out of the loop. |
1357 | current->RemoveEnvironment(); |
1358 | if (it != NULL) { |
1359 | it->RemoveCurrentFromGraph(); |
1360 | } else { |
1361 | current->RemoveFromGraph(); |
1362 | } |
1363 | GotoInstr* last = pre_header->last_instruction()->AsGoto(); |
1364 | // Using kind kEffect will not assign a fresh ssa temporary index. |
1365 | flow_graph()->InsertBefore(last, current, last->env(), FlowGraph::kEffect); |
1366 | current->CopyDeoptIdFrom(*last); |
1367 | } |
1368 | |
1369 | void LICM::TrySpecializeSmiPhi(PhiInstr* phi, |
BlockEntryInstr* header,
BlockEntryInstr* pre_header) {
1372 | if (phi->Type()->ToCid() == kSmiCid) { |
1373 | return; |
1374 | } |
1375 | |
1376 | // Check if there is only a single kDynamicCid input to the phi that |
1377 | // comes from the pre-header. |
1378 | const intptr_t kNotFound = -1; |
1379 | intptr_t non_smi_input = kNotFound; |
1380 | for (intptr_t i = 0; i < phi->InputCount(); ++i) { |
1381 | Value* input = phi->InputAt(i); |
1382 | if (input->Type()->ToCid() != kSmiCid) { |
1383 | if ((non_smi_input != kNotFound) || |
1384 | (input->Type()->ToCid() != kDynamicCid)) { |
1385 | // There are multiple kDynamicCid inputs or there is an input that is |
1386 | // known to be non-smi. |
1387 | return; |
1388 | } else { |
1389 | non_smi_input = i; |
1390 | } |
1391 | } |
1392 | } |
1393 | |
1394 | if ((non_smi_input == kNotFound) || |
1395 | (phi->block()->PredecessorAt(non_smi_input) != pre_header)) { |
1396 | return; |
1397 | } |
1398 | |
1399 | CheckSmiInstr* check = NULL; |
1400 | for (Value* use = phi->input_use_list(); (use != NULL) && (check == NULL); |
1401 | use = use->next_use()) { |
1402 | check = use->instruction()->AsCheckSmi(); |
1403 | } |
1404 | |
1405 | if (check == NULL) { |
1406 | return; |
1407 | } |
1408 | |
// Hoist the CheckSmi instruction and make this phi a smi one.
1410 | Hoist(NULL, pre_header, check); |
1411 | |
1412 | // Replace value we are checking with phi's input. |
1413 | check->value()->BindTo(phi->InputAt(non_smi_input)->definition()); |
1414 | check->value()->SetReachingType(phi->InputAt(non_smi_input)->Type()); |
1415 | |
1416 | phi->UpdateType(CompileType::FromCid(kSmiCid)); |
1417 | } |
1418 | |
1419 | void LICM::OptimisticallySpecializeSmiPhis() { |
1420 | if (flow_graph()->function().ProhibitsHoistingCheckClass() || |
1421 | CompilerState::Current().is_aot()) { |
1422 | // Do not hoist any: Either deoptimized on a hoisted check, |
1423 | // or compiling precompiled code where we can't do optimistic |
1424 | // hoisting of checks. |
1425 | return; |
1426 | } |
1427 | |
const ZoneGrowableArray<BlockEntryInstr*>& loop_headers =
1429 | flow_graph()->GetLoopHierarchy().headers(); |
1430 | |
1431 | for (intptr_t i = 0; i < loop_headers.length(); ++i) { |
JoinEntryInstr* header = loop_headers[i]->AsJoinEntry();
// Skip loops that don't have a pre-header block.
BlockEntryInstr* pre_header = header->ImmediateDominator();
1435 | if (pre_header == NULL) continue; |
1436 | |
1437 | for (PhiIterator it(header); !it.Done(); it.Advance()) { |
1438 | TrySpecializeSmiPhi(it.Current(), header, pre_header); |
1439 | } |
1440 | } |
1441 | } |
1442 | |
// Returns true if the instruction may have a "visible" effect
// (write, throw, or other side-effect).
1444 | static bool MayHaveVisibleEffect(Instruction* instr) { |
1445 | switch (instr->tag()) { |
1446 | case Instruction::kStoreInstanceField: |
1447 | case Instruction::kStoreStaticField: |
1448 | case Instruction::kStoreIndexed: |
1449 | case Instruction::kStoreIndexedUnsafe: |
1450 | case Instruction::kStoreUntagged: |
1451 | return true; |
1452 | default: |
1453 | return instr->HasUnknownSideEffects() || instr->MayThrow(); |
1454 | } |
1455 | } |
1456 | |
1457 | void LICM::Optimize() { |
1458 | if (flow_graph()->function().ProhibitsHoistingCheckClass()) { |
1459 | // Do not hoist any. |
1460 | return; |
1461 | } |
1462 | |
1463 | // Compute loops and induction in flow graph. |
1464 | const LoopHierarchy& loop_hierarchy = flow_graph()->GetLoopHierarchy(); |
const ZoneGrowableArray<BlockEntryInstr*>& loop_headers =
1466 | loop_hierarchy.headers(); |
1467 | loop_hierarchy.ComputeInduction(); |
1468 | |
1469 | ZoneGrowableArray<BitVector*>* loop_invariant_loads = |
1470 | flow_graph()->loop_invariant_loads(); |
1471 | |
1472 | // Iterate over all loops. |
1473 | for (intptr_t i = 0; i < loop_headers.length(); ++i) { |
BlockEntryInstr* header = loop_headers[i];
1475 | |
1476 | // Skip loops that don't have a pre-header block. |
BlockEntryInstr* pre_header = header->ImmediateDominator();
1478 | if (pre_header == nullptr) { |
1479 | continue; |
1480 | } |
1481 | |
1482 | // Flag that remains true as long as the loop has not seen any instruction |
1483 | // that may have a "visible" effect (write, throw, or other side-effect). |
1484 | bool seen_visible_effect = false; |
1485 | |
1486 | // Iterate over all blocks in the loop. |
1487 | LoopInfo* loop = header->loop_info(); |
1488 | for (BitVector::Iterator loop_it(loop->blocks()); !loop_it.Done(); |
1489 | loop_it.Advance()) { |
1490 | BlockEntryInstr* block = flow_graph()->preorder()[loop_it.Current()]; |
1491 | |
1492 | // Preserve the "visible" effect flag as long as the preorder traversal |
1493 | // sees always-taken blocks. This way, we can only hoist invariant |
1494 | // may-throw instructions that are always seen during the first iteration. |
1495 | if (!seen_visible_effect && !loop->IsAlwaysTaken(block)) { |
1496 | seen_visible_effect = true; |
1497 | } |
1498 | // Iterate over all instructions in the block. |
1499 | for (ForwardInstructionIterator it(block); !it.Done(); it.Advance()) { |
1500 | Instruction* current = it.Current(); |
1501 | |
1502 | // Treat loads of static final fields specially: we can CSE them but |
1503 | // we should not move them around unless the field is initialized. |
        // Otherwise we might move the load past the initialization.
1505 | if (LoadStaticFieldInstr* load = current->AsLoadStaticField()) { |
1506 | if (load->AllowsCSE() && !load->IsFieldInitialized()) { |
1507 | seen_visible_effect = true; |
1508 | continue; |
1509 | } |
1510 | } |
1511 | |
        // Determine if we can hoist loop invariant code. Even may-throw
        // instructions can be hoisted as long as their exception is still
        // the very first "visible" effect of the loop.
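        // For illustration (hypothetical Dart-level sketch, not code from
        // this file): in
        //
        //   for (var i = 0; i < n; i++) {
        //     sum += list[i] + obj.field;  // null check on obj may throw
        //   }
        //
        // the invariant (but may-throw) check on obj can be hoisted only
        // while no earlier write or throw inside the loop could be observed
        // before it on the first iteration.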
1515 | bool is_loop_invariant = false; |
1516 | if ((current->AllowsCSE() || |
1517 | IsLoopInvariantLoad(loop_invariant_loads, i, current)) && |
1518 | (!seen_visible_effect || !current->MayThrow())) { |
1519 | is_loop_invariant = true; |
1520 | for (intptr_t i = 0; i < current->InputCount(); ++i) { |
1521 | Definition* input_def = current->InputAt(i)->definition(); |
1522 | if (!input_def->GetBlock()->Dominates(pre_header)) { |
1523 | is_loop_invariant = false; |
1524 | break; |
1525 | } |
1526 | } |
1527 | } |
1528 | |
1529 | // Hoist if all inputs are loop invariant. If not hoisted, any |
1530 | // instruction that writes, may throw, or has an unknown side |
1531 | // effect invalidates the first "visible" effect flag. |
1532 | if (is_loop_invariant) { |
1533 | Hoist(&it, pre_header, current); |
1534 | } else if (!seen_visible_effect && MayHaveVisibleEffect(current)) { |
1535 | seen_visible_effect = true; |
1536 | } |
1537 | } |
1538 | } |
1539 | } |
1540 | } |
1541 | |
1542 | void DelayAllocations::Optimize(FlowGraph* graph) { |
1543 | // Go through all AllocateObject instructions and move them down to their |
1544 | // dominant use when doing so is sound. |
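  // For illustration (hypothetical Dart-level sketch, not code from this
  // file): in
  //
  //   var p = Point(x, y);   // AllocateObject candidate
  //   if (rare) {
  //     use(p);              // the only (and thus dominant) use
  //   }
  //
  // the allocation can be moved down next to use(p), so the common path
  // does not pay for it.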
1545 | DirectChainedHashMap<IdentitySetKeyValueTrait<Instruction*>> moved; |
1546 | for (BlockIterator block_it = graph->reverse_postorder_iterator(); |
1547 | !block_it.Done(); block_it.Advance()) { |
1548 | BlockEntryInstr* block = block_it.Current(); |
1549 | |
1550 | for (ForwardInstructionIterator instr_it(block); !instr_it.Done(); |
1551 | instr_it.Advance()) { |
1552 | Definition* def = instr_it.Current()->AsDefinition(); |
1553 | if (def != nullptr && (def->IsAllocateObject() || def->IsCreateArray()) && |
1554 | def->env() == nullptr && !moved.HasKey(def)) { |
1555 | Instruction* use = DominantUse(def); |
1556 | if (use != nullptr && !use->IsPhi() && IsOneTimeUse(use, def)) { |
1557 | instr_it.RemoveCurrentFromGraph(); |
1558 | def->InsertBefore(use); |
1559 | moved.Insert(def); |
1560 | } |
1561 | } |
1562 | } |
1563 | } |
1564 | } |
1565 | |
1566 | Instruction* DelayAllocations::DominantUse(Definition* def) { |
1567 | // Find the use that dominates all other uses. |
1568 | |
1569 | // Collect all uses. |
1570 | DirectChainedHashMap<IdentitySetKeyValueTrait<Instruction*>> uses; |
1571 | for (Value::Iterator it(def->input_use_list()); !it.Done(); it.Advance()) { |
1572 | Instruction* use = it.Current()->instruction(); |
1573 | uses.Insert(use); |
1574 | } |
1575 | for (Value::Iterator it(def->env_use_list()); !it.Done(); it.Advance()) { |
1576 | Instruction* use = it.Current()->instruction(); |
1577 | uses.Insert(use); |
1578 | } |
1579 | |
1580 | // Find the dominant use. |
1581 | Instruction* dominant_use = nullptr; |
1582 | auto use_it = uses.GetIterator(); |
1583 | while (auto use = use_it.Next()) { |
1584 | // Start with the instruction before the use, then walk backwards through |
1585 | // blocks in the dominator chain until we hit the definition or another use. |
1586 | Instruction* instr = nullptr; |
1587 | if (auto phi = (*use)->AsPhi()) { |
1588 | // For phi uses, the dominant use only has to dominate the |
1589 | // predecessor block corresponding to the phi input. |
1590 | ASSERT(phi->InputCount() == phi->block()->PredecessorCount()); |
1591 | for (intptr_t i = 0; i < phi->InputCount(); i++) { |
1592 | if (phi->InputAt(i)->definition() == def) { |
1593 | instr = phi->block()->PredecessorAt(i)->last_instruction(); |
1594 | break; |
1595 | } |
1596 | } |
1597 | ASSERT(instr != nullptr); |
1598 | } else { |
1599 | instr = (*use)->previous(); |
1600 | } |
1601 | |
1602 | bool dominated = false; |
1603 | while (instr != def) { |
1604 | if (uses.HasKey(instr)) { |
1605 | // We hit another use. |
1606 | dominated = true; |
1607 | break; |
1608 | } |
1609 | if (auto block = instr->AsBlockEntry()) { |
1610 | instr = block->dominator()->last_instruction(); |
1611 | } else { |
1612 | instr = instr->previous(); |
1613 | } |
1614 | } |
1615 | if (!dominated) { |
1616 | if (dominant_use != nullptr) { |
1617 | // More than one use reached the definition, which means no use |
1618 | // dominates all other uses. |
1619 | return nullptr; |
1620 | } |
1621 | dominant_use = *use; |
1622 | } |
1623 | } |
1624 | |
1625 | return dominant_use; |
1626 | } |
1627 | |
1628 | bool DelayAllocations::IsOneTimeUse(Instruction* use, Definition* def) { |
1629 | // Check that this use is always executed at most once for each execution of |
1630 | // the definition, i.e. that there is no path from the use to itself that |
1631 | // doesn't pass through the definition. |
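  // For illustration (hypothetical sketch): if the allocation is placed
  // before a loop and the candidate use sits inside the loop body, sinking
  // the allocation to that use would execute it on every iteration instead
  // of once, so the same object would no longer be observed across
  // iterations. Such uses are rejected.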
1632 | BlockEntryInstr* use_block = use->GetBlock(); |
1633 | BlockEntryInstr* def_block = def->GetBlock(); |
1634 | if (use_block == def_block) return true; |
1635 | |
1636 | DirectChainedHashMap<IdentitySetKeyValueTrait<BlockEntryInstr*>> seen; |
1637 | GrowableArray<BlockEntryInstr*> worklist; |
1638 | worklist.Add(use_block); |
1639 | |
1640 | while (!worklist.is_empty()) { |
1641 | BlockEntryInstr* block = worklist.RemoveLast(); |
1642 | for (intptr_t i = 0; i < block->PredecessorCount(); i++) { |
1643 | BlockEntryInstr* pred = block->PredecessorAt(i); |
1644 | if (pred == use_block) return false; |
1645 | if (pred == def_block) continue; |
1646 | if (seen.HasKey(pred)) continue; |
1647 | seen.Insert(pred); |
1648 | worklist.Add(pred); |
1649 | } |
1650 | } |
1651 | return true; |
1652 | } |
1653 | |
1654 | class LoadOptimizer : public ValueObject { |
1655 | public: |
1656 | LoadOptimizer(FlowGraph* graph, AliasedSet* aliased_set) |
1657 | : graph_(graph), |
1658 | aliased_set_(aliased_set), |
1659 | in_(graph_->preorder().length()), |
1660 | out_(graph_->preorder().length()), |
1661 | gen_(graph_->preorder().length()), |
1662 | kill_(graph_->preorder().length()), |
1663 | exposed_values_(graph_->preorder().length()), |
1664 | out_values_(graph_->preorder().length()), |
1665 | phis_(5), |
1666 | worklist_(5), |
1667 | congruency_worklist_(6), |
1668 | in_worklist_(NULL), |
1669 | forwarded_(false) { |
1670 | const intptr_t num_blocks = graph_->preorder().length(); |
1671 | for (intptr_t i = 0; i < num_blocks; i++) { |
1672 | out_.Add(NULL); |
1673 | gen_.Add(new (Z) BitVector(Z, aliased_set_->max_place_id())); |
1674 | kill_.Add(new (Z) BitVector(Z, aliased_set_->max_place_id())); |
1675 | in_.Add(new (Z) BitVector(Z, aliased_set_->max_place_id())); |
1676 | |
1677 | exposed_values_.Add(NULL); |
1678 | out_values_.Add(NULL); |
1679 | } |
1680 | } |
1681 | |
1682 | ~LoadOptimizer() { aliased_set_->RollbackAliasedIdentites(); } |
1683 | |
1684 | Isolate* isolate() const { return graph_->isolate(); } |
1685 | Zone* zone() const { return graph_->zone(); } |
1686 | |
1687 | static bool OptimizeGraph(FlowGraph* graph) { |
1688 | ASSERT(FLAG_load_cse); |
1689 | |
1690 | // For now, bail out for large functions to avoid OOM situations. |
1691 | // TODO(fschneider): Fix the memory consumption issue. |
1692 | intptr_t function_length = graph->function().end_token_pos().Pos() - |
1693 | graph->function().token_pos().Pos(); |
1694 | if (function_length >= FLAG_huge_method_cutoff_in_tokens) { |
1695 | return false; |
1696 | } |
1697 | |
1698 | DirectChainedHashMap<PointerKeyValueTrait<Place> > map; |
1699 | AliasedSet* aliased_set = NumberPlaces(graph, &map, kOptimizeLoads); |
1700 | if ((aliased_set != NULL) && !aliased_set->IsEmpty()) { |
      // If any loads were forwarded, return true from Optimize to run load
      // forwarding again. This allows forwarding chains of loads.
1703 | // This is especially important for context variables as they are built |
1704 | // as loads from loaded context. |
1705 | // TODO(vegorov): renumber newly discovered congruences during the |
1706 | // forwarding to forward chains without running whole pass twice. |
1707 | LoadOptimizer load_optimizer(graph, aliased_set); |
1708 | return load_optimizer.Optimize(); |
1709 | } |
1710 | return false; |
1711 | } |
1712 | |
1713 | private: |
1714 | bool Optimize() { |
1715 | // Initializer calls should be eliminated before ComputeInitialSets() |
1716 | // in order to calculate kill sets more precisely. |
1717 | OptimizeLazyInitialization(); |
1718 | |
1719 | ComputeInitialSets(); |
1720 | ComputeOutSets(); |
1721 | ComputeOutValues(); |
1722 | if (graph_->is_licm_allowed()) { |
1723 | MarkLoopInvariantLoads(); |
1724 | } |
1725 | ForwardLoads(); |
1726 | EmitPhis(); |
1727 | return forwarded_; |
1728 | } |
1729 | |
1730 | bool CallsInitializer(Instruction* instr) { |
1731 | if (auto* load_field = instr->AsLoadField()) { |
1732 | return load_field->calls_initializer(); |
1733 | } else if (auto* load_static = instr->AsLoadStaticField()) { |
1734 | return load_static->calls_initializer(); |
1735 | } |
1736 | return false; |
1737 | } |
1738 | |
1739 | void ClearCallsInitializer(Instruction* instr) { |
1740 | if (auto* load_field = instr->AsLoadField()) { |
1741 | load_field->set_calls_initializer(false); |
1742 | } else if (auto* load_static = instr->AsLoadStaticField()) { |
1743 | load_static->set_calls_initializer(false); |
1744 | } else { |
1745 | UNREACHABLE(); |
1746 | } |
1747 | } |
1748 | |
  // Returns true if the given instruction stores the sentinel value.
  // Such a store doesn't initialize the corresponding field.
1751 | bool IsSentinelStore(Instruction* instr) { |
1752 | Value* value = nullptr; |
1753 | if (auto* store_field = instr->AsStoreInstanceField()) { |
1754 | value = store_field->value(); |
1755 | } else if (auto* store_static = instr->AsStoreStaticField()) { |
1756 | value = store_static->value(); |
1757 | } |
1758 | return value != nullptr && value->BindsToConstant() && |
1759 | (value->BoundConstant().raw() == Object::sentinel().raw()); |
1760 | } |
1761 | |
1762 | // This optimization pass tries to get rid of lazy initializer calls in |
1763 | // LoadField and LoadStaticField instructions. The "initialized" state of |
1764 | // places is propagated through the flow graph. |
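  // For illustration (hypothetical Dart-level sketch, not code from this
  // file):
  //
  //   static int cache = expensiveInit();
  //   ...
  //   use(cache);   // LoadStaticField with calls_initializer() == true
  //   use(cache);   // the place is already initialized here, so the
  //                 // initializer call on this second load can be dropped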
1765 | void OptimizeLazyInitialization() { |
1766 | if (!FLAG_optimize_lazy_initializer_calls) { |
1767 | return; |
1768 | } |
1769 | |
    // 1) Populate 'gen' sets with places which are initialized at each basic
    //    block. Optimize lazy initializer calls within each basic block and
    //    figure out if there are lazy initializer calls left to optimize.
1773 | bool has_lazy_initializer_calls = false; |
1774 | for (BlockIterator block_it = graph_->reverse_postorder_iterator(); |
1775 | !block_it.Done(); block_it.Advance()) { |
1776 | BlockEntryInstr* block = block_it.Current(); |
1777 | BitVector* gen = gen_[block->preorder_number()]; |
1778 | |
1779 | for (ForwardInstructionIterator instr_it(block); !instr_it.Done(); |
1780 | instr_it.Advance()) { |
1781 | Instruction* instr = instr_it.Current(); |
1782 | |
1783 | bool is_load = false, is_store = false; |
1784 | Place place(instr, &is_load, &is_store); |
1785 | |
1786 | if (is_store && !IsSentinelStore(instr)) { |
1787 | gen->Add(GetPlaceId(instr)); |
1788 | } else if (is_load) { |
1789 | const auto place_id = GetPlaceId(instr); |
1790 | if (CallsInitializer(instr)) { |
1791 | if (gen->Contains(place_id)) { |
1792 | ClearCallsInitializer(instr); |
1793 | } else { |
1794 | has_lazy_initializer_calls = true; |
1795 | } |
1796 | } |
1797 | gen->Add(place_id); |
1798 | } |
1799 | } |
1800 | |
1801 | // Spread initialized state through outgoing phis. |
1802 | PhiPlaceMoves::MovesList phi_moves = |
1803 | aliased_set_->phi_moves()->GetOutgoingMoves(block); |
1804 | if (phi_moves != nullptr) { |
1805 | for (intptr_t i = 0, n = phi_moves->length(); i < n; ++i) { |
1806 | const intptr_t from = (*phi_moves)[i].from(); |
1807 | const intptr_t to = (*phi_moves)[i].to(); |
1808 | if ((from != to) && gen->Contains(from)) { |
1809 | gen->Add(to); |
1810 | } |
1811 | } |
1812 | } |
1813 | } |
1814 | |
1815 | if (has_lazy_initializer_calls) { |
1816 | // 2) Propagate initialized state between blocks, calculating |
1817 | // incoming initialized state. Iterate until reaching fixed point. |
1818 | BitVector* temp = new (Z) BitVector(Z, aliased_set_->max_place_id()); |
1819 | bool changed = true; |
1820 | while (changed) { |
1821 | changed = false; |
1822 | |
1823 | for (BlockIterator block_it = graph_->reverse_postorder_iterator(); |
1824 | !block_it.Done(); block_it.Advance()) { |
1825 | BlockEntryInstr* block = block_it.Current(); |
1826 | BitVector* block_in = in_[block->preorder_number()]; |
1827 | BitVector* gen = gen_[block->preorder_number()]; |
1828 | |
1829 | // Incoming initialized state is the intersection of all |
1830 | // outgoing initialized states of predecessors. |
1831 | if (block->IsGraphEntry()) { |
1832 | temp->Clear(); |
1833 | } else { |
1834 | temp->SetAll(); |
1835 | ASSERT(block->PredecessorCount() > 0); |
1836 | for (intptr_t i = 0, pred_count = block->PredecessorCount(); |
1837 | i < pred_count; ++i) { |
1838 | BlockEntryInstr* pred = block->PredecessorAt(i); |
1839 | BitVector* pred_out = gen_[pred->preorder_number()]; |
1840 | temp->Intersect(pred_out); |
1841 | } |
1842 | } |
1843 | |
1844 | if (!temp->Equals(*block_in)) { |
1845 | ASSERT(block_in->SubsetOf(*temp)); |
1846 | block_in->AddAll(temp); |
1847 | gen->AddAll(temp); |
1848 | changed = true; |
1849 | } |
1850 | } |
1851 | } |
1852 | |
1853 | // 3) Single pass through basic blocks to optimize lazy |
1854 | // initializer calls using calculated incoming inter-block |
1855 | // initialized state. |
1856 | for (BlockIterator block_it = graph_->reverse_postorder_iterator(); |
1857 | !block_it.Done(); block_it.Advance()) { |
1858 | BlockEntryInstr* block = block_it.Current(); |
1859 | BitVector* block_in = in_[block->preorder_number()]; |
1860 | |
1861 | for (ForwardInstructionIterator instr_it(block); !instr_it.Done(); |
1862 | instr_it.Advance()) { |
1863 | Instruction* instr = instr_it.Current(); |
1864 | if (CallsInitializer(instr) && |
1865 | block_in->Contains(GetPlaceId(instr))) { |
1866 | ClearCallsInitializer(instr); |
1867 | } |
1868 | } |
1869 | } |
1870 | } |
1871 | |
1872 | // Clear sets which are also used in the main part of load forwarding. |
1873 | for (intptr_t i = 0, n = graph_->preorder().length(); i < n; ++i) { |
1874 | gen_[i]->Clear(); |
1875 | in_[i]->Clear(); |
1876 | } |
1877 | } |
1878 | |
1879 | // Only forward stores to normal arrays, float64, and simd arrays |
1880 | // to loads because other array stores (intXX/uintXX/float32) |
1881 | // may implicitly convert the value stored. |
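  // For illustration (hypothetical Dart-level sketch): after
  //
  //   Int8List l = ...;
  //   l[0] = 1000;     // the stored value is truncated to -24 in memory
  //   x = l[0];
  //
  // forwarding the stored value 1000 to the load would be incorrect, so
  // such stores are excluded here.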
1882 | bool CanForwardStore(StoreIndexedInstr* array_store) { |
    return ((array_store == nullptr) ||
            (array_store->class_id() == kArrayCid) ||
            (array_store->class_id() == kTypedDataFloat64ArrayCid) ||
            (array_store->class_id() == kTypedDataFloat32x4ArrayCid));
1888 | } |
1889 | |
1890 | // Compute sets of loads generated and killed by each block. |
1891 | // Additionally compute upwards exposed and generated loads for each block. |
1892 | // Exposed loads are those that can be replaced if a corresponding |
  // reaching load is found.
1894 | // Loads that are locally redundant will be replaced as we go through |
1895 | // instructions. |
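  // These per-block sets feed the standard forward dataflow problem solved
  // by ComputeOutSets():
  //
  //   IN(B)  = intersection of OUT(P) over predecessors P (after phi moves)
  //   OUT(B) = (IN(B) - KILL(B)) + GEN(B)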
1896 | void ComputeInitialSets() { |
1897 | for (BlockIterator block_it = graph_->reverse_postorder_iterator(); |
1898 | !block_it.Done(); block_it.Advance()) { |
1899 | BlockEntryInstr* block = block_it.Current(); |
1900 | const intptr_t preorder_number = block->preorder_number(); |
1901 | |
1902 | BitVector* kill = kill_[preorder_number]; |
1903 | BitVector* gen = gen_[preorder_number]; |
1904 | |
1905 | ZoneGrowableArray<Definition*>* exposed_values = NULL; |
1906 | ZoneGrowableArray<Definition*>* out_values = NULL; |
1907 | |
1908 | for (ForwardInstructionIterator instr_it(block); !instr_it.Done(); |
1909 | instr_it.Advance()) { |
1910 | Instruction* instr = instr_it.Current(); |
1911 | |
1912 | bool is_load = false, is_store = false; |
1913 | Place place(instr, &is_load, &is_store); |
1914 | |
1915 | BitVector* killed = NULL; |
1916 | if (is_store) { |
1917 | const intptr_t alias_id = |
1918 | aliased_set_->LookupAliasId(place.ToAlias()); |
1919 | if (alias_id != AliasedSet::kNoAlias) { |
1920 | killed = aliased_set_->GetKilledSet(alias_id); |
1921 | } else if (!place.IsImmutableField()) { |
            // We encountered an unknown alias: this means intrablock load
            // forwarding refined a parameter of this store, for example
1924 | // |
1925 | // o <- alloc() |
1926 | // a.f <- o |
1927 | // u <- a.f |
1928 | // u.x <- null ;; this store alias is *.x |
1929 | // |
1930 | // after intrablock load forwarding |
1931 | // |
1932 | // o <- alloc() |
1933 | // a.f <- o |
1934 | // o.x <- null ;; this store alias is o.x |
1935 | // |
            // In this case we fall back to using the place id recorded in
            // the instruction, which still points to the old place with a
            // more generic alias.
1939 | const intptr_t old_alias_id = aliased_set_->LookupAliasId( |
1940 | aliased_set_->places()[GetPlaceId(instr)]->ToAlias()); |
1941 | killed = aliased_set_->GetKilledSet(old_alias_id); |
1942 | } |
1943 | |
1944 | // Find canonical place of store. |
1945 | Place* canonical_place = nullptr; |
1946 | if (CanForwardStore(instr->AsStoreIndexed())) { |
1947 | canonical_place = aliased_set_->LookupCanonical(&place); |
1948 | if (canonical_place != nullptr) { |
1949 | // Is this a redundant store (stored value already resides |
1950 | // in this field)? |
1951 | const intptr_t place_id = canonical_place->id(); |
1952 | if (gen->Contains(place_id)) { |
1953 | ASSERT((out_values != nullptr) && |
1954 | ((*out_values)[place_id] != nullptr)); |
1955 | if ((*out_values)[place_id] == GetStoredValue(instr)) { |
1956 | if (FLAG_trace_optimization) { |
1957 | THR_Print("Removing redundant store to place %" Pd |
1958 | " in block B%" Pd "\n" , |
1959 | GetPlaceId(instr), block->block_id()); |
1960 | } |
1961 | instr_it.RemoveCurrentFromGraph(); |
1962 | continue; |
1963 | } |
1964 | } |
1965 | } |
1966 | } |
1967 | |
1968 | // Update kill/gen/out_values (after inspection of incoming values). |
1969 | if (killed != nullptr) { |
1970 | kill->AddAll(killed); |
1971 | // There is no need to clear out_values when clearing GEN set |
1972 | // because only those values that are in the GEN set |
1973 | // will ever be used. |
1974 | gen->RemoveAll(killed); |
1975 | } |
1976 | if (canonical_place != nullptr) { |
1977 | // Store has a corresponding numbered place that might have a |
1978 | // load. Try forwarding stored value to it. |
1979 | gen->Add(canonical_place->id()); |
1980 | if (out_values == nullptr) out_values = CreateBlockOutValues(); |
1981 | (*out_values)[canonical_place->id()] = GetStoredValue(instr); |
1982 | } |
1983 | |
1984 | ASSERT(!instr->IsDefinition() || |
1985 | !IsLoadEliminationCandidate(instr->AsDefinition())); |
1986 | continue; |
1987 | } else if (is_load) { |
1988 | // Check if this load needs renumbering because of the intrablock |
1989 | // load forwarding. |
1990 | const Place* canonical = aliased_set_->LookupCanonical(&place); |
1991 | if ((canonical != NULL) && |
1992 | (canonical->id() != GetPlaceId(instr->AsDefinition()))) { |
1993 | SetPlaceId(instr->AsDefinition(), canonical->id()); |
1994 | } |
1995 | } |
1996 | |
1997 | // If instruction has effects then kill all loads affected. |
1998 | if (instr->HasUnknownSideEffects()) { |
1999 | kill->AddAll(aliased_set_->aliased_by_effects()); |
2000 | // There is no need to clear out_values when removing values from GEN |
2001 | // set because only those values that are in the GEN set |
2002 | // will ever be used. |
2003 | gen->RemoveAll(aliased_set_->aliased_by_effects()); |
2004 | } |
2005 | |
2006 | Definition* defn = instr->AsDefinition(); |
2007 | if (defn == NULL) { |
2008 | continue; |
2009 | } |
2010 | |
2011 | // For object allocation forward initial values of the fields to |
2012 | // subsequent loads (and potential dead stores) except for final |
2013 | // fields of escaping objects. Final fields are initialized in |
2014 | // constructor which potentially was not inlined into the function |
2015 | // that we are currently optimizing. However at the same time we |
2016 | // assume that values of the final fields can be forwarded across |
2017 | // side-effects. If we add 'null' as known values for these fields |
2018 | // here we will incorrectly propagate this null across constructor |
2019 | // invocation. |
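        // For illustration (IR-level sketch):
        //
        //   o <- AllocateObject(C)
        //   v <- LoadField(o.f)    ;; forwarded from the known initial
        //                          ;; value (null or the type arguments)
        //
        // unless o can be aliased and f is an immutable Dart field.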
2020 | if (auto alloc = instr->AsAllocateObject()) { |
2021 | for (Value* use = alloc->input_use_list(); use != NULL; |
2022 | use = use->next_use()) { |
2023 | // Look for all immediate loads/stores from this object. |
2024 | if (use->use_index() != 0) { |
2025 | continue; |
2026 | } |
2027 | const Slot* slot = nullptr; |
2028 | intptr_t place_id = 0; |
2029 | if (auto load = use->instruction()->AsLoadField()) { |
2030 | slot = &load->slot(); |
2031 | place_id = GetPlaceId(load); |
2032 | } else if (auto store = |
2033 | use->instruction()->AsStoreInstanceField()) { |
2034 | slot = &store->slot(); |
2035 | place_id = GetPlaceId(store); |
2036 | } |
2037 | |
2038 | if (slot != nullptr) { |
              // Found a load/store. Initialize the current value of the field
              // to null for normal fields, or to the type arguments for the
              // type arguments slot.
2041 | |
2042 | // If the object escapes then don't forward final fields - see |
2043 | // the comment above for explanation. |
2044 | if (aliased_set_->CanBeAliased(alloc) && slot->IsDartField() && |
2045 | slot->is_immutable()) { |
2046 | continue; |
2047 | } |
2048 | |
2049 | Definition* forward_def = graph_->constant_null(); |
2050 | if (alloc->type_arguments() != nullptr) { |
2051 | const Slot& type_args_slot = Slot::GetTypeArgumentsSlotFor( |
2052 | graph_->thread(), alloc->cls()); |
2053 | if (slot->IsIdentical(type_args_slot)) { |
2054 | forward_def = alloc->type_arguments()->definition(); |
2055 | } |
2056 | } |
2057 | gen->Add(place_id); |
2058 | if (out_values == nullptr) out_values = CreateBlockOutValues(); |
2059 | (*out_values)[place_id] = forward_def; |
2060 | } |
2061 | } |
2062 | continue; |
2063 | } |
2064 | |
2065 | if (!IsLoadEliminationCandidate(defn)) { |
2066 | continue; |
2067 | } |
2068 | |
2069 | const intptr_t place_id = GetPlaceId(defn); |
2070 | if (gen->Contains(place_id)) { |
2071 | // This is a locally redundant load. |
2072 | ASSERT((out_values != NULL) && ((*out_values)[place_id] != NULL)); |
2073 | |
2074 | Definition* replacement = (*out_values)[place_id]; |
2075 | graph_->EnsureSSATempIndex(defn, replacement); |
2076 | if (FLAG_trace_optimization) { |
2077 | THR_Print("Replacing load v%" Pd " with v%" Pd "\n" , |
2078 | defn->ssa_temp_index(), replacement->ssa_temp_index()); |
2079 | } |
2080 | |
2081 | defn->ReplaceUsesWith(replacement); |
2082 | instr_it.RemoveCurrentFromGraph(); |
2083 | forwarded_ = true; |
2084 | continue; |
2085 | } else if (!kill->Contains(place_id)) { |
2086 | // This is an exposed load: it is the first representative of a |
2087 | // given expression id and it is not killed on the path from |
2088 | // the block entry. |
2089 | if (exposed_values == NULL) { |
2090 | static const intptr_t kMaxExposedValuesInitialSize = 5; |
2091 | exposed_values = new (Z) ZoneGrowableArray<Definition*>( |
2092 | Utils::Minimum(kMaxExposedValuesInitialSize, |
2093 | aliased_set_->max_place_id())); |
2094 | } |
2095 | |
2096 | exposed_values->Add(defn); |
2097 | } |
2098 | |
2099 | gen->Add(place_id); |
2100 | |
2101 | if (out_values == NULL) out_values = CreateBlockOutValues(); |
2102 | (*out_values)[place_id] = defn; |
2103 | } |
2104 | |
2105 | exposed_values_[preorder_number] = exposed_values; |
2106 | out_values_[preorder_number] = out_values; |
2107 | } |
2108 | } |
2109 | |
2110 | static void PerformPhiMoves(PhiPlaceMoves::MovesList phi_moves, |
2111 | BitVector* out, |
2112 | BitVector* forwarded_loads) { |
2113 | forwarded_loads->Clear(); |
2114 | |
2115 | for (intptr_t i = 0; i < phi_moves->length(); i++) { |
2116 | const intptr_t from = (*phi_moves)[i].from(); |
2117 | const intptr_t to = (*phi_moves)[i].to(); |
2118 | if (from == to) continue; |
2119 | |
2120 | if (out->Contains(from)) { |
2121 | forwarded_loads->Add(to); |
2122 | } |
2123 | } |
2124 | |
2125 | for (intptr_t i = 0; i < phi_moves->length(); i++) { |
2126 | const intptr_t from = (*phi_moves)[i].from(); |
2127 | const intptr_t to = (*phi_moves)[i].to(); |
2128 | if (from == to) continue; |
2129 | |
2130 | out->Remove(to); |
2131 | } |
2132 | |
2133 | out->AddAll(forwarded_loads); |
2134 | } |
2135 | |
  // Compute OUT sets by propagating them iteratively until a fixed point
  // is reached.
2138 | void ComputeOutSets() { |
2139 | BitVector* temp = new (Z) BitVector(Z, aliased_set_->max_place_id()); |
2140 | BitVector* forwarded_loads = |
2141 | new (Z) BitVector(Z, aliased_set_->max_place_id()); |
2142 | BitVector* temp_out = new (Z) BitVector(Z, aliased_set_->max_place_id()); |
2143 | |
2144 | bool changed = true; |
2145 | while (changed) { |
2146 | changed = false; |
2147 | |
2148 | for (BlockIterator block_it = graph_->reverse_postorder_iterator(); |
2149 | !block_it.Done(); block_it.Advance()) { |
2150 | BlockEntryInstr* block = block_it.Current(); |
2151 | |
2152 | const intptr_t preorder_number = block->preorder_number(); |
2153 | |
2154 | BitVector* block_in = in_[preorder_number]; |
2155 | BitVector* block_out = out_[preorder_number]; |
2156 | BitVector* block_kill = kill_[preorder_number]; |
2157 | BitVector* block_gen = gen_[preorder_number]; |
2158 | |
2159 | // Compute block_in as the intersection of all out(p) where p |
2160 | // is a predecessor of the current block. |
2161 | if (block->IsGraphEntry()) { |
2162 | temp->Clear(); |
2163 | } else { |
2164 | temp->SetAll(); |
2165 | ASSERT(block->PredecessorCount() > 0); |
2166 | for (intptr_t i = 0; i < block->PredecessorCount(); i++) { |
2167 | BlockEntryInstr* pred = block->PredecessorAt(i); |
2168 | BitVector* pred_out = out_[pred->preorder_number()]; |
2169 | if (pred_out == NULL) continue; |
2170 | PhiPlaceMoves::MovesList phi_moves = |
2171 | aliased_set_->phi_moves()->GetOutgoingMoves(pred); |
2172 | if (phi_moves != NULL) { |
2173 | // If there are phi moves, perform intersection with |
2174 | // a copy of pred_out where the phi moves are applied. |
2175 | temp_out->CopyFrom(pred_out); |
2176 | PerformPhiMoves(phi_moves, temp_out, forwarded_loads); |
2177 | pred_out = temp_out; |
2178 | } |
2179 | temp->Intersect(pred_out); |
2180 | } |
2181 | } |
2182 | |
2183 | if (!temp->Equals(*block_in) || (block_out == NULL)) { |
2184 | // If IN set has changed propagate the change to OUT set. |
2185 | block_in->CopyFrom(temp); |
2186 | |
2187 | temp->RemoveAll(block_kill); |
2188 | temp->AddAll(block_gen); |
2189 | |
2190 | if ((block_out == NULL) || !block_out->Equals(*temp)) { |
2191 | if (block_out == NULL) { |
2192 | block_out = out_[preorder_number] = |
2193 | new (Z) BitVector(Z, aliased_set_->max_place_id()); |
2194 | } |
2195 | block_out->CopyFrom(temp); |
2196 | changed = true; |
2197 | } |
2198 | } |
2199 | } |
2200 | } |
2201 | } |
2202 | |
2203 | // Compute out_values mappings by propagating them in reverse postorder once |
2204 | // through the graph. Generate phis on back edges where eager merge is |
2205 | // impossible. |
2206 | // No replacement is done at this point and thus any out_value[place_id] is |
2207 | // changed at most once: from NULL to an actual value. |
2208 | // When merging incoming loads we might need to create a phi. |
  // These phis are not inserted into the graph immediately because some of
  // them might become redundant after load forwarding is done.
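  // For illustration (sketch): a loop header's incoming value for a place
  // can depend on the back edge, whose out_values are not known when the
  // header is visited in reverse postorder; a pending phi is created for it
  // and its inputs are filled in FillPhiInputs() after all blocks are done.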
2211 | void ComputeOutValues() { |
2212 | GrowableArray<PhiInstr*> pending_phis(5); |
2213 | ZoneGrowableArray<Definition*>* temp_forwarded_values = NULL; |
2214 | |
2215 | for (BlockIterator block_it = graph_->reverse_postorder_iterator(); |
2216 | !block_it.Done(); block_it.Advance()) { |
2217 | BlockEntryInstr* block = block_it.Current(); |
2218 | |
2219 | const bool can_merge_eagerly = CanMergeEagerly(block); |
2220 | |
2221 | const intptr_t preorder_number = block->preorder_number(); |
2222 | |
2223 | ZoneGrowableArray<Definition*>* block_out_values = |
2224 | out_values_[preorder_number]; |
2225 | |
2226 | // If OUT set has changed then we have new values available out of |
2227 | // the block. Compute these values creating phi where necessary. |
2228 | for (BitVector::Iterator it(out_[preorder_number]); !it.Done(); |
2229 | it.Advance()) { |
2230 | const intptr_t place_id = it.Current(); |
2231 | |
2232 | if (block_out_values == NULL) { |
2233 | out_values_[preorder_number] = block_out_values = |
2234 | CreateBlockOutValues(); |
2235 | } |
2236 | |
2237 | if ((*block_out_values)[place_id] == NULL) { |
2238 | ASSERT(block->PredecessorCount() > 0); |
2239 | Definition* in_value = |
2240 | can_merge_eagerly ? MergeIncomingValues(block, place_id) : NULL; |
2241 | if ((in_value == NULL) && |
2242 | (in_[preorder_number]->Contains(place_id))) { |
2243 | PhiInstr* phi = new (Z) |
2244 | PhiInstr(block->AsJoinEntry(), block->PredecessorCount()); |
2245 | SetPlaceId(phi, place_id); |
2246 | pending_phis.Add(phi); |
2247 | in_value = phi; |
2248 | } |
2249 | (*block_out_values)[place_id] = in_value; |
2250 | } |
2251 | } |
2252 | |
2253 | // If the block has outgoing phi moves perform them. Use temporary list |
2254 | // of values to ensure that cyclic moves are performed correctly. |
2255 | PhiPlaceMoves::MovesList phi_moves = |
2256 | aliased_set_->phi_moves()->GetOutgoingMoves(block); |
2257 | if ((phi_moves != NULL) && (block_out_values != NULL)) { |
2258 | if (temp_forwarded_values == NULL) { |
2259 | temp_forwarded_values = CreateBlockOutValues(); |
2260 | } |
2261 | |
2262 | for (intptr_t i = 0; i < phi_moves->length(); i++) { |
2263 | const intptr_t from = (*phi_moves)[i].from(); |
2264 | const intptr_t to = (*phi_moves)[i].to(); |
2265 | if (from == to) continue; |
2266 | |
2267 | (*temp_forwarded_values)[to] = (*block_out_values)[from]; |
2268 | } |
2269 | |
2270 | for (intptr_t i = 0; i < phi_moves->length(); i++) { |
2271 | const intptr_t from = (*phi_moves)[i].from(); |
2272 | const intptr_t to = (*phi_moves)[i].to(); |
2273 | if (from == to) continue; |
2274 | |
2275 | (*block_out_values)[to] = (*temp_forwarded_values)[to]; |
2276 | } |
2277 | } |
2278 | |
2279 | if (FLAG_trace_load_optimization) { |
2280 | THR_Print("B%" Pd "\n" , block->block_id()); |
2281 | THR_Print(" IN: " ); |
2282 | aliased_set_->PrintSet(in_[preorder_number]); |
2283 | THR_Print("\n" ); |
2284 | |
2285 | THR_Print(" KILL: " ); |
2286 | aliased_set_->PrintSet(kill_[preorder_number]); |
2287 | THR_Print("\n" ); |
2288 | |
2289 | THR_Print(" OUT: " ); |
2290 | aliased_set_->PrintSet(out_[preorder_number]); |
2291 | THR_Print("\n" ); |
2292 | } |
2293 | } |
2294 | |
2295 | // All blocks were visited. Fill pending phis with inputs |
2296 | // that flow on back edges. |
2297 | for (intptr_t i = 0; i < pending_phis.length(); i++) { |
2298 | FillPhiInputs(pending_phis[i]); |
2299 | } |
2300 | } |
2301 | |
2302 | bool CanMergeEagerly(BlockEntryInstr* block) { |
2303 | for (intptr_t i = 0; i < block->PredecessorCount(); i++) { |
2304 | BlockEntryInstr* pred = block->PredecessorAt(i); |
2305 | if (pred->postorder_number() < block->postorder_number()) { |
2306 | return false; |
2307 | } |
2308 | } |
2309 | return true; |
2310 | } |
2311 | |
2312 | void MarkLoopInvariantLoads() { |
    const ZoneGrowableArray<BlockEntryInstr*>& loop_headers =
        graph_->GetLoopHierarchy().headers();
2315 | |
2316 | ZoneGrowableArray<BitVector*>* invariant_loads = |
2317 | new (Z) ZoneGrowableArray<BitVector*>(loop_headers.length()); |
2318 | |
2319 | for (intptr_t i = 0; i < loop_headers.length(); i++) { |
      BlockEntryInstr* header = loop_headers[i];
      BlockEntryInstr* pre_header = header->ImmediateDominator();
2322 | if (pre_header == NULL) { |
2323 | invariant_loads->Add(NULL); |
2324 | continue; |
2325 | } |
2326 | |
2327 | BitVector* loop_gen = new (Z) BitVector(Z, aliased_set_->max_place_id()); |
2328 | for (BitVector::Iterator loop_it(header->loop_info()->blocks()); |
2329 | !loop_it.Done(); loop_it.Advance()) { |
2330 | const intptr_t preorder_number = loop_it.Current(); |
2331 | loop_gen->AddAll(gen_[preorder_number]); |
2332 | } |
2333 | |
2334 | for (BitVector::Iterator loop_it(header->loop_info()->blocks()); |
2335 | !loop_it.Done(); loop_it.Advance()) { |
2336 | const intptr_t preorder_number = loop_it.Current(); |
2337 | loop_gen->RemoveAll(kill_[preorder_number]); |
2338 | } |
2339 | |
2340 | if (FLAG_trace_optimization) { |
2341 | for (BitVector::Iterator it(loop_gen); !it.Done(); it.Advance()) { |
2342 | THR_Print("place %s is loop invariant for B%" Pd "\n" , |
2343 | aliased_set_->places()[it.Current()]->ToCString(), |
2344 | header->block_id()); |
2345 | } |
2346 | } |
2347 | |
2348 | invariant_loads->Add(loop_gen); |
2349 | } |
2350 | |
2351 | graph_->set_loop_invariant_loads(invariant_loads); |
2352 | } |
2353 | |
2354 | // Compute incoming value for the given expression id. |
2355 | // Will create a phi if different values are incoming from multiple |
2356 | // predecessors. |
2357 | Definition* MergeIncomingValues(BlockEntryInstr* block, intptr_t place_id) { |
2358 | // First check if the same value is coming in from all predecessors. |
2359 | static Definition* const kDifferentValuesMarker = |
2360 | reinterpret_cast<Definition*>(-1); |
2361 | Definition* incoming = NULL; |
2362 | for (intptr_t i = 0; i < block->PredecessorCount(); i++) { |
2363 | BlockEntryInstr* pred = block->PredecessorAt(i); |
2364 | ZoneGrowableArray<Definition*>* pred_out_values = |
2365 | out_values_[pred->preorder_number()]; |
2366 | if ((pred_out_values == NULL) || ((*pred_out_values)[place_id] == NULL)) { |
2367 | return NULL; |
2368 | } else if (incoming == NULL) { |
2369 | incoming = (*pred_out_values)[place_id]; |
2370 | } else if (incoming != (*pred_out_values)[place_id]) { |
2371 | incoming = kDifferentValuesMarker; |
2372 | } |
2373 | } |
2374 | |
2375 | if (incoming != kDifferentValuesMarker) { |
2376 | ASSERT(incoming != NULL); |
2377 | return incoming; |
2378 | } |
2379 | |
2380 | // Incoming values are different. Phi is required to merge. |
2381 | PhiInstr* phi = |
2382 | new (Z) PhiInstr(block->AsJoinEntry(), block->PredecessorCount()); |
2383 | SetPlaceId(phi, place_id); |
2384 | FillPhiInputs(phi); |
2385 | return phi; |
2386 | } |
2387 | |
2388 | void FillPhiInputs(PhiInstr* phi) { |
2389 | BlockEntryInstr* block = phi->GetBlock(); |
2390 | const intptr_t place_id = GetPlaceId(phi); |
2391 | |
2392 | for (intptr_t i = 0; i < block->PredecessorCount(); i++) { |
2393 | BlockEntryInstr* pred = block->PredecessorAt(i); |
2394 | ZoneGrowableArray<Definition*>* pred_out_values = |
2395 | out_values_[pred->preorder_number()]; |
2396 | ASSERT((*pred_out_values)[place_id] != NULL); |
2397 | |
2398 | // Sets of outgoing values are not linked into use lists so |
2399 | // they might contain values that were replaced and removed |
2400 | // from the graph by this iteration. |
2401 | // To prevent using them we additionally mark definitions themselves |
2402 | // as replaced and store a pointer to the replacement. |
2403 | Definition* replacement = (*pred_out_values)[place_id]->Replacement(); |
2404 | Value* input = new (Z) Value(replacement); |
2405 | phi->SetInputAt(i, input); |
2406 | replacement->AddInputUse(input); |
2407 | } |
2408 | |
2409 | graph_->AllocateSSAIndexes(phi); |
2410 | phis_.Add(phi); // Postpone phi insertion until after load forwarding. |
2411 | |
2412 | if (FLAG_support_il_printer && FLAG_trace_load_optimization) { |
2413 | THR_Print("created pending phi %s for %s at B%" Pd "\n" , phi->ToCString(), |
2414 | aliased_set_->places()[place_id]->ToCString(), |
2415 | block->block_id()); |
2416 | } |
2417 | } |
2418 | |
2419 | // Iterate over basic blocks and replace exposed loads with incoming |
2420 | // values. |
2421 | void ForwardLoads() { |
2422 | for (BlockIterator block_it = graph_->reverse_postorder_iterator(); |
2423 | !block_it.Done(); block_it.Advance()) { |
2424 | BlockEntryInstr* block = block_it.Current(); |
2425 | |
2426 | ZoneGrowableArray<Definition*>* loads = |
2427 | exposed_values_[block->preorder_number()]; |
2428 | if (loads == NULL) continue; // No exposed loads. |
2429 | |
2430 | BitVector* in = in_[block->preorder_number()]; |
2431 | |
2432 | for (intptr_t i = 0; i < loads->length(); i++) { |
2433 | Definition* load = (*loads)[i]; |
2434 | if (!in->Contains(GetPlaceId(load))) continue; // No incoming value. |
2435 | |
2436 | Definition* replacement = MergeIncomingValues(block, GetPlaceId(load)); |
2437 | ASSERT(replacement != NULL); |
2438 | |
2439 | // Sets of outgoing values are not linked into use lists so |
        // they might contain values that were replaced and removed
2441 | // from the graph by this iteration. |
2442 | // To prevent using them we additionally mark definitions themselves |
2443 | // as replaced and store a pointer to the replacement. |
2444 | replacement = replacement->Replacement(); |
2445 | |
2446 | if (load != replacement) { |
2447 | graph_->EnsureSSATempIndex(load, replacement); |
2448 | |
2449 | if (FLAG_trace_optimization) { |
2450 | THR_Print("Replacing load v%" Pd " with v%" Pd "\n" , |
2451 | load->ssa_temp_index(), replacement->ssa_temp_index()); |
2452 | } |
2453 | |
2454 | load->ReplaceUsesWith(replacement); |
2455 | load->RemoveFromGraph(); |
2456 | load->SetReplacement(replacement); |
2457 | forwarded_ = true; |
2458 | } |
2459 | } |
2460 | } |
2461 | } |
2462 | |
  // Check if the given phi takes the same value on all code paths.
  // Eliminate it as redundant if this is the case.
  // When analyzing phi operands we assume that only phis generated during
  // this load forwarding phase can be redundant. They can be distinguished
  // because they are not marked alive.
2468 | // TODO(vegorov): move this into a separate phase over all phis. |
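  // For illustration (sketch): given
  //
  //   phi1 = phi(v, phi2)
  //   phi2 = phi(phi1, v)
  //
  // every non-phi operand reachable from phi1 is v, so the whole group can
  // be replaced by v.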
2469 | bool EliminateRedundantPhi(PhiInstr* phi) { |
2470 | Definition* value = NULL; // Possible value of this phi. |
2471 | |
2472 | worklist_.Clear(); |
2473 | if (in_worklist_ == NULL) { |
2474 | in_worklist_ = new (Z) BitVector(Z, graph_->current_ssa_temp_index()); |
2475 | } else { |
2476 | in_worklist_->Clear(); |
2477 | } |
2478 | |
2479 | worklist_.Add(phi); |
2480 | in_worklist_->Add(phi->ssa_temp_index()); |
2481 | |
2482 | for (intptr_t i = 0; i < worklist_.length(); i++) { |
2483 | PhiInstr* phi = worklist_[i]; |
2484 | |
2485 | for (intptr_t i = 0; i < phi->InputCount(); i++) { |
2486 | Definition* input = phi->InputAt(i)->definition(); |
2487 | if (input == phi) continue; |
2488 | |
2489 | PhiInstr* phi_input = input->AsPhi(); |
2490 | if ((phi_input != NULL) && !phi_input->is_alive()) { |
2491 | if (!in_worklist_->Contains(phi_input->ssa_temp_index())) { |
2492 | worklist_.Add(phi_input); |
2493 | in_worklist_->Add(phi_input->ssa_temp_index()); |
2494 | } |
2495 | continue; |
2496 | } |
2497 | |
2498 | if (value == NULL) { |
2499 | value = input; |
2500 | } else if (value != input) { |
2501 | return false; // This phi is not redundant. |
2502 | } |
2503 | } |
2504 | } |
2505 | |
2506 | // All phis in the worklist are redundant and have the same computed |
2507 | // value on all code paths. |
2508 | ASSERT(value != NULL); |
2509 | for (intptr_t i = 0; i < worklist_.length(); i++) { |
2510 | worklist_[i]->ReplaceUsesWith(value); |
2511 | } |
2512 | |
2513 | return true; |
2514 | } |
2515 | |
2516 | // Returns true if definitions are congruent assuming their inputs |
2517 | // are congruent. |
2518 | bool CanBeCongruent(Definition* a, Definition* b) { |
2519 | return (a->tag() == b->tag()) && |
2520 | ((a->IsPhi() && (a->GetBlock() == b->GetBlock())) || |
2521 | (a->AllowsCSE() && a->AttributesEqual(b))); |
2522 | } |
2523 | |
  // Given two definitions check if they are congruent under the assumption
  // that their inputs will be proven congruent. If they are, add them to the
  // worklist to check their inputs' congruency.
  // Returns true if the pair was added to the worklist or is already in the
  // worklist, and false if a and b are not congruent.
2529 | bool AddPairToCongruencyWorklist(Definition* a, Definition* b) { |
2530 | if (!CanBeCongruent(a, b)) { |
2531 | return false; |
2532 | } |
2533 | |
2534 | // If a is already in the worklist check if it is being compared to b. |
2535 | // Give up if it is not. |
2536 | if (in_worklist_->Contains(a->ssa_temp_index())) { |
2537 | for (intptr_t i = 0; i < congruency_worklist_.length(); i += 2) { |
2538 | if (a == congruency_worklist_[i]) { |
2539 | return (b == congruency_worklist_[i + 1]); |
2540 | } |
2541 | } |
2542 | UNREACHABLE(); |
2543 | } else if (in_worklist_->Contains(b->ssa_temp_index())) { |
2544 | return AddPairToCongruencyWorklist(b, a); |
2545 | } |
2546 | |
2547 | congruency_worklist_.Add(a); |
2548 | congruency_worklist_.Add(b); |
2549 | in_worklist_->Add(a->ssa_temp_index()); |
2550 | return true; |
2551 | } |
2552 | |
2553 | bool AreInputsCongruent(Definition* a, Definition* b) { |
2554 | ASSERT(a->tag() == b->tag()); |
2555 | ASSERT(a->InputCount() == b->InputCount()); |
2556 | for (intptr_t j = 0; j < a->InputCount(); j++) { |
2557 | Definition* inputA = a->InputAt(j)->definition(); |
2558 | Definition* inputB = b->InputAt(j)->definition(); |
2559 | |
2560 | if (inputA != inputB) { |
2561 | if (!AddPairToCongruencyWorklist(inputA, inputB)) { |
2562 | return false; |
2563 | } |
2564 | } |
2565 | } |
2566 | return true; |
2567 | } |
2568 | |
2569 | // Returns true if instruction dom dominates instruction other. |
2570 | static bool Dominates(Instruction* dom, Instruction* other) { |
2571 | BlockEntryInstr* dom_block = dom->GetBlock(); |
2572 | BlockEntryInstr* other_block = other->GetBlock(); |
2573 | |
2574 | if (dom_block == other_block) { |
2575 | for (Instruction* current = dom->next(); current != NULL; |
2576 | current = current->next()) { |
2577 | if (current == other) { |
2578 | return true; |
2579 | } |
2580 | } |
2581 | return false; |
2582 | } |
2583 | |
2584 | return dom_block->Dominates(other_block); |
2585 | } |
2586 | |
  // Replace the given phi with another if they are congruent.
  // Returns true if it succeeds.
2589 | bool ReplacePhiWith(PhiInstr* phi, PhiInstr* replacement) { |
2590 | ASSERT(phi->InputCount() == replacement->InputCount()); |
2591 | ASSERT(phi->block() == replacement->block()); |
2592 | |
2593 | congruency_worklist_.Clear(); |
2594 | if (in_worklist_ == NULL) { |
2595 | in_worklist_ = new (Z) BitVector(Z, graph_->current_ssa_temp_index()); |
2596 | } else { |
2597 | in_worklist_->Clear(); |
2598 | } |
2599 | |
    // During the comparison the worklist contains pairs of definitions to be
    // compared.
2602 | if (!AddPairToCongruencyWorklist(phi, replacement)) { |
2603 | return false; |
2604 | } |
2605 | |
2606 | // Process the worklist. It might grow during each comparison step. |
2607 | for (intptr_t i = 0; i < congruency_worklist_.length(); i += 2) { |
2608 | if (!AreInputsCongruent(congruency_worklist_[i], |
2609 | congruency_worklist_[i + 1])) { |
2610 | return false; |
2611 | } |
2612 | } |
2613 | |
    // At this point the worklist contains pairs of congruent definitions.
    // Replace one member of each pair with the other, maintaining the proper
    // domination relation between definitions and uses.
2617 | for (intptr_t i = 0; i < congruency_worklist_.length(); i += 2) { |
2618 | Definition* a = congruency_worklist_[i]; |
2619 | Definition* b = congruency_worklist_[i + 1]; |
2620 | |
      // If these definitions are not phis then we need to pick the one that
      // dominates the other as the replacement: if a dominates b, swap them.
2623 | // Note: both a and b are used as a phi input at the same block B which |
2624 | // means a dominates B and b dominates B, which guarantees that either |
2625 | // a dominates b or b dominates a. |
2626 | if (!a->IsPhi()) { |
2627 | if (Dominates(a, b)) { |
2628 | Definition* t = a; |
2629 | a = b; |
2630 | b = t; |
2631 | } |
2632 | ASSERT(Dominates(b, a)); |
2633 | } |
2634 | |
2635 | if (FLAG_support_il_printer && FLAG_trace_load_optimization) { |
2636 | THR_Print("Replacing %s with congruent %s\n" , a->ToCString(), |
2637 | b->ToCString()); |
2638 | } |
2639 | |
2640 | a->ReplaceUsesWith(b); |
2641 | if (a->IsPhi()) { |
2642 | // We might be replacing a phi introduced by the load forwarding |
2643 | // that is not inserted in the graph yet. |
2644 | ASSERT(b->IsPhi()); |
2645 | PhiInstr* phi_a = a->AsPhi(); |
2646 | if (phi_a->is_alive()) { |
2647 | phi_a->mark_dead(); |
2648 | phi_a->block()->RemovePhi(phi_a); |
2649 | phi_a->UnuseAllInputs(); |
2650 | } |
2651 | } else { |
2652 | a->RemoveFromGraph(); |
2653 | } |
2654 | } |
2655 | |
2656 | return true; |
2657 | } |
2658 | |
2659 | // Insert the given phi into the graph. Attempt to find an equal one in the |
2660 | // target block first. |
2661 | // Returns true if the phi was inserted and false if it was replaced. |
2662 | bool EmitPhi(PhiInstr* phi) { |
2663 | for (PhiIterator it(phi->block()); !it.Done(); it.Advance()) { |
2664 | if (ReplacePhiWith(phi, it.Current())) { |
2665 | return false; |
2666 | } |
2667 | } |
2668 | |
2669 | phi->mark_alive(); |
2670 | phi->block()->InsertPhi(phi); |
2671 | return true; |
2672 | } |
2673 | |
2674 | // Phis have not yet been inserted into the graph but they have uses of |
2675 | // their inputs. Insert the non-redundant ones and clear the input uses |
2676 | // of the redundant ones. |
2677 | void EmitPhis() { |
2678 | // First eliminate all redundant phis. |
2679 | for (intptr_t i = 0; i < phis_.length(); i++) { |
2680 | PhiInstr* phi = phis_[i]; |
2681 | if (!phi->HasUses() || EliminateRedundantPhi(phi)) { |
2682 | phi->UnuseAllInputs(); |
2683 | phis_[i] = NULL; |
2684 | } |
2685 | } |
2686 | |
2687 | // Now emit phis or replace them with equal phis already present in the |
2688 | // graph. |
2689 | for (intptr_t i = 0; i < phis_.length(); i++) { |
2690 | PhiInstr* phi = phis_[i]; |
2691 | if ((phi != NULL) && (!phi->HasUses() || !EmitPhi(phi))) { |
2692 | phi->UnuseAllInputs(); |
2693 | } |
2694 | } |
2695 | } |
2696 | |
2697 | ZoneGrowableArray<Definition*>* CreateBlockOutValues() { |
2698 | ZoneGrowableArray<Definition*>* out = |
2699 | new (Z) ZoneGrowableArray<Definition*>(aliased_set_->max_place_id()); |
2700 | for (intptr_t i = 0; i < aliased_set_->max_place_id(); i++) { |
2701 | out->Add(NULL); |
2702 | } |
2703 | return out; |
2704 | } |
2705 | |
2706 | FlowGraph* graph_; |
2707 | DirectChainedHashMap<PointerKeyValueTrait<Place> >* map_; |
2708 | |
2709 | // Mapping between field offsets in words and expression ids of loads from |
2710 | // that offset. |
2711 | AliasedSet* aliased_set_; |
2712 | |
2713 | // Per block sets of expression ids for loads that are: incoming (available |
2714 | // on the entry), outgoing (available on the exit), generated and killed. |
2715 | GrowableArray<BitVector*> in_; |
2716 | GrowableArray<BitVector*> out_; |
2717 | GrowableArray<BitVector*> gen_; |
2718 | GrowableArray<BitVector*> kill_; |
2719 | |
2720 | // Per block list of upwards exposed loads. |
2721 | GrowableArray<ZoneGrowableArray<Definition*>*> exposed_values_; |
2722 | |
2723 | // Per block mappings between expression ids and outgoing definitions that |
2724 | // represent those ids. |
2725 | GrowableArray<ZoneGrowableArray<Definition*>*> out_values_; |
2726 | |
2727 | // List of phis generated during ComputeOutValues and ForwardLoads. |
2728 | // Some of these phis might be redundant and thus a separate pass is |
2729 | // needed to emit only non-redundant ones. |
2730 | GrowableArray<PhiInstr*> phis_; |
2731 | |
2732 | // Auxiliary worklist used by redundant phi elimination. |
2733 | GrowableArray<PhiInstr*> worklist_; |
2734 | GrowableArray<Definition*> congruency_worklist_; |
2735 | BitVector* in_worklist_; |
2736 | |
2737 | // True if any load was eliminated. |
2738 | bool forwarded_; |
2739 | |
2740 | DISALLOW_COPY_AND_ASSIGN(LoadOptimizer); |
2741 | }; |
2742 | |
2743 | bool DominatorBasedCSE::Optimize(FlowGraph* graph) { |
2744 | bool changed = false; |
2745 | if (FLAG_load_cse) { |
2746 | changed = LoadOptimizer::OptimizeGraph(graph) || changed; |
2747 | } |
2748 | |
2749 | CSEInstructionMap map; |
2750 | changed = OptimizeRecursive(graph, graph->graph_entry(), &map) || changed; |
2751 | |
2752 | return changed; |
2753 | } |
2754 | |
2755 | bool DominatorBasedCSE::OptimizeRecursive(FlowGraph* graph, |
2756 | BlockEntryInstr* block, |
2757 | CSEInstructionMap* map) { |
2758 | bool changed = false; |
2759 | for (ForwardInstructionIterator it(block); !it.Done(); it.Advance()) { |
2760 | Instruction* current = it.Current(); |
2761 | if (current->AllowsCSE()) { |
2762 | Instruction* replacement = map->Lookup(current); |
2763 | |
2764 | if (replacement != NULL) { |
2765 | // Replace current with lookup result. |
2766 | ASSERT(replacement->AllowsCSE()); |
2767 | graph->ReplaceCurrentInstruction(&it, current, replacement); |
2768 | changed = true; |
2769 | continue; |
2770 | } |
2771 | |
2772 | map->Insert(current); |
2773 | } |
2774 | } |
2775 | |
2776 | // Process children in the dominator tree recursively. |
2777 | intptr_t num_children = block->dominated_blocks().length(); |
2778 | if (num_children != 0) { |
2779 | graph->thread()->CheckForSafepoint(); |
2780 | } |
2781 | for (intptr_t i = 0; i < num_children; ++i) { |
2782 | BlockEntryInstr* child = block->dominated_blocks()[i]; |
2783 | if (i < num_children - 1) { |
2784 | // Copy map. |
2785 | CSEInstructionMap child_map(*map); |
2786 | changed = OptimizeRecursive(graph, child, &child_map) || changed; |
2787 | } else { |
2788 | // Reuse map for the last child. |
2789 | changed = OptimizeRecursive(graph, child, map) || changed; |
2790 | } |
2791 | } |
2792 | return changed; |
2793 | } |
2794 | |
2795 | class StoreOptimizer : public LivenessAnalysis { |
2796 | public: |
2797 | StoreOptimizer(FlowGraph* graph, |
2798 | AliasedSet* aliased_set, |
2799 | DirectChainedHashMap<PointerKeyValueTrait<Place> >* map) |
2800 | : LivenessAnalysis(aliased_set->max_place_id(), graph->postorder()), |
2801 | graph_(graph), |
2802 | map_(map), |
2803 | aliased_set_(aliased_set), |
2804 | exposed_stores_(graph_->postorder().length()) { |
2805 | const intptr_t num_blocks = graph_->postorder().length(); |
2806 | for (intptr_t i = 0; i < num_blocks; i++) { |
2807 | exposed_stores_.Add(NULL); |
2808 | } |
2809 | } |
2810 | |
2811 | static void OptimizeGraph(FlowGraph* graph) { |
2812 | ASSERT(FLAG_load_cse); |
2813 | |
2814 | // For now, bail out for large functions to avoid OOM situations. |
2815 | // TODO(fschneider): Fix the memory consumption issue. |
2816 | intptr_t function_length = graph->function().end_token_pos().Pos() - |
2817 | graph->function().token_pos().Pos(); |
2818 | if (function_length >= FLAG_huge_method_cutoff_in_tokens) { |
2819 | return; |
2820 | } |
2821 | |
2822 | DirectChainedHashMap<PointerKeyValueTrait<Place> > map; |
2823 | AliasedSet* aliased_set = NumberPlaces(graph, &map, kOptimizeStores); |
2824 | if ((aliased_set != NULL) && !aliased_set->IsEmpty()) { |
2825 | StoreOptimizer store_optimizer(graph, aliased_set, &map); |
2826 | store_optimizer.Optimize(); |
2827 | } |
2828 | } |
2829 | |
2830 | private: |
2831 | void Optimize() { |
2832 | Analyze(); |
2833 | if (FLAG_trace_load_optimization) { |
2834 | Dump(); |
2835 | } |
2836 | EliminateDeadStores(); |
2837 | } |
2838 | |
2839 | bool CanEliminateStore(Instruction* instr) { |
2840 | switch (instr->tag()) { |
2841 | case Instruction::kStoreInstanceField: { |
2842 | StoreInstanceFieldInstr* store_instance = instr->AsStoreInstanceField(); |
2843 | // Can't eliminate stores that initialize fields. |
2844 | return !store_instance->is_initialization(); |
2845 | } |
2846 | case Instruction::kStoreIndexed: |
2847 | case Instruction::kStoreStaticField: |
2848 | return true; |
2849 | default: |
2850 | UNREACHABLE(); |
2851 | return false; |
2852 | } |
2853 | } |
2854 | |
2855 | virtual void ComputeInitialSets() { |
2856 | Zone* zone = graph_->zone(); |
2857 | BitVector* all_places = |
2858 | new (zone) BitVector(zone, aliased_set_->max_place_id()); |
2859 | all_places->SetAll(); |
2860 | for (BlockIterator block_it = graph_->postorder_iterator(); |
2861 | !block_it.Done(); block_it.Advance()) { |
2862 | BlockEntryInstr* block = block_it.Current(); |
2863 | const intptr_t postorder_number = block->postorder_number(); |
2864 | |
2865 | BitVector* kill = kill_[postorder_number]; |
2866 | BitVector* live_in = live_in_[postorder_number]; |
2867 | BitVector* live_out = live_out_[postorder_number]; |
2868 | |
2869 | ZoneGrowableArray<Instruction*>* exposed_stores = NULL; |
2870 | |
2871 | // Iterate backwards starting at the last instruction. |
2872 | for (BackwardInstructionIterator instr_it(block); !instr_it.Done(); |
2873 | instr_it.Advance()) { |
2874 | Instruction* instr = instr_it.Current(); |
2875 | |
2876 | bool is_load = false; |
2877 | bool is_store = false; |
2878 | Place place(instr, &is_load, &is_store); |
2879 | if (place.IsImmutableField()) { |
2880 | // Loads/stores of final fields do not participate. |
2881 | continue; |
2882 | } |
2883 | |
2884 | // Handle stores. |
2885 | if (is_store) { |
2886 | if (kill->Contains(GetPlaceId(instr))) { |
2887 | if (!live_in->Contains(GetPlaceId(instr)) && |
2888 | CanEliminateStore(instr)) { |
2889 | if (FLAG_trace_optimization) { |
2890 | THR_Print("Removing dead store to place %" Pd " in block B%" Pd |
2891 | "\n" , |
2892 | GetPlaceId(instr), block->block_id()); |
2893 | } |
2894 | instr_it.RemoveCurrentFromGraph(); |
2895 | } |
2896 | } else if (!live_in->Contains(GetPlaceId(instr))) { |
              // Mark this store as downward exposed: such stores are the only
              // candidates for global store elimination.
2899 | if (exposed_stores == NULL) { |
2900 | const intptr_t kMaxExposedStoresInitialSize = 5; |
2901 | exposed_stores = new (zone) ZoneGrowableArray<Instruction*>( |
2902 | Utils::Minimum(kMaxExposedStoresInitialSize, |
2903 | aliased_set_->max_place_id())); |
2904 | } |
2905 | exposed_stores->Add(instr); |
2906 | } |
2907 | // Interfering stores kill only loads from the same place. |
2908 | kill->Add(GetPlaceId(instr)); |
2909 | live_in->Remove(GetPlaceId(instr)); |
2910 | continue; |
2911 | } |
2912 | |
2913 | // Handle side effects, deoptimization and function return. |
2914 | if (instr->HasUnknownSideEffects() || instr->CanDeoptimize() || |
2915 | instr->MayThrow() || instr->IsReturn()) { |
2916 | // Instructions that return from the function, instructions with side |
2917 | // effects and instructions that can deoptimize are considered as |
2918 | // loads from all places. |
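          // For illustration (sketch): a store to o.f followed only by a
          // call with unknown side effects cannot be eliminated, because
          // the callee may observe o.f; the call is treated as a load of
          // every place.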
2919 | live_in->CopyFrom(all_places); |
2920 | if (instr->IsThrow() || instr->IsReThrow() || instr->IsReturn()) { |
2921 | // Initialize live-out for exit blocks since it won't be computed |
2922 | // otherwise during the fixed point iteration. |
2923 | live_out->CopyFrom(all_places); |
2924 | } |
2925 | continue; |
2926 | } |
2927 | |
2928 | // Handle loads. |
2929 | Definition* defn = instr->AsDefinition(); |
2930 | if ((defn != NULL) && IsLoadEliminationCandidate(defn)) { |
2931 | const intptr_t alias = aliased_set_->LookupAliasId(place.ToAlias()); |
2932 | live_in->AddAll(aliased_set_->GetKilledSet(alias)); |
2933 | continue; |
2934 | } |
2935 | } |
2936 | exposed_stores_[postorder_number] = exposed_stores; |
2937 | } |
2938 | if (FLAG_trace_load_optimization) { |
2939 | Dump(); |
2940 | THR_Print("---\n" ); |
2941 | } |
2942 | } |
2943 | |
2944 | void EliminateDeadStores() { |
2945 | // Iteration order does not matter here. |
2946 | for (BlockIterator block_it = graph_->postorder_iterator(); |
2947 | !block_it.Done(); block_it.Advance()) { |
2948 | BlockEntryInstr* block = block_it.Current(); |
2949 | const intptr_t postorder_number = block->postorder_number(); |
2950 | |
2951 | BitVector* live_out = live_out_[postorder_number]; |
2952 | |
2953 | ZoneGrowableArray<Instruction*>* exposed_stores = |
2954 | exposed_stores_[postorder_number]; |
2955 | if (exposed_stores == NULL) continue; // No exposed stores. |
2956 | |
2957 | // Iterate over candidate stores. |
2958 | for (intptr_t i = 0; i < exposed_stores->length(); ++i) { |
2959 | Instruction* instr = (*exposed_stores)[i]; |
2960 | bool is_load = false; |
2961 | bool is_store = false; |
2962 | Place place(instr, &is_load, &is_store); |
2963 | ASSERT(!is_load && is_store); |
2964 | if (place.IsImmutableField()) { |
          // Final fields do not participate in dead store elimination.
2966 | continue; |
2967 | } |
2968 | // Eliminate a downward exposed store if the corresponding place is not |
2969 | // in live-out. |
2970 | if (!live_out->Contains(GetPlaceId(instr)) && |
2971 | CanEliminateStore(instr)) { |
2972 | if (FLAG_trace_optimization) { |
2973 | THR_Print("Removing dead store to place %" Pd " block B%" Pd "\n" , |
2974 | GetPlaceId(instr), block->block_id()); |
2975 | } |
2976 | instr->RemoveFromGraph(/* ignored */ false); |
2977 | } |
2978 | } |
2979 | } |
2980 | } |
2981 | |
2982 | FlowGraph* graph_; |
2983 | DirectChainedHashMap<PointerKeyValueTrait<Place> >* map_; |
2984 | |
  // Aliasing information about all places accessed by loads and stores in
  // the graph.
2987 | AliasedSet* aliased_set_; |
2988 | |
2989 | // Per block list of downward exposed stores. |
2990 | GrowableArray<ZoneGrowableArray<Instruction*>*> exposed_stores_; |
2991 | |
2992 | DISALLOW_COPY_AND_ASSIGN(StoreOptimizer); |
2993 | }; |
2994 | |
2995 | void DeadStoreElimination::Optimize(FlowGraph* graph) { |
2996 | if (FLAG_dead_store_elimination) { |
2997 | StoreOptimizer::OptimizeGraph(graph); |
2998 | } |
2999 | } |
3000 | |
3001 | // |
3002 | // Allocation Sinking |
3003 | // |
3004 | |
3005 | // Returns true if the given instruction is an allocation that |
3006 | // can be sunk by the Allocation Sinking pass. |
3007 | static bool IsSupportedAllocation(Instruction* instr) { |
3008 | return instr->IsAllocateObject() || instr->IsAllocateUninitializedContext(); |
3009 | } |
3010 | |
3011 | enum SafeUseCheck { kOptimisticCheck, kStrictCheck }; |
3012 | |
// Check if the use is safe for allocation sinking. Allocation sinking
// candidates can only be used in store instructions:
//
// - any store into the allocation candidate itself is unconditionally safe
//   as it just changes the rematerialization state of this candidate;
// - a store into another object is only safe if that other object is itself
//   an allocation sinking candidate.
3020 | // |
// We use a simple fixed-point algorithm to discover the set of valid
// candidates (see the CollectCandidates method), which is why IsSafeUse can
// operate in two modes:
3024 | // |
3025 | // - optimistic, when every allocation is assumed to be an allocation |
3026 | // sinking candidate; |
3027 | // - strict, when only marked allocations are assumed to be allocation |
3028 | // sinking candidates. |
3029 | // |
// The fixed-point algorithm in CollectCandidates first collects a set of
// allocations optimistically and then checks each collected candidate
// strictly, transitively unmarking invalid candidates until only strictly
// valid ones remain.
3033 | static bool IsSafeUse(Value* use, SafeUseCheck check_type) { |
3034 | if (use->instruction()->IsMaterializeObject()) { |
3035 | return true; |
3036 | } |
3037 | |
3038 | StoreInstanceFieldInstr* store = use->instruction()->AsStoreInstanceField(); |
3039 | if (store != NULL) { |
3040 | if (use == store->value()) { |
3041 | Definition* instance = store->instance()->definition(); |
3042 | return IsSupportedAllocation(instance) && |
3043 | ((check_type == kOptimisticCheck) || |
3044 | instance->Identity().IsAllocationSinkingCandidate()); |
3045 | } |
3046 | return true; |
3047 | } |
3048 | |
3049 | return false; |
3050 | } |
3051 | |
// Right now we attempt to sink allocations only into deoptimization exits,
// so a candidate should only be used in StoreInstanceField instructions that
// write into fields of the allocated object.
3055 | static bool IsAllocationSinkingCandidate(Definition* alloc, |
3056 | SafeUseCheck check_type) { |
3057 | for (Value* use = alloc->input_use_list(); use != NULL; |
3058 | use = use->next_use()) { |
3059 | if (!IsSafeUse(use, check_type)) { |
3060 | if (FLAG_support_il_printer && FLAG_trace_optimization) { |
3061 | THR_Print("use of %s at %s is unsafe for allocation sinking\n" , |
3062 | alloc->ToCString(), use->instruction()->ToCString()); |
3063 | } |
3064 | return false; |
3065 | } |
3066 | } |
3067 | |
3068 | return true; |
3069 | } |
3070 | |
// If the given use is a store into an object, return the object that is
// being stored into; otherwise return NULL.
3073 | static Definition* StoreInto(Value* use) { |
3074 | StoreInstanceFieldInstr* store = use->instruction()->AsStoreInstanceField(); |
3075 | if (store != NULL) { |
3076 | return store->instance()->definition(); |
3077 | } |
3078 | |
3079 | return NULL; |
3080 | } |
3081 | |
3082 | // Remove the given allocation from the graph. It is not observable. |
3083 | // If deoptimization occurs the object will be materialized. |
3084 | void AllocationSinking::EliminateAllocation(Definition* alloc) { |
3085 | ASSERT(IsAllocationSinkingCandidate(alloc, kStrictCheck)); |
3086 | |
3087 | if (FLAG_trace_optimization) { |
3088 | THR_Print("removing allocation from the graph: v%" Pd "\n" , |
3089 | alloc->ssa_temp_index()); |
3090 | } |
3091 | |
3092 | // As an allocation sinking candidate it is only used in stores to its own |
3093 | // fields. Remove these stores. |
3094 | for (Value* use = alloc->input_use_list(); use != NULL; |
3095 | use = alloc->input_use_list()) { |
3096 | use->instruction()->RemoveFromGraph(); |
3097 | } |
3098 | |
3099 | // There should be no environment uses. The pass replaced them with |
3100 | // MaterializeObject instructions. |
3101 | #ifdef DEBUG |
3102 | for (Value* use = alloc->env_use_list(); use != NULL; use = use->next_use()) { |
3103 | ASSERT(use->instruction()->IsMaterializeObject()); |
3104 | } |
3105 | #endif |
3106 | ASSERT(alloc->input_use_list() == NULL); |
3107 | alloc->RemoveFromGraph(); |
3108 | if (alloc->ArgumentCount() > 0) { |
3109 | ASSERT(alloc->ArgumentCount() == 1); |
3110 | ASSERT(!alloc->HasPushArguments()); |
3111 | } |
3112 | } |
3113 | |
// Find allocation instructions that can potentially be eliminated and
// rematerialized at deoptimization exits if needed. See IsSafeUse for a
// description of the algorithm used below.
3117 | void AllocationSinking::CollectCandidates() { |
3118 | // Optimistically collect all potential candidates. |
3119 | for (BlockIterator block_it = flow_graph_->reverse_postorder_iterator(); |
3120 | !block_it.Done(); block_it.Advance()) { |
3121 | BlockEntryInstr* block = block_it.Current(); |
3122 | for (ForwardInstructionIterator it(block); !it.Done(); it.Advance()) { |
3123 | Instruction* current = it.Current(); |
3124 | if (!IsSupportedAllocation(current)) { |
3125 | continue; |
3126 | } |
3127 | |
3128 | Definition* alloc = current->Cast<Definition>(); |
3129 | if (IsAllocationSinkingCandidate(alloc, kOptimisticCheck)) { |
3130 | alloc->SetIdentity(AliasIdentity::AllocationSinkingCandidate()); |
3131 | candidates_.Add(alloc); |
3132 | } |
3133 | } |
3134 | } |
3135 | |
3136 | // Transitively unmark all candidates that are not strictly valid. |
3137 | bool changed; |
3138 | do { |
3139 | changed = false; |
3140 | for (intptr_t i = 0; i < candidates_.length(); i++) { |
3141 | Definition* alloc = candidates_[i]; |
3142 | if (alloc->Identity().IsAllocationSinkingCandidate()) { |
3143 | if (!IsAllocationSinkingCandidate(alloc, kStrictCheck)) { |
3144 | alloc->SetIdentity(AliasIdentity::Unknown()); |
3145 | changed = true; |
3146 | } |
3147 | } |
3148 | } |
3149 | } while (changed); |
3150 | |
3151 | // Shrink the list of candidates removing all unmarked ones. |
3152 | intptr_t j = 0; |
3153 | for (intptr_t i = 0; i < candidates_.length(); i++) { |
3154 | Definition* alloc = candidates_[i]; |
3155 | if (alloc->Identity().IsAllocationSinkingCandidate()) { |
3156 | if (FLAG_trace_optimization) { |
3157 | THR_Print("discovered allocation sinking candidate: v%" Pd "\n" , |
3158 | alloc->ssa_temp_index()); |
3159 | } |
3160 | |
3161 | if (j != i) { |
3162 | candidates_[j] = alloc; |
3163 | } |
3164 | j++; |
3165 | } |
3166 | } |
3167 | candidates_.TruncateTo(j); |
3168 | } |
3169 | |
// If a materialization references an allocation sinking candidate then
// replace this reference with the materialization which should have been
// computed for this side exit. The ExitsCollector should have already
// collected this exit.
3173 | void AllocationSinking::NormalizeMaterializations() { |
3174 | for (intptr_t i = 0; i < candidates_.length(); i++) { |
3175 | Definition* alloc = candidates_[i]; |
3176 | |
3177 | Value* next_use; |
3178 | for (Value* use = alloc->input_use_list(); use != NULL; use = next_use) { |
3179 | next_use = use->next_use(); |
3180 | if (use->instruction()->IsMaterializeObject()) { |
3181 | use->BindTo(MaterializationFor(alloc, use->instruction())); |
3182 | } |
3183 | } |
3184 | } |
3185 | } |
3186 | |
// We transitively insert materializations at each deoptimization exit that
// might see the given allocation (see ExitsCollector). Some of these
// materializations are not actually used and some fail to compute because
// they are inserted into blocks that are not dominated by the allocation.
// Remove such unused materializations from the graph.
3192 | void AllocationSinking::RemoveUnusedMaterializations() { |
3193 | intptr_t j = 0; |
3194 | for (intptr_t i = 0; i < materializations_.length(); i++) { |
3195 | MaterializeObjectInstr* mat = materializations_[i]; |
3196 | if ((mat->input_use_list() == NULL) && (mat->env_use_list() == NULL)) { |
      // Check if this materialization failed to compute and remove any
      // unforwarded loads. There were no loads from any allocation sinking
      // candidate at the beginning of the pass, so it is safe to assume that
      // any encountered load was inserted by CreateMaterializationAt.
3201 | for (intptr_t i = 0; i < mat->InputCount(); i++) { |
3202 | LoadFieldInstr* load = mat->InputAt(i)->definition()->AsLoadField(); |
3203 | if ((load != NULL) && |
3204 | (load->instance()->definition() == mat->allocation())) { |
3205 | load->ReplaceUsesWith(flow_graph_->constant_null()); |
3206 | load->RemoveFromGraph(); |
3207 | } |
3208 | } |
3209 | mat->RemoveFromGraph(); |
3210 | } else { |
3211 | if (j != i) { |
3212 | materializations_[j] = mat; |
3213 | } |
3214 | j++; |
3215 | } |
3216 | } |
3217 | materializations_.TruncateTo(j); |
3218 | } |
3219 | |
// Some candidates might stop being eligible for allocation sinking after
// load forwarding because they flow into phis that load forwarding inserts.
// Discover such allocations and remove them from the list of allocation
// sinking candidates, undoing all changes that we made in preparation for
// sinking these allocations.
3225 | void AllocationSinking::DiscoverFailedCandidates() { |
3226 | // Transitively unmark all candidates that are not strictly valid. |
3227 | bool changed; |
3228 | do { |
3229 | changed = false; |
3230 | for (intptr_t i = 0; i < candidates_.length(); i++) { |
3231 | Definition* alloc = candidates_[i]; |
3232 | if (alloc->Identity().IsAllocationSinkingCandidate()) { |
3233 | if (!IsAllocationSinkingCandidate(alloc, kStrictCheck)) { |
3234 | alloc->SetIdentity(AliasIdentity::Unknown()); |
3235 | changed = true; |
3236 | } |
3237 | } |
3238 | } |
3239 | } while (changed); |
3240 | |
3241 | // Remove all failed candidates from the candidates list. |
3242 | intptr_t j = 0; |
3243 | for (intptr_t i = 0; i < candidates_.length(); i++) { |
3244 | Definition* alloc = candidates_[i]; |
3245 | if (!alloc->Identity().IsAllocationSinkingCandidate()) { |
3246 | if (FLAG_trace_optimization) { |
3247 | THR_Print("allocation v%" Pd " can't be eliminated\n" , |
3248 | alloc->ssa_temp_index()); |
3249 | } |
3250 | |
3251 | #ifdef DEBUG |
3252 | for (Value* use = alloc->env_use_list(); use != NULL; |
3253 | use = use->next_use()) { |
3254 | ASSERT(use->instruction()->IsMaterializeObject()); |
3255 | } |
3256 | #endif |
3257 | |
3258 | // All materializations will be removed from the graph. Remove inserted |
3259 | // loads first and detach materializations from allocation's environment |
3260 | // use list: we will reconstruct it when we start removing |
3261 | // materializations. |
3262 | alloc->set_env_use_list(NULL); |
3263 | for (Value* use = alloc->input_use_list(); use != NULL; |
3264 | use = use->next_use()) { |
3265 | if (use->instruction()->IsLoadField()) { |
3266 | LoadFieldInstr* load = use->instruction()->AsLoadField(); |
3267 | load->ReplaceUsesWith(flow_graph_->constant_null()); |
3268 | load->RemoveFromGraph(); |
3269 | } else { |
3270 | ASSERT(use->instruction()->IsMaterializeObject() || |
3271 | use->instruction()->IsPhi() || |
3272 | use->instruction()->IsStoreInstanceField()); |
3273 | } |
3274 | } |
3275 | } else { |
3276 | if (j != i) { |
3277 | candidates_[j] = alloc; |
3278 | } |
3279 | j++; |
3280 | } |
3281 | } |
3282 | |
3283 | if (j != candidates_.length()) { // Something was removed from candidates. |
3284 | intptr_t k = 0; |
3285 | for (intptr_t i = 0; i < materializations_.length(); i++) { |
3286 | MaterializeObjectInstr* mat = materializations_[i]; |
3287 | if (!mat->allocation()->Identity().IsAllocationSinkingCandidate()) { |
        // Restore environment uses of the allocation that were replaced
        // by this materialization and drop the materialization.
3290 | mat->ReplaceUsesWith(mat->allocation()); |
3291 | mat->RemoveFromGraph(); |
3292 | } else { |
3293 | if (k != i) { |
3294 | materializations_[k] = mat; |
3295 | } |
3296 | k++; |
3297 | } |
3298 | } |
3299 | materializations_.TruncateTo(k); |
3300 | } |
3301 | |
3302 | candidates_.TruncateTo(j); |
3303 | } |
3304 | |
3305 | void AllocationSinking::Optimize() { |
3306 | CollectCandidates(); |
3307 | |
3308 | // Insert MaterializeObject instructions that will describe the state of the |
3309 | // object at all deoptimization points. Each inserted materialization looks |
  // like this (where v_0 is the allocation that we are going to eliminate):
3311 | // v_1 <- LoadField(v_0, field_1) |
3312 | // ... |
3313 | // v_N <- LoadField(v_0, field_N) |
3314 | // v_{N+1} <- MaterializeObject(field_1 = v_1, ..., field_N = v_{N}) |
3315 | for (intptr_t i = 0; i < candidates_.length(); i++) { |
3316 | InsertMaterializations(candidates_[i]); |
3317 | } |
3318 | |
  // Run load forwarding to eliminate LoadField instructions inserted above.
  // All loads will be successfully eliminated because:
  //   a) they use fields (not offsets) and thus provide precise aliasing
  //      information;
  //   b) the candidate does not escape and thus its fields are not affected
  //      by external effects from calls.
3325 | LoadOptimizer::OptimizeGraph(flow_graph_); |
3326 | |
3327 | NormalizeMaterializations(); |
3328 | |
3329 | RemoveUnusedMaterializations(); |
3330 | |
3331 | // If any candidates are no longer eligible for allocation sinking abort |
3332 | // the optimization for them and undo any changes we did in preparation. |
3333 | DiscoverFailedCandidates(); |
3334 | |
  // At this point we have computed the state of the object at each
  // deoptimization point and we can eliminate it. Loads inserted above were
  // forwarded, so there are no uses of the allocation, just as at the
  // beginning of the pass.
3338 | for (intptr_t i = 0; i < candidates_.length(); i++) { |
3339 | EliminateAllocation(candidates_[i]); |
3340 | } |
3341 | |
3342 | // Process materializations and unbox their arguments: materializations |
3343 | // are part of the environment and can materialize boxes for double/mint/simd |
3344 | // values when needed. |
3345 | // TODO(vegorov): handle all box types here. |
3346 | for (intptr_t i = 0; i < materializations_.length(); i++) { |
3347 | MaterializeObjectInstr* mat = materializations_[i]; |
3348 | for (intptr_t j = 0; j < mat->InputCount(); j++) { |
3349 | Definition* defn = mat->InputAt(j)->definition(); |
3350 | if (defn->IsBox()) { |
3351 | mat->InputAt(j)->BindTo(defn->InputAt(0)->definition()); |
3352 | } |
3353 | } |
3354 | } |
3355 | } |
3356 | |
// Remove materializations from the graph. The register allocator will treat
// them as part of the environment, not as real instructions.
3359 | void AllocationSinking::DetachMaterializations() { |
3360 | for (intptr_t i = 0; i < materializations_.length(); i++) { |
3361 | materializations_[i]->previous()->LinkTo(materializations_[i]->next()); |
3362 | } |
3363 | } |
3364 | |
// Add the given slot to the list of slots if it is not yet present there.
// Returns true if the slot was added.
3366 | static bool AddSlot(ZoneGrowableArray<const Slot*>* slots, const Slot& slot) { |
3367 | for (auto s : *slots) { |
3368 | if (s == &slot) { |
3369 | return false; |
3370 | } |
3371 | } |
3372 | slots->Add(&slot); |
3373 | return true; |
3374 | } |
3375 | |
// Find the deoptimization exit for the given materialization, assuming that
// all materializations are emitted right before the instruction which is a
// deoptimization exit.
3379 | static Instruction* ExitForMaterialization(MaterializeObjectInstr* mat) { |
3380 | while (mat->next()->IsMaterializeObject()) { |
3381 | mat = mat->next()->AsMaterializeObject(); |
3382 | } |
3383 | return mat->next(); |
3384 | } |
3385 | |
// Given a deoptimization exit, find the first materialization that was
// inserted before it.
3388 | static Instruction* FirstMaterializationAt(Instruction* exit) { |
3389 | while (exit->previous()->IsMaterializeObject()) { |
3390 | exit = exit->previous(); |
3391 | } |
3392 | return exit; |
3393 | } |
3394 | |
// Given an allocation and a deoptimization exit, try to find the
// MaterializeObject instruction corresponding to this allocation at this
// exit.
3397 | MaterializeObjectInstr* AllocationSinking::MaterializationFor( |
3398 | Definition* alloc, |
3399 | Instruction* exit) { |
3400 | if (exit->IsMaterializeObject()) { |
3401 | exit = ExitForMaterialization(exit->AsMaterializeObject()); |
3402 | } |
3403 | |
3404 | for (MaterializeObjectInstr* mat = exit->previous()->AsMaterializeObject(); |
3405 | mat != NULL; mat = mat->previous()->AsMaterializeObject()) { |
3406 | if (mat->allocation() == alloc) { |
3407 | return mat; |
3408 | } |
3409 | } |
3410 | |
3411 | return NULL; |
3412 | } |
3413 | |
3414 | // Insert MaterializeObject instruction for the given allocation before |
3415 | // the given instruction that can deoptimize. |
3416 | void AllocationSinking::CreateMaterializationAt( |
3417 | Instruction* exit, |
3418 | Definition* alloc, |
3419 | const ZoneGrowableArray<const Slot*>& slots) { |
3420 | ZoneGrowableArray<Value*>* values = |
3421 | new (Z) ZoneGrowableArray<Value*>(slots.length()); |
3422 | |
3423 | // All loads should be inserted before the first materialization so that |
3424 | // IR follows the following pattern: loads, materializations, deoptimizing |
3425 | // instruction. |
3426 | Instruction* load_point = FirstMaterializationAt(exit); |
3427 | |
3428 | // Insert load instruction for every field. |
3429 | for (auto slot : slots) { |
3430 | LoadFieldInstr* load = |
3431 | new (Z) LoadFieldInstr(new (Z) Value(alloc), *slot, alloc->token_pos()); |
3432 | flow_graph_->InsertBefore(load_point, load, nullptr, FlowGraph::kValue); |
3433 | values->Add(new (Z) Value(load)); |
3434 | } |
3435 | |
3436 | MaterializeObjectInstr* mat = nullptr; |
3437 | if (alloc->IsAllocateObject()) { |
3438 | mat = new (Z) |
3439 | MaterializeObjectInstr(alloc->AsAllocateObject(), slots, values); |
3440 | } else { |
3441 | ASSERT(alloc->IsAllocateUninitializedContext()); |
3442 | mat = new (Z) MaterializeObjectInstr( |
3443 | alloc->AsAllocateUninitializedContext(), slots, values); |
3444 | } |
3445 | |
3446 | flow_graph_->InsertBefore(exit, mat, nullptr, FlowGraph::kValue); |
3447 | |
3448 | // Replace all mentions of this allocation with a newly inserted |
3449 | // MaterializeObject instruction. |
3450 | // We must preserve the identity: all mentions are replaced by the same |
3451 | // materialization. |
3452 | exit->ReplaceInEnvironment(alloc, mat); |
3453 | |
3454 | // Mark MaterializeObject as an environment use of this allocation. |
3455 | // This will allow us to discover it when we are looking for deoptimization |
3456 | // exits for another allocation that potentially flows into this one. |
3457 | Value* val = new (Z) Value(alloc); |
3458 | val->set_instruction(mat); |
3459 | alloc->AddEnvUse(val); |
3460 | |
3461 | // Record inserted materialization. |
3462 | materializations_.Add(mat); |
3463 | } |
3464 | |
// Add the given instruction to the list of instructions if it is not yet
// present there.
3467 | template <typename T> |
3468 | void AddInstruction(GrowableArray<T*>* list, T* value) { |
3469 | ASSERT(!value->IsGraphEntry() && !value->IsFunctionEntry()); |
3470 | for (intptr_t i = 0; i < list->length(); i++) { |
3471 | if ((*list)[i] == value) { |
3472 | return; |
3473 | } |
3474 | } |
3475 | list->Add(value); |
3476 | } |
3477 | |
3478 | // Transitively collect all deoptimization exits that might need this allocation |
3479 | // rematerialized. It is not enough to collect only environment uses of this |
3480 | // allocation because it can flow into other objects that will be |
3481 | // dematerialized and that are referenced by deopt environments that |
3482 | // don't contain this allocation explicitly. |
3483 | void AllocationSinking::ExitsCollector::Collect(Definition* alloc) { |
3484 | for (Value* use = alloc->env_use_list(); use != NULL; use = use->next_use()) { |
3485 | if (use->instruction()->IsMaterializeObject()) { |
3486 | AddInstruction(&exits_, ExitForMaterialization( |
3487 | use->instruction()->AsMaterializeObject())); |
3488 | } else { |
3489 | AddInstruction(&exits_, use->instruction()); |
3490 | } |
3491 | } |
3492 | |
  // Check if this allocation is stored into any other allocation sinking
  // candidate and, if so, put that candidate on the worklist so that we
  // conservatively collect all exits for it as well, because those exits
  // might potentially see this object.
3497 | for (Value* use = alloc->input_use_list(); use != NULL; |
3498 | use = use->next_use()) { |
3499 | Definition* obj = StoreInto(use); |
3500 | if ((obj != NULL) && (obj != alloc)) { |
3501 | AddInstruction(&worklist_, obj); |
3502 | } |
3503 | } |
3504 | } |
3505 | |
3506 | void AllocationSinking::ExitsCollector::CollectTransitively(Definition* alloc) { |
3507 | exits_.TruncateTo(0); |
3508 | worklist_.TruncateTo(0); |
3509 | |
3510 | worklist_.Add(alloc); |
3511 | |
  // Note: the worklist can grow while we are iterating over it.
  // We do not remove allocations from the worklist, to avoid wasting space
  // on a side BitVector of already processed allocations: the worklist is
  // expected to be very small, so a linear search in it is just as efficient
  // as a bit vector.
3517 | for (intptr_t i = 0; i < worklist_.length(); i++) { |
3518 | Collect(worklist_[i]); |
3519 | } |
3520 | } |
3521 | |
3522 | void AllocationSinking::InsertMaterializations(Definition* alloc) { |
3523 | // Collect all fields that are written for this instance. |
3524 | auto slots = new (Z) ZoneGrowableArray<const Slot*>(5); |
3525 | |
3526 | for (Value* use = alloc->input_use_list(); use != NULL; |
3527 | use = use->next_use()) { |
3528 | StoreInstanceFieldInstr* store = use->instruction()->AsStoreInstanceField(); |
3529 | if ((store != NULL) && (store->instance()->definition() == alloc)) { |
3530 | AddSlot(slots, store->slot()); |
3531 | } |
3532 | } |
3533 | |
3534 | if (auto alloc_object = alloc->AsAllocateObject()) { |
3535 | if (alloc_object->type_arguments() != nullptr) { |
3536 | AddSlot(slots, Slot::GetTypeArgumentsSlotFor(flow_graph_->thread(), |
3537 | alloc_object->cls())); |
3538 | } |
3539 | } |
3540 | |
3541 | // Collect all instructions that mention this object in the environment. |
3542 | exits_collector_.CollectTransitively(alloc); |
3543 | |
3544 | // Insert materializations at environment uses. |
3545 | for (intptr_t i = 0; i < exits_collector_.exits().length(); i++) { |
3546 | CreateMaterializationAt(exits_collector_.exits()[i], alloc, *slots); |
3547 | } |
3548 | } |
3549 | |
3550 | // TryCatchAnalyzer tries to reduce the state that needs to be synchronized |
3551 | // on entry to the catch by discovering Parameter-s which are never used |
3552 | // or which are always constant. |
3553 | // |
3554 | // This analysis is similar to dead/redundant phi elimination because |
3555 | // Parameter instructions serve as "implicit" phis. |
3556 | // |
// Caveat: when analyzing which Parameter-s are redundant we limit ourselves
// to constant values because CatchBlockEntry-s hang directly off GraphEntry
// and thus are only dominated by the constants defined in GraphEntry. This
// means we can't replace a Parameter with an arbitrary Definition that is
// not a Constant, even if we know that the Parameter is redundant and would
// always evaluate to that Definition.
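//
// Illustrative sketch (hypothetical Dart code, not part of the analysis):
//
//   void f() {
//     const tag = 42;
//     try {
//       g(tag);      // every may-throw instruction in the try block records
//     } catch (e) {  // the constant 42 for `tag` in its environment, so the
//       print(tag);  // catch-entry Parameter for `tag` can be replaced by
//     }              // that constant
//   }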
3563 | class TryCatchAnalyzer : public ValueObject { |
3564 | public: |
3565 | explicit TryCatchAnalyzer(FlowGraph* flow_graph, bool is_aot) |
3566 | : flow_graph_(flow_graph), |
3567 | is_aot_(is_aot), |
3568 | // Initial capacity is selected based on trivial examples. |
3569 | worklist_(flow_graph, /*initial_capacity=*/10) {} |
3570 | |
3571 | // Run analysis and eliminate dead/redundant Parameter-s. |
3572 | void Optimize(); |
3573 | |
3574 | private: |
  // In precompiled mode we can eliminate all parameters that have no real
  // uses and subsequently clear out the corresponding slots in the
  // environments assigned to instructions that can throw an exception which
  // would be caught by the corresponding CatchBlockEntry.
  //
  // Computing "dead" parameters is essentially a fixed-point algorithm
  // because a Parameter value can flow into another Parameter via an
  // environment attached to an instruction that can throw.
  //
  // Note: this optimization pass assumes that environment values are only
  // used during catching, which is why it should only be used in AOT mode.
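  //
  // Illustrative sketch (hypothetical Dart code, not part of the analysis):
  //
  //   void f() {
  //     var big = build();  // alive before the try, captured in the
  //     use(big);           // environments of may-throw instructions inside
  //     try {               // the try block, but never used in the catch
  //       g();
  //     } catch (e) {
  //       print(e);
  //     }
  //   }
  //
  // The catch-entry Parameter corresponding to `big` has no real uses, so it
  // is replaced with null and the matching environment slots are cleared.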
3586 | void OptimizeDeadParameters() { |
3587 | ASSERT(is_aot_); |
3588 | |
3589 | NumberCatchEntryParameters(); |
3590 | ComputeIncomingValues(); |
3591 | CollectAliveParametersOrPhis(); |
3592 | PropagateLivenessToInputs(); |
3593 | EliminateDeadParameters(); |
3594 | } |
3595 | |
3596 | static intptr_t GetParameterId(const Instruction* instr) { |
3597 | return instr->GetPassSpecificId(CompilerPass::kTryCatchOptimization); |
3598 | } |
3599 | |
3600 | static void SetParameterId(Instruction* instr, intptr_t id) { |
3601 | instr->SetPassSpecificId(CompilerPass::kTryCatchOptimization, id); |
3602 | } |
3603 | |
3604 | static bool HasParameterId(Instruction* instr) { |
3605 | return instr->HasPassSpecificId(CompilerPass::kTryCatchOptimization); |
3606 | } |
3607 | |
  // Assign sequential ids to each ParameterInstr in each CatchBlockEntry.
  // Collect a reverse mapping from try indices to the corresponding catch
  // entries.
3610 | void NumberCatchEntryParameters() { |
3611 | for (auto catch_entry : flow_graph_->graph_entry()->catch_entries()) { |
3612 | const GrowableArray<Definition*>& idefs = |
3613 | *catch_entry->initial_definitions(); |
3614 | for (auto idef : idefs) { |
3615 | if (idef->IsParameter()) { |
3616 | SetParameterId(idef, parameter_info_.length()); |
3617 | parameter_info_.Add(new ParameterInfo(idef->AsParameter())); |
3618 | } |
3619 | } |
3620 | |
3621 | catch_by_index_.EnsureLength(catch_entry->catch_try_index() + 1, nullptr); |
3622 | catch_by_index_[catch_entry->catch_try_index()] = catch_entry; |
3623 | } |
3624 | } |
3625 | |
3626 | // Compute potential incoming values for each Parameter in each catch block |
3627 | // by looking into environments assigned to MayThrow instructions within |
3628 | // blocks covered by the corresponding catch. |
3629 | void ComputeIncomingValues() { |
3630 | for (auto block : flow_graph_->reverse_postorder()) { |
3631 | if (block->try_index() == kInvalidTryIndex) { |
3632 | continue; |
3633 | } |
3634 | |
3635 | ASSERT(block->try_index() < catch_by_index_.length()); |
3636 | auto catch_entry = catch_by_index_[block->try_index()]; |
3637 | const auto& idefs = *catch_entry->initial_definitions(); |
3638 | |
3639 | for (ForwardInstructionIterator instr_it(block); !instr_it.Done(); |
3640 | instr_it.Advance()) { |
3641 | Instruction* current = instr_it.Current(); |
3642 | if (!current->MayThrow()) continue; |
3643 | |
3644 | Environment* env = current->env()->Outermost(); |
3645 | ASSERT(env != nullptr); |
3646 | |
3647 | for (intptr_t env_idx = 0; env_idx < idefs.length(); ++env_idx) { |
3648 | if (ParameterInstr* param = idefs[env_idx]->AsParameter()) { |
3649 | Definition* defn = env->ValueAt(env_idx)->definition(); |
3650 | |
3651 | // Add defn as an incoming value to the parameter if it is not |
3652 | // already present in the list. |
3653 | bool found = false; |
3654 | for (auto other_defn : |
3655 | parameter_info_[GetParameterId(param)]->incoming) { |
3656 | if (other_defn == defn) { |
3657 | found = true; |
3658 | break; |
3659 | } |
3660 | } |
3661 | if (!found) { |
3662 | parameter_info_[GetParameterId(param)]->incoming.Add(defn); |
3663 | } |
3664 | } |
3665 | } |
3666 | } |
3667 | } |
3668 | } |
3669 | |
  // Find all parameters (and phis) that are definitely alive because they
  // have non-phi uses, and place them into the worklist.
  //
  // Note: phis that only have phi (and environment) uses will be marked as
  // dead.
3675 | void CollectAliveParametersOrPhis() { |
3676 | for (auto block : flow_graph_->reverse_postorder()) { |
3677 | if (JoinEntryInstr* join = block->AsJoinEntry()) { |
3678 | if (join->phis() == nullptr) continue; |
3679 | |
3680 | for (auto phi : *join->phis()) { |
3681 | phi->mark_dead(); |
3682 | if (HasNonPhiUse(phi)) { |
3683 | MarkLive(phi); |
3684 | } |
3685 | } |
3686 | } |
3687 | } |
3688 | |
3689 | for (auto info : parameter_info_) { |
3690 | if (HasNonPhiUse(info->instr)) { |
3691 | MarkLive(info->instr); |
3692 | } |
3693 | } |
3694 | } |
3695 | |
3696 | // Propagate liveness from live parameters and phis to other parameters and |
3697 | // phis transitively. |
3698 | void PropagateLivenessToInputs() { |
3699 | while (!worklist_.IsEmpty()) { |
3700 | Definition* defn = worklist_.RemoveLast(); |
3701 | if (ParameterInstr* param = defn->AsParameter()) { |
3702 | auto s = parameter_info_[GetParameterId(param)]; |
3703 | for (auto input : s->incoming) { |
3704 | MarkLive(input); |
3705 | } |
3706 | } else if (PhiInstr* phi = defn->AsPhi()) { |
3707 | for (intptr_t i = 0; i < phi->InputCount(); i++) { |
3708 | MarkLive(phi->InputAt(i)->definition()); |
3709 | } |
3710 | } |
3711 | } |
3712 | } |
3713 | |
  // Mark the definition as live if it is a dead Phi or a dead Parameter and
  // place it into the worklist.
3716 | void MarkLive(Definition* defn) { |
3717 | if (PhiInstr* phi = defn->AsPhi()) { |
3718 | if (!phi->is_alive()) { |
3719 | phi->mark_alive(); |
3720 | worklist_.Add(phi); |
3721 | } |
3722 | } else if (ParameterInstr* param = defn->AsParameter()) { |
3723 | if (HasParameterId(param)) { |
3724 | auto input_s = parameter_info_[GetParameterId(param)]; |
3725 | if (!input_s->alive) { |
3726 | input_s->alive = true; |
3727 | worklist_.Add(param); |
3728 | } |
3729 | } |
3730 | } |
3731 | } |
3732 | |
  // Replace all dead parameters with the null constant and clear the
  // corresponding slots in environments.
3735 | void EliminateDeadParameters() { |
3736 | for (auto info : parameter_info_) { |
3737 | if (!info->alive) { |
3738 | info->instr->ReplaceUsesWith(flow_graph_->constant_null()); |
3739 | } |
3740 | } |
3741 | |
3742 | for (auto block : flow_graph_->reverse_postorder()) { |
      if (block->try_index() == kInvalidTryIndex) continue;
3744 | |
3745 | auto catch_entry = catch_by_index_[block->try_index()]; |
3746 | const auto& idefs = *catch_entry->initial_definitions(); |
3747 | |
3748 | for (ForwardInstructionIterator instr_it(block); !instr_it.Done(); |
3749 | instr_it.Advance()) { |
3750 | Instruction* current = instr_it.Current(); |
3751 | if (!current->MayThrow()) continue; |
3752 | |
3753 | Environment* env = current->env()->Outermost(); |
3754 | RELEASE_ASSERT(env != nullptr); |
3755 | |
3756 | for (intptr_t env_idx = 0; env_idx < idefs.length(); ++env_idx) { |
3757 | if (ParameterInstr* param = idefs[env_idx]->AsParameter()) { |
3758 | if (!parameter_info_[GetParameterId(param)]->alive) { |
3759 | env->ValueAt(env_idx)->BindToEnvironment( |
3760 | flow_graph_->constant_null()); |
3761 | } |
3762 | } |
3763 | } |
3764 | } |
3765 | } |
3766 | |
3767 | DeadCodeElimination::RemoveDeadAndRedundantPhisFromTheGraph(flow_graph_); |
3768 | } |
3769 | |
3770 | // Returns true if definition has a use in an instruction which is not a phi. |
3771 | static bool HasNonPhiUse(Definition* defn) { |
3772 | for (Value* use = defn->input_use_list(); use != nullptr; |
3773 | use = use->next_use()) { |
3774 | if (!use->instruction()->IsPhi()) { |
3775 | return true; |
3776 | } |
3777 | } |
3778 | return false; |
3779 | } |
3780 | |
3781 | struct ParameterInfo : public ZoneAllocated { |
3782 | explicit ParameterInfo(ParameterInstr* instr) : instr(instr) {} |
3783 | |
3784 | ParameterInstr* instr; |
3785 | bool alive = false; |
3786 | GrowableArray<Definition*> incoming; |
3787 | }; |
3788 | |
3789 | FlowGraph* const flow_graph_; |
3790 | const bool is_aot_; |
3791 | |
  // Additional information for each Parameter from each CatchBlockEntry.
  // Parameter-s are numbered and their numbers are stored as pass-specific
  // ids on the instructions (see GetParameterId/SetParameterId above), which
  // are otherwise not used for anything at this stage.
3796 | GrowableArray<ParameterInfo*> parameter_info_; |
3797 | |
3798 | // Mapping from catch_try_index to corresponding CatchBlockEntry-s. |
3799 | GrowableArray<CatchBlockEntryInstr*> catch_by_index_; |
3800 | |
3801 | // Worklist for live Phi and Parameter instructions which need to be |
3802 | // processed by PropagateLivenessToInputs. |
3803 | DefinitionWorklist worklist_; |
3804 | }; |
3805 | |
3806 | void OptimizeCatchEntryStates(FlowGraph* flow_graph, bool is_aot) { |
3807 | if (flow_graph->graph_entry()->catch_entries().is_empty()) { |
3808 | return; |
3809 | } |
3810 | |
3811 | TryCatchAnalyzer analyzer(flow_graph, is_aot); |
3812 | analyzer.Optimize(); |
3813 | } |
3814 | |
3815 | void TryCatchAnalyzer::Optimize() { |
3816 | // Analyze catch entries and remove "dead" Parameter instructions. |
3817 | if (is_aot_) { |
3818 | OptimizeDeadParameters(); |
3819 | } |
3820 | |
3821 | // For every catch-block: Iterate over all call instructions inside the |
3822 | // corresponding try-block and figure out for each environment value if it |
3823 | // is the same constant at all calls. If yes, replace the initial definition |
3824 | // at the catch-entry with this constant. |
3825 | const GrowableArray<CatchBlockEntryInstr*>& catch_entries = |
3826 | flow_graph_->graph_entry()->catch_entries(); |
3827 | |
3828 | for (auto catch_entry : catch_entries) { |
3829 | // Initialize cdefs with the original initial definitions (ParameterInstr). |
3830 | // The following representation is used: |
3831 | // ParameterInstr => unknown |
3832 | // ConstantInstr => known constant |
3833 | // NULL => non-constant |
3834 | GrowableArray<Definition*>* idefs = catch_entry->initial_definitions(); |
3835 | GrowableArray<Definition*> cdefs(idefs->length()); |
3836 | cdefs.AddArray(*idefs); |
3837 | |
3838 | // exception_var and stacktrace_var are never constant. In asynchronous or |
3839 | // generator functions they may be context-allocated in which case they are |
3840 | // not tracked in the environment anyway. |
3841 | |
3842 | cdefs[flow_graph_->EnvIndex(catch_entry->raw_exception_var())] = nullptr; |
3843 | cdefs[flow_graph_->EnvIndex(catch_entry->raw_stacktrace_var())] = nullptr; |
3844 | |
3845 | for (BlockIterator block_it = flow_graph_->reverse_postorder_iterator(); |
3846 | !block_it.Done(); block_it.Advance()) { |
3847 | BlockEntryInstr* block = block_it.Current(); |
3848 | if (block->try_index() == catch_entry->catch_try_index()) { |
3849 | for (ForwardInstructionIterator instr_it(block); !instr_it.Done(); |
3850 | instr_it.Advance()) { |
3851 | Instruction* current = instr_it.Current(); |
3852 | if (current->MayThrow()) { |
3853 | Environment* env = current->env()->Outermost(); |
3854 | ASSERT(env != nullptr); |
3855 | for (intptr_t env_idx = 0; env_idx < cdefs.length(); ++env_idx) { |
3856 | if (cdefs[env_idx] != nullptr && !cdefs[env_idx]->IsConstant() && |
3857 | env->ValueAt(env_idx)->BindsToConstant()) { |
                // If the recorded definition is not yet a constant and the
                // environment value binds to a constant, record that value
                // as the current constant definition.
3860 | cdefs[env_idx] = env->ValueAt(env_idx)->definition(); |
3861 | } |
3862 | if (cdefs[env_idx] != env->ValueAt(env_idx)->definition()) { |
3863 | // Non-constant definitions are reset to nullptr. |
3864 | cdefs[env_idx] = nullptr; |
3865 | } |
3866 | } |
3867 | } |
3868 | } |
3869 | } |
3870 | } |
3871 | for (intptr_t j = 0; j < idefs->length(); ++j) { |
3872 | if (cdefs[j] != nullptr && cdefs[j]->IsConstant()) { |
3873 | Definition* old = (*idefs)[j]; |
3874 | ConstantInstr* orig = cdefs[j]->AsConstant(); |
3875 | ConstantInstr* copy = |
3876 | new (flow_graph_->zone()) ConstantInstr(orig->value()); |
3877 | copy->set_ssa_temp_index(flow_graph_->alloc_ssa_temp_index()); |
3878 | if (FlowGraph::NeedsPairLocation(copy->representation())) { |
3879 | flow_graph_->alloc_ssa_temp_index(); |
3880 | } |
3881 | old->ReplaceUsesWith(copy); |
3882 | copy->set_previous(old->previous()); // partial link |
3883 | (*idefs)[j] = copy; |
3884 | } |
3885 | } |
3886 | } |
3887 | } |
3888 | |
3889 | // Returns true iff this definition is used in a non-phi instruction. |
3890 | static bool HasRealUse(Definition* def) { |
3891 | // Environment uses are real (non-phi) uses. |
3892 | if (def->env_use_list() != NULL) return true; |
3893 | |
3894 | for (Value::Iterator it(def->input_use_list()); !it.Done(); it.Advance()) { |
3895 | if (!it.Current()->instruction()->IsPhi()) return true; |
3896 | } |
3897 | return false; |
3898 | } |
3899 | |
3900 | void DeadCodeElimination::EliminateDeadPhis(FlowGraph* flow_graph) { |
3901 | GrowableArray<PhiInstr*> live_phis; |
3902 | for (BlockIterator b = flow_graph->postorder_iterator(); !b.Done(); |
3903 | b.Advance()) { |
3904 | JoinEntryInstr* join = b.Current()->AsJoinEntry(); |
3905 | if (join != NULL) { |
3906 | for (PhiIterator it(join); !it.Done(); it.Advance()) { |
3907 | PhiInstr* phi = it.Current(); |
        // Phis that have real (non-phi) uses, including environment uses,
        // are marked as live.
3910 | if (HasRealUse(phi)) { |
3911 | live_phis.Add(phi); |
3912 | phi->mark_alive(); |
3913 | } else { |
3914 | phi->mark_dead(); |
3915 | } |
3916 | } |
3917 | } |
3918 | } |
3919 | |
3920 | while (!live_phis.is_empty()) { |
3921 | PhiInstr* phi = live_phis.RemoveLast(); |
3922 | for (intptr_t i = 0; i < phi->InputCount(); i++) { |
3923 | Value* val = phi->InputAt(i); |
3924 | PhiInstr* used_phi = val->definition()->AsPhi(); |
3925 | if ((used_phi != NULL) && !used_phi->is_alive()) { |
3926 | used_phi->mark_alive(); |
3927 | live_phis.Add(used_phi); |
3928 | } |
3929 | } |
3930 | } |
3931 | |
3932 | RemoveDeadAndRedundantPhisFromTheGraph(flow_graph); |
3933 | } |
3934 | |
3935 | void DeadCodeElimination::RemoveDeadAndRedundantPhisFromTheGraph( |
3936 | FlowGraph* flow_graph) { |
3937 | for (auto block : flow_graph->postorder()) { |
3938 | if (JoinEntryInstr* join = block->AsJoinEntry()) { |
3939 | if (join->phis_ == nullptr) continue; |
3940 | |
3941 | // Eliminate dead phis and compact the phis_ array of the block. |
3942 | intptr_t to_index = 0; |
3943 | for (intptr_t i = 0; i < join->phis_->length(); ++i) { |
3944 | PhiInstr* phi = (*join->phis_)[i]; |
3945 | if (phi != nullptr) { |
3946 | if (!phi->is_alive()) { |
3947 | phi->ReplaceUsesWith(flow_graph->constant_null()); |
3948 | phi->UnuseAllInputs(); |
3949 | (*join->phis_)[i] = nullptr; |
3950 | if (FLAG_trace_optimization) { |
3951 | THR_Print("Removing dead phi v%" Pd "\n" , phi->ssa_temp_index()); |
3952 | } |
3953 | } else { |
3954 | (*join->phis_)[to_index++] = phi; |
3955 | } |
3956 | } |
3957 | } |
3958 | if (to_index == 0) { |
3959 | join->phis_ = nullptr; |
3960 | } else { |
3961 | join->phis_->TruncateTo(to_index); |
3962 | } |
3963 | } |
3964 | } |
3965 | } |
3966 | |
// Returns true if the [current] instruction can possibly be eliminated
// (i.e. if its result is not used).
3969 | static bool CanEliminateInstruction(Instruction* current, |
3970 | BlockEntryInstr* block) { |
3971 | ASSERT(current->GetBlock() == block); |
3972 | if (MayHaveVisibleEffect(current) || current->CanDeoptimize() || |
3973 | current == block->last_instruction() || current->IsMaterializeObject() || |
3974 | current->IsCheckStackOverflow() || current->IsReachabilityFence() || |
3975 | current->IsEnterHandleScope() || current->IsExitHandleScope() || |
3976 | current->IsRawStoreField()) { |
3977 | return false; |
3978 | } |
3979 | return true; |
3980 | } |
3981 | |
3982 | void DeadCodeElimination::EliminateDeadCode(FlowGraph* flow_graph) { |
3983 | GrowableArray<Instruction*> worklist; |
3984 | BitVector live(flow_graph->zone(), flow_graph->current_ssa_temp_index()); |
3985 | |
  // Mark all instructions that cannot be eliminated (e.g. instructions with
  // visible effects) as live roots.
3987 | for (BlockIterator block_it = flow_graph->reverse_postorder_iterator(); |
3988 | !block_it.Done(); block_it.Advance()) { |
3989 | BlockEntryInstr* block = block_it.Current(); |
3990 | for (ForwardInstructionIterator it(block); !it.Done(); it.Advance()) { |
3991 | Instruction* current = it.Current(); |
3992 | ASSERT(!current->IsPushArgument()); |
3993 | // TODO(alexmarkov): take control dependencies into account and |
3994 | // eliminate dead branches/conditions. |
3995 | if (!CanEliminateInstruction(current, block)) { |
3996 | worklist.Add(current); |
3997 | if (Definition* def = current->AsDefinition()) { |
3998 | if (def->HasSSATemp()) { |
3999 | live.Add(def->ssa_temp_index()); |
4000 | } |
4001 | } |
4002 | } |
4003 | } |
4004 | } |
4005 | |
4006 | // Iteratively follow inputs of instructions in the work list. |
4007 | while (!worklist.is_empty()) { |
4008 | Instruction* current = worklist.RemoveLast(); |
4009 | for (intptr_t i = 0, n = current->InputCount(); i < n; ++i) { |
4010 | Definition* input = current->InputAt(i)->definition(); |
4011 | ASSERT(input->HasSSATemp()); |
4012 | if (!live.Contains(input->ssa_temp_index())) { |
4013 | worklist.Add(input); |
4014 | live.Add(input->ssa_temp_index()); |
4015 | } |
4016 | } |
4017 | for (intptr_t i = 0, n = current->ArgumentCount(); i < n; ++i) { |
4018 | Definition* input = current->ArgumentAt(i); |
4019 | ASSERT(input->HasSSATemp()); |
4020 | if (!live.Contains(input->ssa_temp_index())) { |
4021 | worklist.Add(input); |
4022 | live.Add(input->ssa_temp_index()); |
4023 | } |
4024 | } |
4025 | if (current->env() != nullptr) { |
4026 | for (Environment::DeepIterator it(current->env()); !it.Done(); |
4027 | it.Advance()) { |
4028 | Definition* input = it.CurrentValue()->definition(); |
4029 | ASSERT(!input->IsPushArgument()); |
4030 | if (input->HasSSATemp() && !live.Contains(input->ssa_temp_index())) { |
4031 | worklist.Add(input); |
4032 | live.Add(input->ssa_temp_index()); |
4033 | } |
4034 | } |
4035 | } |
4036 | } |
4037 | |
4038 | // Remove all instructions which are not marked as live. |
4039 | for (BlockIterator block_it = flow_graph->reverse_postorder_iterator(); |
4040 | !block_it.Done(); block_it.Advance()) { |
4041 | BlockEntryInstr* block = block_it.Current(); |
4042 | if (JoinEntryInstr* join = block->AsJoinEntry()) { |
4043 | for (PhiIterator it(join); !it.Done(); it.Advance()) { |
4044 | PhiInstr* current = it.Current(); |
4045 | if (!live.Contains(current->ssa_temp_index())) { |
4046 | it.RemoveCurrentFromGraph(); |
4047 | } |
4048 | } |
4049 | } |
4050 | for (ForwardInstructionIterator it(block); !it.Done(); it.Advance()) { |
4051 | Instruction* current = it.Current(); |
4052 | if (!CanEliminateInstruction(current, block)) { |
4053 | continue; |
4054 | } |
4055 | ASSERT(!current->IsPushArgument()); |
4056 | ASSERT((current->ArgumentCount() == 0) || !current->HasPushArguments()); |
4057 | if (Definition* def = current->AsDefinition()) { |
4058 | if (def->HasSSATemp() && live.Contains(def->ssa_temp_index())) { |
4059 | continue; |
4060 | } |
4061 | } |
4062 | it.RemoveCurrentFromGraph(); |
4063 | } |
4064 | } |
4065 | } |
4066 | |
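// Removes the first CheckStackOverflow instruction encountered in reverse
// postorder (asserted not to be inside a loop) provided that no instruction
// in the graph has unknown side effects; for Branch instructions only the
// underlying comparison is inspected. If any such instruction is found, the
// graph is left untouched.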
4067 | void CheckStackOverflowElimination::EliminateStackOverflow(FlowGraph* graph) { |
4068 | CheckStackOverflowInstr* first_stack_overflow_instr = NULL; |
4069 | for (BlockIterator block_it = graph->reverse_postorder_iterator(); |
4070 | !block_it.Done(); block_it.Advance()) { |
4071 | BlockEntryInstr* entry = block_it.Current(); |
4072 | |
4073 | for (ForwardInstructionIterator it(entry); !it.Done(); it.Advance()) { |
4074 | Instruction* current = it.Current(); |
4075 | |
4076 | if (CheckStackOverflowInstr* instr = current->AsCheckStackOverflow()) { |
4077 | if (first_stack_overflow_instr == NULL) { |
4078 | first_stack_overflow_instr = instr; |
4079 | ASSERT(!first_stack_overflow_instr->in_loop()); |
4080 | } |
4081 | continue; |
4082 | } |
4083 | |
4084 | if (current->IsBranch()) { |
4085 | current = current->AsBranch()->comparison(); |
4086 | } |
4087 | |
4088 | if (current->HasUnknownSideEffects()) { |
4089 | return; |
4090 | } |
4091 | } |
4092 | } |
4093 | |
4094 | if (first_stack_overflow_instr != NULL) { |
4095 | first_stack_overflow_instr->RemoveFromGraph(); |
4096 | } |
4097 | } |
4098 | |
4099 | } // namespace dart |
4100 | |