1// Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file
2// for details. All rights reserved. Use of this source code is governed by a
3// BSD-style license that can be found in the LICENSE file.
4
5#if !defined(DART_PRECOMPILED_RUNTIME)
6
7#include "vm/regexp_assembler_ir.h"
8
9#include "platform/unicode.h"
10#include "vm/bit_vector.h"
11#include "vm/compiler/backend/il_printer.h"
12#include "vm/compiler/frontend/flow_graph_builder.h"
13#include "vm/compiler/jit/compiler.h"
14#include "vm/dart_entry.h"
15#include "vm/longjump.h"
16#include "vm/object_store.h"
17#include "vm/regexp.h"
18#include "vm/resolver.h"
19#include "vm/runtime_entry.h"
20#include "vm/stack_frame.h"
21
22#define Z zone()
23
24// Debugging output macros. TAG() is called at the head of each interesting
25// function and prints its name during execution if irregexp tracing is enabled.
26#define TAG() \
27 if (FLAG_trace_irregexp) { \
28 TAG_(); \
29 }
30#define TAG_() \
31 Print(Bind(new (Z) ConstantInstr(String::ZoneHandle( \
32 Z, String::Concat(String::Handle(String::New("TAG: ")), \
33 String::Handle(String::New(__FUNCTION__)), \
34 Heap::kOld)))));
35
36#define PRINT(arg) \
37 if (FLAG_trace_irregexp) { \
38 Print(arg); \
39 }
40
41namespace dart {
42
43static const intptr_t kMinStackSize = 512;
44
45/*
46 * This assembler uses the following main local variables:
47 * - stack_: A pointer to a growable list which we use as an all-purpose stack
48 * storing backtracking offsets, positions & stored register values.
49 * - current_character_: Stores the currently loaded characters (possibly more
50 * than one).
51 * - current_position_: The current position within the string, stored as a
52 * negative offset from the end of the string (i.e. the
53 * position corresponding to str[0] is -str.length).
54 * Note that current_position_ is *not* byte-based, unlike
55 * original V8 code.
56 *
57 * Results are returned though an array of capture indices, stored at
58 * matches_param_. A null array specifies a failure to match. The match indices
59 * [start_inclusive, end_exclusive] for capture group i are stored at positions
60 * matches_param_[i * 2] and matches_param_[i * 2 + 1], respectively. Match
61 * indices of -1 denote non-matched groups. Note that we store these indices
62 * as a negative offset from the end of the string in registers_array_
63 * during processing, and convert them to standard indexes when copying them
64 * to matches_param_ on successful match.
65 */
66IRRegExpMacroAssembler::IRRegExpMacroAssembler(
67 intptr_t specialization_cid,
68 intptr_t capture_count,
69 const ParsedFunction* parsed_function,
70 const ZoneGrowableArray<const ICData*>& ic_data_array,
71 intptr_t osr_id,
72 Zone* zone)
73 : RegExpMacroAssembler(zone),
74 thread_(Thread::Current()),
75 specialization_cid_(specialization_cid),
76 parsed_function_(parsed_function),
77 ic_data_array_(ic_data_array),
78 current_instruction_(NULL),
79 stack_(NULL),
80 stack_pointer_(NULL),
81 current_character_(NULL),
82 current_position_(NULL),
83 string_param_(NULL),
84 string_param_length_(NULL),
85 start_index_param_(NULL),
86 registers_count_(0),
87 saved_registers_count_((capture_count + 1) * 2),
88 stack_array_cell_(Array::ZoneHandle(zone, Array::New(1, Heap::kOld))),
89 // The registers array is allocated at a fixed size after assembly.
90 registers_array_(TypedData::ZoneHandle(zone, TypedData::null())),
91 // B0 is taken by GraphEntry thus block ids must start at 1.
92 block_id_(1) {
93 switch (specialization_cid) {
94 case kOneByteStringCid:
95 case kExternalOneByteStringCid:
96 mode_ = ASCII;
97 break;
98 case kTwoByteStringCid:
99 case kExternalTwoByteStringCid:
100 mode_ = UC16;
101 break;
102 default:
103 UNREACHABLE();
104 }
105
106 InitializeLocals();
107
108 // Allocate an initial stack backing of the minimum stack size. The stack
109 // backing is indirectly referred to so we can reuse it on subsequent matches
110 // even in the case where the backing has been enlarged and thus reallocated.
111 stack_array_cell_.SetAt(
112 0,
113 TypedData::Handle(zone, TypedData::New(kTypedDataInt32ArrayCid,
114 kMinStackSize / 4, Heap::kOld)));
115
116 // Create and generate all preset blocks.
117 entry_block_ = new (zone) GraphEntryInstr(*parsed_function_, osr_id);
118
119 auto function_entry = new (zone) FunctionEntryInstr(
120 entry_block_, block_id_.Alloc(), kInvalidTryIndex, GetNextDeoptId());
121 entry_block_->set_normal_entry(function_entry);
122
123 start_block_ = new (zone)
124 JoinEntryInstr(block_id_.Alloc(), kInvalidTryIndex, GetNextDeoptId());
125 success_block_ = new (zone)
126 JoinEntryInstr(block_id_.Alloc(), kInvalidTryIndex, GetNextDeoptId());
127 backtrack_block_ = new (zone)
128 JoinEntryInstr(block_id_.Alloc(), kInvalidTryIndex, GetNextDeoptId());
129 exit_block_ = new (zone)
130 JoinEntryInstr(block_id_.Alloc(), kInvalidTryIndex, GetNextDeoptId());
131
132 GenerateEntryBlock();
133 GenerateSuccessBlock();
134 GenerateExitBlock();
135
136 blocks_.Add(entry_block_);
137 blocks_.Add(entry_block_->normal_entry());
138 blocks_.Add(start_block_);
139 blocks_.Add(success_block_);
140 blocks_.Add(backtrack_block_);
141 blocks_.Add(exit_block_);
142
143 // Begin emission at the start_block_.
144 set_current_instruction(start_block_);
145}
146
147IRRegExpMacroAssembler::~IRRegExpMacroAssembler() {}
148
149void IRRegExpMacroAssembler::InitializeLocals() {
150 // All generated functions are expected to have a current-context variable.
151 // This variable is unused in irregexp functions.
152 parsed_function_->current_context_var()->set_index(
153 VariableIndex(GetNextLocalIndex()));
154
155 // Create local variables and parameters.
156 stack_ = Local(Symbols::stack());
157 stack_pointer_ = Local(Symbols::stack_pointer());
158 registers_ = Local(Symbols::position_registers());
159 current_character_ = Local(Symbols::current_character());
160 current_position_ = Local(Symbols::current_position());
161 string_param_length_ = Local(Symbols::string_param_length());
162 capture_length_ = Local(Symbols::capture_length());
163 match_start_index_ = Local(Symbols::match_start_index());
164 capture_start_index_ = Local(Symbols::capture_start_index());
165 match_end_index_ = Local(Symbols::match_end_index());
166 char_in_capture_ = Local(Symbols::char_in_capture());
167 char_in_match_ = Local(Symbols::char_in_match());
168 index_temp_ = Local(Symbols::index_temp());
169 result_ = Local(Symbols::result());
170
171 string_param_ = Parameter(Symbols::string_param(),
172 RegExpMacroAssembler::kParamStringIndex);
173 start_index_param_ = Parameter(Symbols::start_index_param(),
174 RegExpMacroAssembler::kParamStartOffsetIndex);
175}
176
177void IRRegExpMacroAssembler::GenerateEntryBlock() {
178 set_current_instruction(entry_block_->normal_entry());
179 TAG();
180
181 // Store string.length.
182 Value* string_push = PushLocal(string_param_);
183
184 StoreLocal(string_param_length_,
185 Bind(InstanceCall(InstanceCallDescriptor(String::ZoneHandle(
186 Field::GetterSymbol(Symbols::Length()))),
187 string_push)));
188
189 // Store (start_index - string.length) as the current position (since it's a
190 // negative offset from the end of the string).
191 Value* start_index_push = PushLocal(start_index_param_);
192 Value* length_push = PushLocal(string_param_length_);
193
194 StoreLocal(current_position_, Bind(Sub(start_index_push, length_push)));
195
196 // Generate a local list variable to represent "registers" and
197 // initialize capture registers (others remain garbage).
198 StoreLocal(registers_, Bind(new (Z) ConstantInstr(registers_array_)));
199 ClearRegisters(0, saved_registers_count_ - 1);
200
201 // Generate a local list variable to represent the backtracking stack.
202 Value* stack_cell_push = Bind(new (Z) ConstantInstr(stack_array_cell_));
203 StoreLocal(stack_,
204 Bind(InstanceCall(InstanceCallDescriptor::FromToken(Token::kINDEX),
205 stack_cell_push, Bind(Uint64Constant(0)))));
206 StoreLocal(stack_pointer_, Bind(Int64Constant(-1)));
207
208 // Jump to the start block.
209 current_instruction_->Goto(start_block_);
210}
211
212void IRRegExpMacroAssembler::GenerateBacktrackBlock() {
213 set_current_instruction(backtrack_block_);
214 TAG();
215 CheckPreemption(/*is_backtrack=*/true);
216
217 const intptr_t entries_count = entry_block_->indirect_entries().length();
218
219 TypedData& offsets = TypedData::ZoneHandle(
220 Z, TypedData::New(kTypedDataInt32ArrayCid, entries_count, Heap::kOld));
221
222 Value* block_offsets_push = Bind(new (Z) ConstantInstr(offsets));
223 Value* block_id_push = Bind(PopStack());
224
225 Value* offset_value =
226 Bind(InstanceCall(InstanceCallDescriptor::FromToken(Token::kINDEX),
227 block_offsets_push, block_id_push));
228
229 backtrack_goto_ = new (Z) IndirectGotoInstr(&offsets, offset_value);
230 CloseBlockWith(backtrack_goto_);
231
232 // Add an edge from the "indirect" goto to each of the targets.
233 for (intptr_t j = 0; j < entries_count; j++) {
234 backtrack_goto_->AddSuccessor(
235 TargetWithJoinGoto(entry_block_->indirect_entries().At(j)));
236 }
237}
238
239void IRRegExpMacroAssembler::GenerateSuccessBlock() {
240 set_current_instruction(success_block_);
241 TAG();
242
243 Value* type = Bind(new (Z) ConstantInstr(TypeArguments::ZoneHandle(
244 Z, Isolate::Current()->object_store()->type_argument_int())));
245 Value* length = Bind(Uint64Constant(saved_registers_count_));
246 Value* array = Bind(new (Z) CreateArrayInstr(TokenPosition::kNoSource, type,
247 length, GetNextDeoptId()));
248 StoreLocal(result_, array);
249
250 // Store captured offsets in the `matches` parameter.
251 for (intptr_t i = 0; i < saved_registers_count_; i++) {
252 Value* matches_push = PushLocal(result_);
253 Value* index_push = Bind(Uint64Constant(i));
254
255 // Convert negative offsets from the end of the string to string indices.
256 // TODO(zerny): use positive offsets from the get-go.
257 Value* offset_push = LoadRegister(i);
258 Value* len_push = PushLocal(string_param_length_);
259 Value* value_push = Bind(Add(offset_push, len_push));
260
261 Do(InstanceCall(InstanceCallDescriptor::FromToken(Token::kASSIGN_INDEX),
262 matches_push, index_push, value_push));
263 }
264
265 // Print the result if tracing.
266 PRINT(PushLocal(result_));
267
268 // Return true on success.
269 AppendInstruction(new (Z) ReturnInstr(
270 TokenPosition::kNoSource, Bind(LoadLocal(result_)), GetNextDeoptId()));
271}
272
273void IRRegExpMacroAssembler::GenerateExitBlock() {
274 set_current_instruction(exit_block_);
275 TAG();
276
277 // Return false on failure.
278 AppendInstruction(new (Z) ReturnInstr(
279 TokenPosition::kNoSource, Bind(LoadLocal(result_)), GetNextDeoptId()));
280}
281
282void IRRegExpMacroAssembler::FinalizeRegistersArray() {
283 ASSERT(registers_count_ >= saved_registers_count_);
284 registers_array_ =
285 TypedData::New(kTypedDataInt32ArrayCid, registers_count_, Heap::kOld);
286}
287
288bool IRRegExpMacroAssembler::CanReadUnaligned() {
289 return !slow_safe();
290}
291
292ArrayPtr IRRegExpMacroAssembler::Execute(const RegExp& regexp,
293 const String& input,
294 const Smi& start_offset,
295 bool sticky,
296 Zone* zone) {
297 const intptr_t cid = input.GetClassId();
298 const Function& fun = Function::Handle(regexp.function(cid, sticky));
299 ASSERT(!fun.IsNull());
300 // Create the argument list.
301 const Array& args =
302 Array::Handle(Array::New(RegExpMacroAssembler::kParamCount));
303 args.SetAt(RegExpMacroAssembler::kParamRegExpIndex, regexp);
304 args.SetAt(RegExpMacroAssembler::kParamStringIndex, input);
305 args.SetAt(RegExpMacroAssembler::kParamStartOffsetIndex, start_offset);
306
307 // And finally call the generated code.
308
309 const Object& retval =
310 Object::Handle(zone, DartEntry::InvokeFunction(fun, args));
311 if (retval.IsUnwindError()) {
312 Exceptions::PropagateError(Error::Cast(retval));
313 }
314 if (retval.IsError()) {
315 const Error& error = Error::Cast(retval);
316 OS::PrintErr("%s\n", error.ToErrorCString());
317 // Should never happen.
318 UNREACHABLE();
319 }
320
321 if (retval.IsNull()) {
322 return Array::null();
323 }
324
325 ASSERT(retval.IsArray());
326 return Array::Cast(retval).raw();
327}
328
329LocalVariable* IRRegExpMacroAssembler::Parameter(const String& name,
330 intptr_t index) const {
331 LocalVariable* local =
332 new (Z) LocalVariable(TokenPosition::kNoSource, TokenPosition::kNoSource,
333 name, Object::dynamic_type());
334
335 intptr_t param_frame_index = kParamCount - index;
336 local->set_index(VariableIndex(param_frame_index));
337
338 return local;
339}
340
341LocalVariable* IRRegExpMacroAssembler::Local(const String& name) {
342 LocalVariable* local =
343 new (Z) LocalVariable(TokenPosition::kNoSource, TokenPosition::kNoSource,
344 name, Object::dynamic_type());
345 local->set_index(VariableIndex(GetNextLocalIndex()));
346
347 return local;
348}
349
350ConstantInstr* IRRegExpMacroAssembler::Int64Constant(int64_t value) const {
351 return new (Z)
352 ConstantInstr(Integer::ZoneHandle(Z, Integer::NewCanonical(value)));
353}
354
355ConstantInstr* IRRegExpMacroAssembler::Uint64Constant(uint64_t value) const {
356 ASSERT(value < static_cast<uint64_t>(kMaxInt64));
357 return Int64Constant(static_cast<int64_t>(value));
358}
359
360ConstantInstr* IRRegExpMacroAssembler::BoolConstant(bool value) const {
361 return new (Z) ConstantInstr(value ? Bool::True() : Bool::False());
362}
363
364ConstantInstr* IRRegExpMacroAssembler::StringConstant(const char* value) const {
365 return new (Z)
366 ConstantInstr(String::ZoneHandle(Z, String::New(value, Heap::kOld)));
367}
368
369ConstantInstr* IRRegExpMacroAssembler::WordCharacterMapConstant() const {
370 const Library& lib = Library::Handle(Z, Library::CoreLibrary());
371 const Class& regexp_class =
372 Class::Handle(Z, lib.LookupClassAllowPrivate(Symbols::_RegExp()));
373 const Field& word_character_field = Field::ZoneHandle(
374 Z,
375 regexp_class.LookupStaticFieldAllowPrivate(Symbols::_wordCharacterMap()));
376 ASSERT(!word_character_field.IsNull());
377
378 DEBUG_ASSERT(Thread::Current()->TopErrorHandlerIsSetJump());
379 if (word_character_field.IsUninitialized()) {
380 ASSERT(!Compiler::IsBackgroundCompilation());
381 const Error& error =
382 Error::Handle(Z, word_character_field.InitializeStatic());
383 if (!error.IsNull()) {
384 Report::LongJump(error);
385 }
386 }
387 ASSERT(!word_character_field.IsUninitialized());
388
389 return new (Z) ConstantInstr(
390 Instance::ZoneHandle(Z, word_character_field.StaticValue()));
391}
392
393ComparisonInstr* IRRegExpMacroAssembler::Comparison(ComparisonKind kind,
394 Value* lhs,
395 Value* rhs) {
396 Token::Kind strict_comparison = Token::kEQ_STRICT;
397 Token::Kind intermediate_operator = Token::kILLEGAL;
398 switch (kind) {
399 case kEQ:
400 intermediate_operator = Token::kEQ;
401 break;
402 case kNE:
403 intermediate_operator = Token::kEQ;
404 strict_comparison = Token::kNE_STRICT;
405 break;
406 case kLT:
407 intermediate_operator = Token::kLT;
408 break;
409 case kGT:
410 intermediate_operator = Token::kGT;
411 break;
412 case kLTE:
413 intermediate_operator = Token::kLTE;
414 break;
415 case kGTE:
416 intermediate_operator = Token::kGTE;
417 break;
418 default:
419 UNREACHABLE();
420 }
421
422 ASSERT(intermediate_operator != Token::kILLEGAL);
423
424 Value* lhs_value = Bind(InstanceCall(
425 InstanceCallDescriptor::FromToken(intermediate_operator), lhs, rhs));
426 Value* rhs_value = Bind(BoolConstant(true));
427
428 return new (Z)
429 StrictCompareInstr(TokenPosition::kNoSource, strict_comparison, lhs_value,
430 rhs_value, true, GetNextDeoptId());
431}
432
433ComparisonInstr* IRRegExpMacroAssembler::Comparison(ComparisonKind kind,
434 Definition* lhs,
435 Definition* rhs) {
436 Value* lhs_push = Bind(lhs);
437 Value* rhs_push = Bind(rhs);
438 return Comparison(kind, lhs_push, rhs_push);
439}
440
441StaticCallInstr* IRRegExpMacroAssembler::StaticCall(
442 const Function& function,
443 ICData::RebindRule rebind_rule) const {
444 InputsArray* arguments = new (Z) InputsArray(Z, 0);
445 return StaticCall(function, arguments, rebind_rule);
446}
447
448StaticCallInstr* IRRegExpMacroAssembler::StaticCall(
449 const Function& function,
450 Value* arg1,
451 ICData::RebindRule rebind_rule) const {
452 InputsArray* arguments = new (Z) InputsArray(Z, 1);
453 arguments->Add(arg1);
454
455 return StaticCall(function, arguments, rebind_rule);
456}
457
458StaticCallInstr* IRRegExpMacroAssembler::StaticCall(
459 const Function& function,
460 Value* arg1,
461 Value* arg2,
462 ICData::RebindRule rebind_rule) const {
463 InputsArray* arguments = new (Z) InputsArray(Z, 2);
464 arguments->Add(arg1);
465 arguments->Add(arg2);
466
467 return StaticCall(function, arguments, rebind_rule);
468}
469
470StaticCallInstr* IRRegExpMacroAssembler::StaticCall(
471 const Function& function,
472 InputsArray* arguments,
473 ICData::RebindRule rebind_rule) const {
474 const intptr_t kTypeArgsLen = 0;
475 return new (Z) StaticCallInstr(TokenPosition::kNoSource, function,
476 kTypeArgsLen, Object::null_array(), arguments,
477 ic_data_array_, GetNextDeoptId(), rebind_rule);
478}
479
480InstanceCallInstr* IRRegExpMacroAssembler::InstanceCall(
481 const InstanceCallDescriptor& desc,
482 Value* arg1) const {
483 InputsArray* arguments = new (Z) InputsArray(Z, 1);
484 arguments->Add(arg1);
485
486 return InstanceCall(desc, arguments);
487}
488
489InstanceCallInstr* IRRegExpMacroAssembler::InstanceCall(
490 const InstanceCallDescriptor& desc,
491 Value* arg1,
492 Value* arg2) const {
493 InputsArray* arguments = new (Z) InputsArray(Z, 2);
494 arguments->Add(arg1);
495 arguments->Add(arg2);
496
497 return InstanceCall(desc, arguments);
498}
499
500InstanceCallInstr* IRRegExpMacroAssembler::InstanceCall(
501 const InstanceCallDescriptor& desc,
502 Value* arg1,
503 Value* arg2,
504 Value* arg3) const {
505 InputsArray* arguments = new (Z) InputsArray(Z, 3);
506 arguments->Add(arg1);
507 arguments->Add(arg2);
508 arguments->Add(arg3);
509
510 return InstanceCall(desc, arguments);
511}
512
513InstanceCallInstr* IRRegExpMacroAssembler::InstanceCall(
514 const InstanceCallDescriptor& desc,
515 InputsArray* arguments) const {
516 const intptr_t kTypeArgsLen = 0;
517 return new (Z) InstanceCallInstr(
518 TokenPosition::kNoSource, desc.name, desc.token_kind, arguments,
519 kTypeArgsLen, Object::null_array(), desc.checked_argument_count,
520 ic_data_array_, GetNextDeoptId());
521}
522
523LoadLocalInstr* IRRegExpMacroAssembler::LoadLocal(LocalVariable* local) const {
524 return new (Z) LoadLocalInstr(*local, TokenPosition::kNoSource);
525}
526
527void IRRegExpMacroAssembler::StoreLocal(LocalVariable* local, Value* value) {
528 Do(new (Z) StoreLocalInstr(*local, value, TokenPosition::kNoSource));
529}
530
531void IRRegExpMacroAssembler::set_current_instruction(Instruction* instruction) {
532 current_instruction_ = instruction;
533}
534
535Value* IRRegExpMacroAssembler::Bind(Definition* definition) {
536 AppendInstruction(definition);
537 definition->set_temp_index(temp_id_.Alloc());
538
539 return new (Z) Value(definition);
540}
541
542void IRRegExpMacroAssembler::Do(Definition* definition) {
543 AppendInstruction(definition);
544}
545
546Value* IRRegExpMacroAssembler::BindLoadLocal(const LocalVariable& local) {
547 if (local.IsConst()) {
548 return Bind(new (Z) ConstantInstr(*local.ConstValue()));
549 }
550 ASSERT(!local.is_captured());
551 return Bind(new (Z) LoadLocalInstr(local, TokenPosition::kNoSource));
552}
553
554// In some cases, the V8 irregexp engine generates unreachable code by emitting
555// a jmp not followed by a bind. We cannot do the same, since it is impossible
556// to append to a block following a jmp. In such cases, assume that we are doing
557// the correct thing, but output a warning when tracing.
558#define HANDLE_DEAD_CODE_EMISSION() \
559 if (current_instruction_ == NULL) { \
560 if (FLAG_trace_irregexp) { \
561 OS::PrintErr( \
562 "WARNING: Attempting to append to a closed assembler. " \
563 "This could be either a bug or generation of dead code " \
564 "inherited from V8.\n"); \
565 } \
566 BlockLabel dummy; \
567 BindBlock(&dummy); \
568 }
569
570void IRRegExpMacroAssembler::AppendInstruction(Instruction* instruction) {
571 HANDLE_DEAD_CODE_EMISSION();
572
573 ASSERT(current_instruction_ != NULL);
574 ASSERT(current_instruction_->next() == NULL);
575
576 temp_id_.Dealloc(instruction->InputCount());
577
578 current_instruction_->LinkTo(instruction);
579 set_current_instruction(instruction);
580}
581
582void IRRegExpMacroAssembler::CloseBlockWith(Instruction* instruction) {
583 HANDLE_DEAD_CODE_EMISSION();
584
585 ASSERT(current_instruction_ != NULL);
586 ASSERT(current_instruction_->next() == NULL);
587
588 temp_id_.Dealloc(instruction->InputCount());
589
590 current_instruction_->LinkTo(instruction);
591 set_current_instruction(NULL);
592}
593
594void IRRegExpMacroAssembler::GoTo(BlockLabel* to) {
595 if (to == NULL) {
596 Backtrack();
597 } else {
598 to->SetLinked();
599 GoTo(to->block());
600 }
601}
602
603// Closes the current block with a goto, and unsets current_instruction_.
604// BindBlock() must be called before emission can continue.
605void IRRegExpMacroAssembler::GoTo(JoinEntryInstr* to) {
606 HANDLE_DEAD_CODE_EMISSION();
607
608 ASSERT(current_instruction_ != NULL);
609 ASSERT(current_instruction_->next() == NULL);
610 current_instruction_->Goto(to);
611 set_current_instruction(NULL);
612}
613
614Value* IRRegExpMacroAssembler::PushLocal(LocalVariable* local) {
615 return Bind(LoadLocal(local));
616}
617
618void IRRegExpMacroAssembler::Print(const char* str) {
619 Print(Bind(new (Z) ConstantInstr(
620 String::ZoneHandle(Z, String::New(str, Heap::kOld)))));
621}
622
623void IRRegExpMacroAssembler::Print(Value* argument) {
624 const Library& lib = Library::Handle(Library::CoreLibrary());
625 const Function& print_fn =
626 Function::ZoneHandle(Z, lib.LookupFunctionAllowPrivate(Symbols::print()));
627 Do(StaticCall(print_fn, argument, ICData::kStatic));
628}
629
630void IRRegExpMacroAssembler::PrintBlocks() {
631 for (intptr_t i = 0; i < blocks_.length(); i++) {
632 FlowGraphPrinter::PrintBlock(blocks_[i], false);
633 }
634}
635
636intptr_t IRRegExpMacroAssembler::stack_limit_slack() {
637 return 32;
638}
639
640void IRRegExpMacroAssembler::AdvanceCurrentPosition(intptr_t by) {
641 TAG();
642 if (by != 0) {
643 Value* cur_pos_push = PushLocal(current_position_);
644 Value* by_push = Bind(Int64Constant(by));
645
646 Value* new_pos_value = Bind(Add(cur_pos_push, by_push));
647 StoreLocal(current_position_, new_pos_value);
648 }
649}
650
651void IRRegExpMacroAssembler::AdvanceRegister(intptr_t reg, intptr_t by) {
652 TAG();
653 ASSERT(reg >= 0);
654 ASSERT(reg < registers_count_);
655
656 if (by != 0) {
657 Value* registers_push = PushLocal(registers_);
658 Value* index_push = PushRegisterIndex(reg);
659 Value* reg_push = LoadRegister(reg);
660 Value* by_push = Bind(Int64Constant(by));
661 Value* value_push = Bind(Add(reg_push, by_push));
662 StoreRegister(registers_push, index_push, value_push);
663 }
664}
665
666void IRRegExpMacroAssembler::Backtrack() {
667 TAG();
668 GoTo(backtrack_block_);
669}
670
671// A BindBlock is analogous to assigning a label to a basic block.
672// If the BlockLabel does not yet contain a block, it is created.
673// If there is a current instruction, append a goto to the bound block.
674void IRRegExpMacroAssembler::BindBlock(BlockLabel* label) {
675 ASSERT(!label->is_bound());
676 ASSERT(label->block()->next() == NULL);
677
678 label->BindTo(block_id_.Alloc());
679 blocks_.Add(label->block());
680
681 if (current_instruction_ != NULL) {
682 GoTo(label);
683 }
684 set_current_instruction(label->block());
685
686 // Print the id of the current block if tracing.
687 PRINT(Bind(Uint64Constant(label->block()->block_id())));
688}
689
690intptr_t IRRegExpMacroAssembler::GetNextLocalIndex() {
691 intptr_t id = local_id_.Alloc();
692 return -id;
693}
694
695Value* IRRegExpMacroAssembler::LoadRegister(intptr_t index) {
696 Value* registers_push = PushLocal(registers_);
697 Value* index_push = PushRegisterIndex(index);
698 return Bind(InstanceCall(InstanceCallDescriptor::FromToken(Token::kINDEX),
699 registers_push, index_push));
700}
701
702void IRRegExpMacroAssembler::StoreRegister(intptr_t index, intptr_t value) {
703 Value* registers_push = PushLocal(registers_);
704 Value* index_push = PushRegisterIndex(index);
705 Value* value_push = Bind(Uint64Constant(value));
706 StoreRegister(registers_push, index_push, value_push);
707}
708
709void IRRegExpMacroAssembler::StoreRegister(Value* registers,
710 Value* index,
711 Value* value) {
712 TAG();
713 Do(InstanceCall(InstanceCallDescriptor::FromToken(Token::kASSIGN_INDEX),
714 registers, index, value));
715}
716
717Value* IRRegExpMacroAssembler::PushRegisterIndex(intptr_t index) {
718 if (registers_count_ <= index) {
719 registers_count_ = index + 1;
720 }
721 return Bind(Uint64Constant(index));
722}
723
724void IRRegExpMacroAssembler::CheckCharacter(uint32_t c, BlockLabel* on_equal) {
725 TAG();
726 Definition* cur_char_def = LoadLocal(current_character_);
727 Definition* char_def = Uint64Constant(c);
728
729 BranchOrBacktrack(Comparison(kEQ, cur_char_def, char_def), on_equal);
730}
731
732void IRRegExpMacroAssembler::CheckCharacterGT(uint16_t limit,
733 BlockLabel* on_greater) {
734 TAG();
735 BranchOrBacktrack(
736 Comparison(kGT, LoadLocal(current_character_), Uint64Constant(limit)),
737 on_greater);
738}
739
740void IRRegExpMacroAssembler::CheckAtStart(BlockLabel* on_at_start) {
741 TAG();
742
743 // Are we at the start of the input, i.e. is (offset == string_length * -1)?
744 Definition* neg_len_def =
745 InstanceCall(InstanceCallDescriptor::FromToken(Token::kNEGATE),
746 PushLocal(string_param_length_));
747 Definition* offset_def = LoadLocal(current_position_);
748 BranchOrBacktrack(Comparison(kEQ, neg_len_def, offset_def), on_at_start);
749}
750
751// cp_offset => offset from the current (character) pointer
752// This offset may be negative due to traversing backwards during lookbehind.
753void IRRegExpMacroAssembler::CheckNotAtStart(intptr_t cp_offset,
754 BlockLabel* on_not_at_start) {
755 TAG();
756
757 // Are we at the start of the input, i.e. is (offset == string_length * -1)?
758 auto neg_len_def =
759 Bind(InstanceCall(InstanceCallDescriptor::FromToken(Token::kNEGATE),
760 PushLocal(string_param_length_)));
761 auto current_pos_def = PushLocal(current_position_);
762 auto cp_offset_def = Bind(Int64Constant(cp_offset));
763 auto offset_def = Bind(Add(current_pos_def, cp_offset_def));
764 BranchOrBacktrack(Comparison(kNE, neg_len_def, offset_def), on_not_at_start);
765}
766
767void IRRegExpMacroAssembler::CheckCharacterLT(uint16_t limit,
768 BlockLabel* on_less) {
769 TAG();
770 BranchOrBacktrack(
771 Comparison(kLT, LoadLocal(current_character_), Uint64Constant(limit)),
772 on_less);
773}
774
775void IRRegExpMacroAssembler::CheckGreedyLoop(BlockLabel* on_equal) {
776 TAG();
777
778 BlockLabel fallthrough;
779
780 Definition* head = PeekStack();
781 Definition* cur_pos_def = LoadLocal(current_position_);
782 BranchOrBacktrack(Comparison(kNE, head, cur_pos_def), &fallthrough);
783
784 // Pop, throwing away the value.
785 Do(PopStack());
786
787 BranchOrBacktrack(NULL, on_equal);
788
789 BindBlock(&fallthrough);
790}
791
792void IRRegExpMacroAssembler::CheckNotBackReferenceIgnoreCase(
793 intptr_t start_reg,
794 bool read_backward,
795 bool unicode,
796 BlockLabel* on_no_match) {
797 TAG();
798 ASSERT(start_reg + 1 <= registers_count_);
799
800 BlockLabel fallthrough;
801
802 Value* end_push = LoadRegister(start_reg + 1);
803 Value* start_push = LoadRegister(start_reg);
804 StoreLocal(capture_length_, Bind(Sub(end_push, start_push)));
805
806 // The length of a capture should not be negative. This can only happen
807 // if the end of the capture is unrecorded, or at a point earlier than
808 // the start of the capture.
809 // BranchOrBacktrack(less, on_no_match);
810
811 BranchOrBacktrack(
812 Comparison(kLT, LoadLocal(capture_length_), Uint64Constant(0)),
813 on_no_match);
814
815 // If length is zero, either the capture is empty or it is completely
816 // uncaptured. In either case succeed immediately.
817 BranchOrBacktrack(
818 Comparison(kEQ, LoadLocal(capture_length_), Uint64Constant(0)),
819 &fallthrough);
820
821 Value* pos_push = nullptr;
822 Value* len_push = nullptr;
823
824 if (!read_backward) {
825 // Check that there are sufficient characters left in the input.
826 pos_push = PushLocal(current_position_);
827 len_push = PushLocal(capture_length_);
828 BranchOrBacktrack(
829 Comparison(kGT,
830 InstanceCall(InstanceCallDescriptor::FromToken(Token::kADD),
831 pos_push, len_push),
832 Uint64Constant(0)),
833 on_no_match);
834 }
835
836 pos_push = PushLocal(current_position_);
837 len_push = PushLocal(string_param_length_);
838 StoreLocal(match_start_index_, Bind(Add(pos_push, len_push)));
839
840 if (read_backward) {
841 // First check that there are enough characters before this point in
842 // the string that we can match the backreference.
843 BranchOrBacktrack(Comparison(kLT, LoadLocal(match_start_index_),
844 LoadLocal(capture_length_)),
845 on_no_match);
846
847 // The string to check is before the current position, not at it.
848 pos_push = PushLocal(match_start_index_);
849 len_push = PushLocal(capture_length_);
850 StoreLocal(match_start_index_, Bind(Sub(pos_push, len_push)));
851 }
852
853 pos_push = LoadRegister(start_reg);
854 len_push = PushLocal(string_param_length_);
855 StoreLocal(capture_start_index_, Bind(Add(pos_push, len_push)));
856
857 pos_push = PushLocal(match_start_index_);
858 len_push = PushLocal(capture_length_);
859 StoreLocal(match_end_index_, Bind(Add(pos_push, len_push)));
860
861 BlockLabel success;
862 if (mode_ == ASCII) {
863 BlockLabel loop_increment;
864 BlockLabel loop;
865 BindBlock(&loop);
866
867 StoreLocal(char_in_capture_, CharacterAt(capture_start_index_));
868 StoreLocal(char_in_match_, CharacterAt(match_start_index_));
869
870 BranchOrBacktrack(
871 Comparison(kEQ, LoadLocal(char_in_capture_), LoadLocal(char_in_match_)),
872 &loop_increment);
873
874 // Mismatch, try case-insensitive match (converting letters to lower-case).
875 Value* match_char_push = PushLocal(char_in_match_);
876 Value* mask_push = Bind(Uint64Constant(0x20));
877 StoreLocal(
878 char_in_match_,
879 Bind(InstanceCall(InstanceCallDescriptor::FromToken(Token::kBIT_OR),
880 match_char_push, mask_push)));
881
882 BlockLabel convert_capture;
883 BlockLabel on_not_in_range;
884 BranchOrBacktrack(
885 Comparison(kLT, LoadLocal(char_in_match_), Uint64Constant('a')),
886 &on_not_in_range);
887 BranchOrBacktrack(
888 Comparison(kGT, LoadLocal(char_in_match_), Uint64Constant('z')),
889 &on_not_in_range);
890 GoTo(&convert_capture);
891 BindBlock(&on_not_in_range);
892
893 // Latin-1: Check for values in range [224,254] but not 247.
894 BranchOrBacktrack(
895 Comparison(kLT, LoadLocal(char_in_match_), Uint64Constant(224)),
896 on_no_match);
897 BranchOrBacktrack(
898 Comparison(kGT, LoadLocal(char_in_match_), Uint64Constant(254)),
899 on_no_match);
900
901 BranchOrBacktrack(
902 Comparison(kEQ, LoadLocal(char_in_match_), Uint64Constant(247)),
903 on_no_match);
904
905 // Also convert capture character.
906 BindBlock(&convert_capture);
907
908 Value* capture_char_push = PushLocal(char_in_capture_);
909 mask_push = Bind(Uint64Constant(0x20));
910 StoreLocal(
911 char_in_capture_,
912 Bind(InstanceCall(InstanceCallDescriptor::FromToken(Token::kBIT_OR),
913 capture_char_push, mask_push)));
914
915 BranchOrBacktrack(
916 Comparison(kNE, LoadLocal(char_in_match_), LoadLocal(char_in_capture_)),
917 on_no_match);
918
919 BindBlock(&loop_increment);
920
921 // Increment indexes into capture and match strings.
922 Value* index_push = PushLocal(capture_start_index_);
923 Value* inc_push = Bind(Uint64Constant(1));
924 StoreLocal(capture_start_index_, Bind(Add(index_push, inc_push)));
925
926 index_push = PushLocal(match_start_index_);
927 inc_push = Bind(Uint64Constant(1));
928 StoreLocal(match_start_index_, Bind(Add(index_push, inc_push)));
929
930 // Compare to end of match, and loop if not done.
931 BranchOrBacktrack(Comparison(kLT, LoadLocal(match_start_index_),
932 LoadLocal(match_end_index_)),
933 &loop);
934 } else {
935 ASSERT(mode_ == UC16);
936
937 Value* string_value = Bind(LoadLocal(string_param_));
938 Value* lhs_index_value = Bind(LoadLocal(match_start_index_));
939 Value* rhs_index_value = Bind(LoadLocal(capture_start_index_));
940 Value* length_value = Bind(LoadLocal(capture_length_));
941
942 Definition* is_match_def;
943
944 if (unicode) {
945 is_match_def = new (Z) CaseInsensitiveCompareInstr(
946 string_value, lhs_index_value, rhs_index_value, length_value,
947 kCaseInsensitiveCompareUTF16RuntimeEntry, specialization_cid_);
948 } else {
949 is_match_def = new (Z) CaseInsensitiveCompareInstr(
950 string_value, lhs_index_value, rhs_index_value, length_value,
951 kCaseInsensitiveCompareUCS2RuntimeEntry, specialization_cid_);
952 }
953
954 BranchOrBacktrack(Comparison(kNE, is_match_def, BoolConstant(true)),
955 on_no_match);
956 }
957
958 BindBlock(&success);
959
960 if (read_backward) {
961 // Move current character position to start of match.
962 pos_push = PushLocal(current_position_);
963 len_push = PushLocal(capture_length_);
964 StoreLocal(current_position_, Bind(Sub(pos_push, len_push)));
965 } else {
966 // Move current character position to position after match.
967 Value* match_end_push = PushLocal(match_end_index_);
968 len_push = PushLocal(string_param_length_);
969 StoreLocal(current_position_, Bind(Sub(match_end_push, len_push)));
970 }
971
972 BindBlock(&fallthrough);
973}
974
975void IRRegExpMacroAssembler::CheckNotBackReference(intptr_t start_reg,
976 bool read_backward,
977 BlockLabel* on_no_match) {
978 TAG();
979 ASSERT(start_reg + 1 <= registers_count_);
980
981 BlockLabel fallthrough;
982 BlockLabel success;
983
984 // Find length of back-referenced capture.
985 Value* end_push = LoadRegister(start_reg + 1);
986 Value* start_push = LoadRegister(start_reg);
987 StoreLocal(capture_length_, Bind(Sub(end_push, start_push)));
988
989 // Fail on partial or illegal capture (start of capture after end of capture).
990 BranchOrBacktrack(
991 Comparison(kLT, LoadLocal(capture_length_), Uint64Constant(0)),
992 on_no_match);
993
994 // Succeed on empty capture (including no capture)
995 BranchOrBacktrack(
996 Comparison(kEQ, LoadLocal(capture_length_), Uint64Constant(0)),
997 &fallthrough);
998
999 Value* pos_push = nullptr;
1000 Value* len_push = nullptr;
1001
1002 if (!read_backward) {
1003 // Check that there are sufficient characters left in the input.
1004 pos_push = PushLocal(current_position_);
1005 len_push = PushLocal(capture_length_);
1006 BranchOrBacktrack(
1007 Comparison(kGT,
1008 InstanceCall(InstanceCallDescriptor::FromToken(Token::kADD),
1009 pos_push, len_push),
1010 Uint64Constant(0)),
1011 on_no_match);
1012 }
1013
1014 // Compute pointers to match string and capture string.
1015 pos_push = PushLocal(current_position_);
1016 len_push = PushLocal(string_param_length_);
1017 StoreLocal(match_start_index_, Bind(Add(pos_push, len_push)));
1018
1019 if (read_backward) {
1020 // First check that there are enough characters before this point in
1021 // the string that we can match the backreference.
1022 BranchOrBacktrack(Comparison(kLT, LoadLocal(match_start_index_),
1023 LoadLocal(capture_length_)),
1024 on_no_match);
1025
1026 // The string to check is before the current position, not at it.
1027 pos_push = PushLocal(match_start_index_);
1028 len_push = PushLocal(capture_length_);
1029 StoreLocal(match_start_index_, Bind(Sub(pos_push, len_push)));
1030 }
1031
1032 pos_push = LoadRegister(start_reg);
1033 len_push = PushLocal(string_param_length_);
1034 StoreLocal(capture_start_index_, Bind(Add(pos_push, len_push)));
1035
1036 pos_push = PushLocal(match_start_index_);
1037 len_push = PushLocal(capture_length_);
1038 StoreLocal(match_end_index_, Bind(Add(pos_push, len_push)));
1039
1040 BlockLabel loop;
1041 BindBlock(&loop);
1042
1043 StoreLocal(char_in_capture_, CharacterAt(capture_start_index_));
1044 StoreLocal(char_in_match_, CharacterAt(match_start_index_));
1045
1046 BranchOrBacktrack(
1047 Comparison(kNE, LoadLocal(char_in_capture_), LoadLocal(char_in_match_)),
1048 on_no_match);
1049
1050 // Increment indexes into capture and match strings.
1051 Value* index_push = PushLocal(capture_start_index_);
1052 Value* inc_push = Bind(Uint64Constant(1));
1053 StoreLocal(capture_start_index_, Bind(Add(index_push, inc_push)));
1054
1055 index_push = PushLocal(match_start_index_);
1056 inc_push = Bind(Uint64Constant(1));
1057 StoreLocal(match_start_index_, Bind(Add(index_push, inc_push)));
1058
1059 // Check if we have reached end of match area.
1060 BranchOrBacktrack(Comparison(kLT, LoadLocal(match_start_index_),
1061 LoadLocal(match_end_index_)),
1062 &loop);
1063
1064 BindBlock(&success);
1065
1066 if (read_backward) {
1067 // Move current character position to start of match.
1068 pos_push = PushLocal(current_position_);
1069 len_push = PushLocal(capture_length_);
1070 StoreLocal(current_position_, Bind(Sub(pos_push, len_push)));
1071 } else {
1072 // Move current character position to position after match.
1073 Value* match_end_push = PushLocal(match_end_index_);
1074 len_push = PushLocal(string_param_length_);
1075 StoreLocal(current_position_, Bind(Sub(match_end_push, len_push)));
1076 }
1077
1078 BindBlock(&fallthrough);
1079}
1080
1081void IRRegExpMacroAssembler::CheckNotCharacter(uint32_t c,
1082 BlockLabel* on_not_equal) {
1083 TAG();
1084 BranchOrBacktrack(
1085 Comparison(kNE, LoadLocal(current_character_), Uint64Constant(c)),
1086 on_not_equal);
1087}
1088
1089void IRRegExpMacroAssembler::CheckCharacterAfterAnd(uint32_t c,
1090 uint32_t mask,
1091 BlockLabel* on_equal) {
1092 TAG();
1093
1094 Definition* actual_def = LoadLocal(current_character_);
1095
1096 Value* actual_push = Bind(actual_def);
1097 Value* mask_push = Bind(Uint64Constant(mask));
1098 actual_def = InstanceCall(InstanceCallDescriptor::FromToken(Token::kBIT_AND),
1099 actual_push, mask_push);
1100 Definition* expected_def = Uint64Constant(c);
1101
1102 BranchOrBacktrack(Comparison(kEQ, actual_def, expected_def), on_equal);
1103}
1104
1105void IRRegExpMacroAssembler::CheckNotCharacterAfterAnd(
1106 uint32_t c,
1107 uint32_t mask,
1108 BlockLabel* on_not_equal) {
1109 TAG();
1110
1111 Definition* actual_def = LoadLocal(current_character_);
1112
1113 Value* actual_push = Bind(actual_def);
1114 Value* mask_push = Bind(Uint64Constant(mask));
1115 actual_def = InstanceCall(InstanceCallDescriptor::FromToken(Token::kBIT_AND),
1116 actual_push, mask_push);
1117 Definition* expected_def = Uint64Constant(c);
1118
1119 BranchOrBacktrack(Comparison(kNE, actual_def, expected_def), on_not_equal);
1120}
1121
1122void IRRegExpMacroAssembler::CheckNotCharacterAfterMinusAnd(
1123 uint16_t c,
1124 uint16_t minus,
1125 uint16_t mask,
1126 BlockLabel* on_not_equal) {
1127 TAG();
1128 ASSERT(minus < Utf16::kMaxCodeUnit); // NOLINT
1129
1130 Definition* actual_def = LoadLocal(current_character_);
1131
1132 Value* actual_push = Bind(actual_def);
1133 Value* minus_push = Bind(Uint64Constant(minus));
1134
1135 actual_push = Bind(Sub(actual_push, minus_push));
1136 Value* mask_push = Bind(Uint64Constant(mask));
1137 actual_def = InstanceCall(InstanceCallDescriptor::FromToken(Token::kBIT_AND),
1138 actual_push, mask_push);
1139 Definition* expected_def = Uint64Constant(c);
1140
1141 BranchOrBacktrack(Comparison(kNE, actual_def, expected_def), on_not_equal);
1142}
1143
1144void IRRegExpMacroAssembler::CheckCharacterInRange(uint16_t from,
1145 uint16_t to,
1146 BlockLabel* on_in_range) {
1147 TAG();
1148 ASSERT(from <= to);
1149
1150 // TODO(zerny): All range comparisons could be done cheaper with unsigned
1151 // compares. This pattern repeats in various places.
1152
1153 BlockLabel on_not_in_range;
1154 BranchOrBacktrack(
1155 Comparison(kLT, LoadLocal(current_character_), Uint64Constant(from)),
1156 &on_not_in_range);
1157 BranchOrBacktrack(
1158 Comparison(kGT, LoadLocal(current_character_), Uint64Constant(to)),
1159 &on_not_in_range);
1160 BranchOrBacktrack(NULL, on_in_range);
1161
1162 BindBlock(&on_not_in_range);
1163}
1164
1165void IRRegExpMacroAssembler::CheckCharacterNotInRange(
1166 uint16_t from,
1167 uint16_t to,
1168 BlockLabel* on_not_in_range) {
1169 TAG();
1170 ASSERT(from <= to);
1171
1172 BranchOrBacktrack(
1173 Comparison(kLT, LoadLocal(current_character_), Uint64Constant(from)),
1174 on_not_in_range);
1175
1176 BranchOrBacktrack(
1177 Comparison(kGT, LoadLocal(current_character_), Uint64Constant(to)),
1178 on_not_in_range);
1179}
1180
1181void IRRegExpMacroAssembler::CheckBitInTable(const TypedData& table,
1182 BlockLabel* on_bit_set) {
1183 TAG();
1184
1185 Value* table_push = Bind(new (Z) ConstantInstr(table));
1186 Value* index_push = PushLocal(current_character_);
1187
1188 if (mode_ != ASCII || kTableMask != Symbols::kMaxOneCharCodeSymbol) {
1189 Value* mask_push = Bind(Uint64Constant(kTableSize - 1));
1190 index_push =
1191 Bind(InstanceCall(InstanceCallDescriptor::FromToken(Token::kBIT_AND),
1192 index_push, mask_push));
1193 }
1194
1195 Definition* byte_def = InstanceCall(
1196 InstanceCallDescriptor::FromToken(Token::kINDEX), table_push, index_push);
1197 Definition* zero_def = Int64Constant(0);
1198
1199 BranchOrBacktrack(Comparison(kNE, byte_def, zero_def), on_bit_set);
1200}
1201
1202bool IRRegExpMacroAssembler::CheckSpecialCharacterClass(
1203 uint16_t type,
1204 BlockLabel* on_no_match) {
1205 TAG();
1206
1207 // Range checks (c in min..max) are generally implemented by an unsigned
1208 // (c - min) <= (max - min) check
1209 switch (type) {
1210 case 's':
1211 // Match space-characters
1212 if (mode_ == ASCII) {
1213 // One byte space characters are '\t'..'\r', ' ' and \u00a0.
1214 BlockLabel success;
1215 // Space (' ').
1216 BranchOrBacktrack(
1217 Comparison(kEQ, LoadLocal(current_character_), Uint64Constant(' ')),
1218 &success);
1219 // Check range 0x09..0x0d.
1220 CheckCharacterInRange('\t', '\r', &success);
1221 // \u00a0 (NBSP).
1222 BranchOrBacktrack(Comparison(kNE, LoadLocal(current_character_),
1223 Uint64Constant(0x00a0)),
1224 on_no_match);
1225 BindBlock(&success);
1226 return true;
1227 }
1228 return false;
1229 case 'S':
1230 // The emitted code for generic character classes is good enough.
1231 return false;
1232 case 'd':
1233 // Match ASCII digits ('0'..'9')
1234 CheckCharacterNotInRange('0', '9', on_no_match);
1235 return true;
1236 case 'D':
1237 // Match non ASCII-digits
1238 CheckCharacterInRange('0', '9', on_no_match);
1239 return true;
1240 case '.': {
1241 // Match non-newlines (not 0x0a('\n'), 0x0d('\r'), 0x2028 and 0x2029)
1242 BranchOrBacktrack(
1243 Comparison(kEQ, LoadLocal(current_character_), Uint64Constant('\n')),
1244 on_no_match);
1245 BranchOrBacktrack(
1246 Comparison(kEQ, LoadLocal(current_character_), Uint64Constant('\r')),
1247 on_no_match);
1248 if (mode_ == UC16) {
1249 BranchOrBacktrack(Comparison(kEQ, LoadLocal(current_character_),
1250 Uint64Constant(0x2028)),
1251 on_no_match);
1252 BranchOrBacktrack(Comparison(kEQ, LoadLocal(current_character_),
1253 Uint64Constant(0x2029)),
1254 on_no_match);
1255 }
1256 return true;
1257 }
1258 case 'w': {
1259 if (mode_ != ASCII) {
1260 // Table is 128 entries, so all ASCII characters can be tested.
1261 BranchOrBacktrack(
1262 Comparison(kGT, LoadLocal(current_character_), Uint64Constant('z')),
1263 on_no_match);
1264 }
1265
1266 Value* table_push = Bind(WordCharacterMapConstant());
1267 Value* index_push = PushLocal(current_character_);
1268
1269 Definition* byte_def =
1270 InstanceCall(InstanceCallDescriptor::FromToken(Token::kINDEX),
1271 table_push, index_push);
1272 Definition* zero_def = Int64Constant(0);
1273
1274 BranchOrBacktrack(Comparison(kEQ, byte_def, zero_def), on_no_match);
1275
1276 return true;
1277 }
1278 case 'W': {
1279 BlockLabel done;
1280 if (mode_ != ASCII) {
1281 // Table is 128 entries, so all ASCII characters can be tested.
1282 BranchOrBacktrack(
1283 Comparison(kGT, LoadLocal(current_character_), Uint64Constant('z')),
1284 &done);
1285 }
1286
1287 // TODO(zerny): Refactor to use CheckBitInTable if possible.
1288
1289 Value* table_push = Bind(WordCharacterMapConstant());
1290 Value* index_push = PushLocal(current_character_);
1291
1292 Definition* byte_def =
1293 InstanceCall(InstanceCallDescriptor::FromToken(Token::kINDEX),
1294 table_push, index_push);
1295 Definition* zero_def = Int64Constant(0);
1296
1297 BranchOrBacktrack(Comparison(kNE, byte_def, zero_def), on_no_match);
1298
1299 if (mode_ != ASCII) {
1300 BindBlock(&done);
1301 }
1302 return true;
1303 }
1304 // Non-standard classes (with no syntactic shorthand) used internally.
1305 case '*':
1306 // Match any character.
1307 return true;
1308 case 'n': {
1309 // Match newlines (0x0a('\n'), 0x0d('\r'), 0x2028 or 0x2029).
1310 // The opposite of '.'.
1311 BlockLabel success;
1312 BranchOrBacktrack(
1313 Comparison(kEQ, LoadLocal(current_character_), Uint64Constant('\n')),
1314 &success);
1315 BranchOrBacktrack(
1316 Comparison(kEQ, LoadLocal(current_character_), Uint64Constant('\r')),
1317 &success);
1318 if (mode_ == UC16) {
1319 BranchOrBacktrack(Comparison(kEQ, LoadLocal(current_character_),
1320 Uint64Constant(0x2028)),
1321 &success);
1322 BranchOrBacktrack(Comparison(kEQ, LoadLocal(current_character_),
1323 Uint64Constant(0x2029)),
1324 &success);
1325 }
1326 BranchOrBacktrack(NULL, on_no_match);
1327 BindBlock(&success);
1328 return true;
1329 }
1330 // No custom implementation (yet): s(uint16_t), S(uint16_t).
1331 default:
1332 return false;
1333 }
1334}
1335
1336void IRRegExpMacroAssembler::Fail() {
1337 TAG();
1338 ASSERT(FAILURE == 0); // Return value for failure is zero.
1339 if (!global()) {
1340 UNREACHABLE(); // Dart regexps are always global.
1341 }
1342 GoTo(exit_block_);
1343}
1344
1345void IRRegExpMacroAssembler::IfRegisterGE(intptr_t reg,
1346 intptr_t comparand,
1347 BlockLabel* if_ge) {
1348 TAG();
1349 Value* reg_push = LoadRegister(reg);
1350 Value* pos = Bind(Int64Constant(comparand));
1351 BranchOrBacktrack(Comparison(kGTE, reg_push, pos), if_ge);
1352}
1353
1354void IRRegExpMacroAssembler::IfRegisterLT(intptr_t reg,
1355 intptr_t comparand,
1356 BlockLabel* if_lt) {
1357 TAG();
1358 Value* reg_push = LoadRegister(reg);
1359 Value* pos = Bind(Int64Constant(comparand));
1360 BranchOrBacktrack(Comparison(kLT, reg_push, pos), if_lt);
1361}
1362
1363void IRRegExpMacroAssembler::IfRegisterEqPos(intptr_t reg, BlockLabel* if_eq) {
1364 TAG();
1365 Value* reg_push = LoadRegister(reg);
1366 Value* pos = Bind(LoadLocal(current_position_));
1367 BranchOrBacktrack(Comparison(kEQ, reg_push, pos), if_eq);
1368}
1369
1370RegExpMacroAssembler::IrregexpImplementation
1371IRRegExpMacroAssembler::Implementation() {
1372 return kIRImplementation;
1373}
1374
1375void IRRegExpMacroAssembler::LoadCurrentCharacter(intptr_t cp_offset,
1376 BlockLabel* on_end_of_input,
1377 bool check_bounds,
1378 intptr_t characters) {
1379 TAG();
1380 ASSERT(cp_offset < (1 << 30)); // Be sane! (And ensure negation works)
1381 if (check_bounds) {
1382 if (cp_offset >= 0) {
1383 CheckPosition(cp_offset + characters - 1, on_end_of_input);
1384 } else {
1385 CheckPosition(cp_offset, on_end_of_input);
1386 }
1387 }
1388 LoadCurrentCharacterUnchecked(cp_offset, characters);
1389}
1390
1391void IRRegExpMacroAssembler::PopCurrentPosition() {
1392 TAG();
1393 StoreLocal(current_position_, Bind(PopStack()));
1394}
1395
1396void IRRegExpMacroAssembler::PopRegister(intptr_t reg) {
1397 TAG();
1398 ASSERT(reg < registers_count_);
1399 Value* registers_push = PushLocal(registers_);
1400 Value* index_push = PushRegisterIndex(reg);
1401 Value* pop_push = Bind(PopStack());
1402 StoreRegister(registers_push, index_push, pop_push);
1403}
1404
1405void IRRegExpMacroAssembler::PushStack(Definition* definition) {
1406 Value* stack_push = PushLocal(stack_);
1407 Value* stack_pointer_push = PushLocal(stack_pointer_);
1408 StoreLocal(stack_pointer_,
1409 Bind(Add(stack_pointer_push, Bind(Uint64Constant(1)))));
1410 stack_pointer_push = PushLocal(stack_pointer_);
1411 // TODO(zerny): bind value and push could break stack discipline.
1412 Value* value_push = Bind(definition);
1413 Do(InstanceCall(InstanceCallDescriptor::FromToken(Token::kASSIGN_INDEX),
1414 stack_push, stack_pointer_push, value_push));
1415}
1416
1417Definition* IRRegExpMacroAssembler::PopStack() {
1418 Value* stack_push = PushLocal(stack_);
1419 Value* stack_pointer_push1 = PushLocal(stack_pointer_);
1420 Value* stack_pointer_push2 = PushLocal(stack_pointer_);
1421 StoreLocal(stack_pointer_,
1422 Bind(Sub(stack_pointer_push2, Bind(Uint64Constant(1)))));
1423 return InstanceCall(InstanceCallDescriptor::FromToken(Token::kINDEX),
1424 stack_push, stack_pointer_push1);
1425}
1426
1427Definition* IRRegExpMacroAssembler::PeekStack() {
1428 Value* stack_push = PushLocal(stack_);
1429 Value* stack_pointer_push = PushLocal(stack_pointer_);
1430 return InstanceCall(InstanceCallDescriptor::FromToken(Token::kINDEX),
1431 stack_push, stack_pointer_push);
1432}
1433
1434// Pushes the location corresponding to label to the backtracking stack.
1435void IRRegExpMacroAssembler::PushBacktrack(BlockLabel* label) {
1436 TAG();
1437
1438 // Ensure that targets of indirect jumps are never accessed through a
1439 // normal control flow instructions by creating a new block for each backtrack
1440 // target.
1441 IndirectEntryInstr* indirect_target = IndirectWithJoinGoto(label->block());
1442
1443 // Add a fake edge from the graph entry for data flow analysis.
1444 entry_block_->AddIndirectEntry(indirect_target);
1445
1446 ConstantInstr* offset = Uint64Constant(indirect_target->indirect_id());
1447 PushStack(offset);
1448 CheckStackLimit();
1449}
1450
1451void IRRegExpMacroAssembler::PushCurrentPosition() {
1452 TAG();
1453 PushStack(LoadLocal(current_position_));
1454}
1455
1456void IRRegExpMacroAssembler::PushRegister(intptr_t reg) {
1457 TAG();
1458 // TODO(zerny): Refactor PushStack so it can be reused here.
1459 Value* stack_push = PushLocal(stack_);
1460 Value* stack_pointer_push = PushLocal(stack_pointer_);
1461 StoreLocal(stack_pointer_,
1462 Bind(Add(stack_pointer_push, Bind(Uint64Constant(1)))));
1463 stack_pointer_push = PushLocal(stack_pointer_);
1464 // TODO(zerny): bind value and push could break stack discipline.
1465 Value* value_push = LoadRegister(reg);
1466 Do(InstanceCall(InstanceCallDescriptor::FromToken(Token::kASSIGN_INDEX),
1467 stack_push, stack_pointer_push, value_push));
1468 CheckStackLimit();
1469}
1470
1471// Checks that (stack.capacity - stack_limit_slack) > stack_pointer.
1472// This ensures that up to stack_limit_slack stack pushes can be
1473// done without exhausting the stack space. If the check fails the
1474// stack will be grown.
1475void IRRegExpMacroAssembler::CheckStackLimit() {
1476 TAG();
1477 Value* stack_push = PushLocal(stack_);
1478 Value* length_push =
1479 Bind(InstanceCall(InstanceCallDescriptor(String::ZoneHandle(
1480 Field::GetterSymbol(Symbols::Length()))),
1481 stack_push));
1482 Value* capacity_push =
1483 Bind(Sub(length_push, Bind(Uint64Constant(stack_limit_slack()))));
1484 Value* stack_pointer_push = PushLocal(stack_pointer_);
1485 BranchInstr* branch = new (Z) BranchInstr(
1486 Comparison(kGT, capacity_push, stack_pointer_push), GetNextDeoptId());
1487 CloseBlockWith(branch);
1488
1489 BlockLabel grow_stack;
1490 BlockLabel fallthrough;
1491 *branch->true_successor_address() = TargetWithJoinGoto(fallthrough.block());
1492 *branch->false_successor_address() = TargetWithJoinGoto(grow_stack.block());
1493
1494 BindBlock(&grow_stack);
1495 GrowStack();
1496
1497 BindBlock(&fallthrough);
1498}
1499
1500void IRRegExpMacroAssembler::GrowStack() {
1501 TAG();
1502 const Library& lib = Library::Handle(Library::InternalLibrary());
1503 const Function& grow_function = Function::ZoneHandle(
1504 Z, lib.LookupFunctionAllowPrivate(Symbols::GrowRegExpStack()));
1505 StoreLocal(stack_, Bind(StaticCall(grow_function, PushLocal(stack_),
1506 ICData::kStatic)));
1507
1508 // Note: :stack and stack_array_cell content might diverge because each
1509 // instance of :matcher code has its own stack_array_cell embedded into it
1510 // as a constant but :stack is a local variable and its value might be
1511 // comming from OSR or deoptimization. This means we should never use
1512 // stack_array_cell in the body of the :matcher to reload the :stack.
1513 Value* stack_cell_push = Bind(new (Z) ConstantInstr(stack_array_cell_));
1514 Value* index_push = Bind(Uint64Constant(0));
1515 Value* stack_push = PushLocal(stack_);
1516 Do(InstanceCall(InstanceCallDescriptor::FromToken(Token::kASSIGN_INDEX),
1517 stack_cell_push, index_push, stack_push));
1518}
1519
1520void IRRegExpMacroAssembler::ReadCurrentPositionFromRegister(intptr_t reg) {
1521 TAG();
1522 StoreLocal(current_position_, LoadRegister(reg));
1523}
1524
1525// Resets the tip of the stack to the value stored in reg.
1526void IRRegExpMacroAssembler::ReadStackPointerFromRegister(intptr_t reg) {
1527 TAG();
1528 ASSERT(reg < registers_count_);
1529 StoreLocal(stack_pointer_, LoadRegister(reg));
1530}
1531
1532void IRRegExpMacroAssembler::SetCurrentPositionFromEnd(intptr_t by) {
1533 TAG();
1534
1535 BlockLabel after_position;
1536
1537 Definition* cur_pos_def = LoadLocal(current_position_);
1538 Definition* by_value_def = Int64Constant(-by);
1539
1540 BranchOrBacktrack(Comparison(kGTE, cur_pos_def, by_value_def),
1541 &after_position);
1542
1543 StoreLocal(current_position_, Bind(Int64Constant(-by)));
1544
1545 // On RegExp code entry (where this operation is used), the character before
1546 // the current position is expected to be already loaded.
1547 // We have advanced the position, so it's safe to read backwards.
1548 LoadCurrentCharacterUnchecked(-1, 1);
1549
1550 BindBlock(&after_position);
1551}
1552
1553void IRRegExpMacroAssembler::SetRegister(intptr_t reg, intptr_t to) {
1554 TAG();
1555 // Reserved for positions!
1556 ASSERT(reg >= saved_registers_count_);
1557 StoreRegister(reg, to);
1558}
1559
1560bool IRRegExpMacroAssembler::Succeed() {
1561 TAG();
1562 GoTo(success_block_);
1563 return global();
1564}
1565
1566void IRRegExpMacroAssembler::WriteCurrentPositionToRegister(
1567 intptr_t reg,
1568 intptr_t cp_offset) {
1569 TAG();
1570
1571 Value* registers_push = PushLocal(registers_);
1572 Value* index_push = PushRegisterIndex(reg);
1573 Value* pos_push = PushLocal(current_position_);
1574 Value* off_push = Bind(Int64Constant(cp_offset));
1575 Value* neg_off_push = Bind(Add(pos_push, off_push));
1576 // Push the negative offset; these are converted to positive string positions
1577 // within the success block.
1578 StoreRegister(registers_push, index_push, neg_off_push);
1579}
1580
1581void IRRegExpMacroAssembler::ClearRegisters(intptr_t reg_from,
1582 intptr_t reg_to) {
1583 TAG();
1584
1585 ASSERT(reg_from <= reg_to);
1586
1587 // In order to clear registers to a final result value of -1, set them to
1588 // (-1 - string length), the offset of -1 from the end of the string.
1589
1590 for (intptr_t reg = reg_from; reg <= reg_to; reg++) {
1591 Value* registers_push = PushLocal(registers_);
1592 Value* index_push = PushRegisterIndex(reg);
1593 Value* minus_one_push = Bind(Int64Constant(-1));
1594 Value* length_push = PushLocal(string_param_length_);
1595 Value* value_push = Bind(Sub(minus_one_push, length_push));
1596 StoreRegister(registers_push, index_push, value_push);
1597 }
1598}
1599
1600void IRRegExpMacroAssembler::WriteStackPointerToRegister(intptr_t reg) {
1601 TAG();
1602
1603 Value* registers_push = PushLocal(registers_);
1604 Value* index_push = PushRegisterIndex(reg);
1605 Value* tip_push = PushLocal(stack_pointer_);
1606 StoreRegister(registers_push, index_push, tip_push);
1607}
1608
1609// Private methods:
1610
1611void IRRegExpMacroAssembler::CheckPosition(intptr_t cp_offset,
1612 BlockLabel* on_outside_input) {
1613 TAG();
1614 if (cp_offset >= 0) {
1615 Definition* curpos_def = LoadLocal(current_position_);
1616 Definition* cp_off_def = Int64Constant(-cp_offset);
1617 // If (current_position_ < -cp_offset), we are in bounds.
1618 // Remember, current_position_ is a negative offset from the string end.
1619
1620 BranchOrBacktrack(Comparison(kGTE, curpos_def, cp_off_def),
1621 on_outside_input);
1622 } else {
1623 // We need to see if there's enough characters left in the string to go
1624 // back cp_offset characters, so get the normalized position and then
1625 // make sure that (normalized_position >= -cp_offset).
1626 Value* pos_push = PushLocal(current_position_);
1627 Value* len_push = PushLocal(string_param_length_);
1628 BranchOrBacktrack(
1629 Comparison(kLT, Add(pos_push, len_push), Uint64Constant(-cp_offset)),
1630 on_outside_input);
1631 }
1632}
1633
1634void IRRegExpMacroAssembler::BranchOrBacktrack(ComparisonInstr* comparison,
1635 BlockLabel* true_successor) {
1636 if (comparison == NULL) { // No condition
1637 if (true_successor == NULL) {
1638 Backtrack();
1639 return;
1640 }
1641 GoTo(true_successor);
1642 return;
1643 }
1644
1645 // If no successor block has been passed in, backtrack.
1646 JoinEntryInstr* true_successor_block = backtrack_block_;
1647 if (true_successor != NULL) {
1648 true_successor->SetLinked();
1649 true_successor_block = true_successor->block();
1650 }
1651 ASSERT(true_successor_block != NULL);
1652
1653 // If the condition is not true, fall through to a new block.
1654 BlockLabel fallthrough;
1655
1656 BranchInstr* branch = new (Z) BranchInstr(comparison, GetNextDeoptId());
1657 *branch->true_successor_address() = TargetWithJoinGoto(true_successor_block);
1658 *branch->false_successor_address() = TargetWithJoinGoto(fallthrough.block());
1659
1660 CloseBlockWith(branch);
1661 BindBlock(&fallthrough);
1662}
1663
1664TargetEntryInstr* IRRegExpMacroAssembler::TargetWithJoinGoto(
1665 JoinEntryInstr* dst) {
1666 TargetEntryInstr* target = new (Z)
1667 TargetEntryInstr(block_id_.Alloc(), kInvalidTryIndex, GetNextDeoptId());
1668 blocks_.Add(target);
1669
1670 target->AppendInstruction(new (Z) GotoInstr(dst, GetNextDeoptId()));
1671
1672 return target;
1673}
1674
1675IndirectEntryInstr* IRRegExpMacroAssembler::IndirectWithJoinGoto(
1676 JoinEntryInstr* dst) {
1677 IndirectEntryInstr* target =
1678 new (Z) IndirectEntryInstr(block_id_.Alloc(), indirect_id_.Alloc(),
1679 kInvalidTryIndex, GetNextDeoptId());
1680 blocks_.Add(target);
1681
1682 target->AppendInstruction(new (Z) GotoInstr(dst, GetNextDeoptId()));
1683
1684 return target;
1685}
1686
1687void IRRegExpMacroAssembler::CheckPreemption(bool is_backtrack) {
1688 TAG();
1689
1690 // We don't have the loop_depth available when compiling regexps, but
1691 // we set loop_depth to a non-zero value because this instruction does
1692 // not act as an OSR entry outside loops.
1693 AppendInstruction(new (Z) CheckStackOverflowInstr(
1694 TokenPosition::kNoSource,
1695 /*stack_depth=*/0,
1696 /*loop_depth=*/1, GetNextDeoptId(),
1697 is_backtrack ? CheckStackOverflowInstr::kOsrAndPreemption
1698 : CheckStackOverflowInstr::kOsrOnly));
1699}
1700
1701Definition* IRRegExpMacroAssembler::Add(Value* lhs, Value* rhs) {
1702 return InstanceCall(InstanceCallDescriptor::FromToken(Token::kADD), lhs, rhs);
1703}
1704
1705Definition* IRRegExpMacroAssembler::Sub(Value* lhs, Value* rhs) {
1706 return InstanceCall(InstanceCallDescriptor::FromToken(Token::kSUB), lhs, rhs);
1707}
1708
1709void IRRegExpMacroAssembler::LoadCurrentCharacterUnchecked(
1710 intptr_t cp_offset,
1711 intptr_t characters) {
1712 TAG();
1713
1714 ASSERT(characters == 1 || CanReadUnaligned());
1715 if (mode_ == ASCII) {
1716 ASSERT(characters == 1 || characters == 2 || characters == 4);
1717 } else {
1718 ASSERT(mode_ == UC16);
1719 ASSERT(characters == 1 || characters == 2);
1720 }
1721
1722 // Calculate the addressed string index as:
1723 // cp_offset + current_position_ + string_param_length_
1724 // TODO(zerny): Avoid generating 'add' instance-calls here.
1725 Value* off_arg = Bind(Int64Constant(cp_offset));
1726 Value* pos_arg = BindLoadLocal(*current_position_);
1727 Value* off_pos_arg = Bind(Add(off_arg, pos_arg));
1728 Value* len_arg = BindLoadLocal(*string_param_length_);
1729 // Index is stored in a temporary local so that we can later load it safely.
1730 StoreLocal(index_temp_, Bind(Add(off_pos_arg, len_arg)));
1731
1732 // Load and store the code units.
1733 Value* code_unit_value = LoadCodeUnitsAt(index_temp_, characters);
1734 StoreLocal(current_character_, code_unit_value);
1735 PRINT(PushLocal(current_character_));
1736}
1737
1738Value* IRRegExpMacroAssembler::CharacterAt(LocalVariable* index) {
1739 return LoadCodeUnitsAt(index, 1);
1740}
1741
1742Value* IRRegExpMacroAssembler::LoadCodeUnitsAt(LocalVariable* index,
1743 intptr_t characters) {
1744 // Bind the pattern as the load receiver.
1745 Value* pattern_val = BindLoadLocal(*string_param_);
1746 if (IsExternalStringClassId(specialization_cid_)) {
1747 // The data of an external string is stored through one indirection.
1748 intptr_t external_offset = 0;
1749 if (specialization_cid_ == kExternalOneByteStringCid) {
1750 external_offset = ExternalOneByteString::external_data_offset();
1751 } else if (specialization_cid_ == kExternalTwoByteStringCid) {
1752 external_offset = ExternalTwoByteString::external_data_offset();
1753 } else {
1754 UNREACHABLE();
1755 }
1756 // This pushes an untagged value on the stack which is immediately consumed
1757 // by LoadCodeUnitsAtInstr below.
1758 pattern_val = Bind(new (Z) LoadUntaggedInstr(pattern_val, external_offset));
1759 }
1760
1761 // Here pattern_val might be untagged so this must not trigger a GC.
1762 Value* index_val = BindLoadLocal(*index);
1763
1764 return Bind(new (Z) LoadCodeUnitsInstr(pattern_val, index_val, characters,
1765 specialization_cid_,
1766 TokenPosition::kNoSource));
1767}
1768
1769#undef __
1770
1771} // namespace dart
1772
1773#endif // !defined(DART_PRECOMPILED_RUNTIME)
1774