1// Copyright (c) 2015, the Dart project authors. Please see the AUTHORS file
2// for details. All rights reserved. Use of this source code is governed by a
3// BSD-style license that can be found in the LICENSE file.
4
5// A simple interpreter for the Irregexp byte code.
6
7#include "vm/regexp_interpreter.h"
8
9#include <memory>
10#include <utility>
11
12#include "platform/unicode.h"
13#include "vm/object.h"
14#include "vm/regexp_assembler.h"
15#include "vm/regexp_bytecodes.h"
16#include "vm/unibrow-inl.h"
17#include "vm/unibrow.h"
18
19namespace dart {
20
21DEFINE_FLAG(bool, trace_regexp_bytecodes, false, "trace_regexp_bytecodes");
22
23typedef unibrow::Mapping<unibrow::Ecma262Canonicalize> Canonicalize;
24
25template <typename Char>
26static bool BackRefMatchesNoCase(Canonicalize* interp_canonicalize,
27 intptr_t from,
28 intptr_t current,
29 intptr_t len,
30 const String& subject,
31 bool unicode);
32
33template <>
34bool BackRefMatchesNoCase<uint16_t>(Canonicalize* interp_canonicalize,
35 intptr_t from,
36 intptr_t current,
37 intptr_t len,
38 const String& subject,
39 bool unicode) {
40 Bool& ret = Bool::Handle();
41 if (unicode) {
42 ret = static_cast<BoolPtr>(CaseInsensitiveCompareUTF16(
43 static_cast<uword>(subject.raw()), static_cast<uword>(Smi::New(from)),
44 static_cast<uword>(Smi::New(current)),
45 static_cast<uword>(Smi::New(len))));
46 } else {
47 ret = static_cast<BoolPtr>(CaseInsensitiveCompareUCS2(
48 static_cast<uword>(subject.raw()), static_cast<uword>(Smi::New(from)),
49 static_cast<uword>(Smi::New(current)),
50 static_cast<uword>(Smi::New(len))));
51 }
52 return ret.value();
53}
54
55template <>
56bool BackRefMatchesNoCase<uint8_t>(Canonicalize* interp_canonicalize,
57 intptr_t from,
58 intptr_t current,
59 intptr_t len,
60 const String& subject,
61 bool unicode) {
62 // For Latin1 characters the unicode flag makes no difference.
63 for (int i = 0; i < len; i++) {
64 unsigned int old_char = subject.CharAt(from++);
65 unsigned int new_char = subject.CharAt(current++);
66 if (old_char == new_char) continue;
67 // Convert both characters to lower case.
68 old_char |= 0x20;
69 new_char |= 0x20;
70 if (old_char != new_char) return false;
71 // Not letters in the ASCII range and Latin-1 range.
72 if (!(old_char - 'a' <= 'z' - 'a') &&
73 !(old_char - 224 <= 254 - 224 && old_char != 247)) {
74 return false;
75 }
76 }
77 return true;
78}
79
80#ifdef DEBUG
81static void TraceInterpreter(const uint8_t* code_base,
82 const uint8_t* pc,
83 int stack_depth,
84 int current_position,
85 uint32_t current_char,
86 int bytecode_length,
87 const char* bytecode_name) {
88 if (FLAG_trace_regexp_bytecodes) {
89 bool printable = (current_char < 127 && current_char >= 32);
90 const char* format =
91 printable
92 ? "pc = %02x, sp = %d, curpos = %d, curchar = %08x (%c), bc = %s"
93 : "pc = %02x, sp = %d, curpos = %d, curchar = %08x .%c., bc = %s";
94 OS::PrintErr(format, pc - code_base, stack_depth, current_position,
95 current_char, printable ? current_char : '.', bytecode_name);
96 for (int i = 0; i < bytecode_length; i++) {
97 OS::PrintErr(", %02x", pc[i]);
98 }
99 OS::PrintErr(" ");
100 for (int i = 1; i < bytecode_length; i++) {
101 unsigned char b = pc[i];
102 if (b < 127 && b >= 32) {
103 OS::PrintErr("%c", b);
104 } else {
105 OS::PrintErr(".");
106 }
107 }
108 OS::PrintErr("\n");
109 }
110}
111
112#define BYTECODE(name) \
113 case BC_##name: \
114 TraceInterpreter(code_base, pc, \
115 static_cast<int>(backtrack_sp - backtrack_stack_base), \
116 current, current_char, BC_##name##_LENGTH, #name);
117#else
118#define BYTECODE(name) case BC_##name:
119#endif
120
121static int32_t Load32Aligned(const uint8_t* pc) {
122 ASSERT((reinterpret_cast<intptr_t>(pc) & 3) == 0);
123 return *reinterpret_cast<const int32_t*>(pc);
124}
125
126static int32_t Load16Aligned(const uint8_t* pc) {
127 ASSERT((reinterpret_cast<intptr_t>(pc) & 1) == 0);
128 return *reinterpret_cast<const uint16_t*>(pc);
129}
130
131// A simple abstraction over the backtracking stack used by the interpreter.
132// This backtracking stack does not grow automatically, but it ensures that the
133// the memory held by the stack is released or remembered in a cache if the
134// matching terminates.
135class BacktrackStack {
136 public:
137 explicit BacktrackStack(Zone* zone) {
138 memory_ = Isolate::Current()->TakeRegexpBacktrackStack();
139 // Note: using malloc here has a potential of triggering jemalloc/tcmalloc
140 // bugs which cause application to leak memory and eventually OOM.
141 // See https://github.com/dart-lang/sdk/issues/38820 and
142 // https://github.com/flutter/flutter/issues/29007 for examples.
143 // So intead we directly ask OS to provide us memory.
144 if (memory_ == nullptr) {
145 memory_ = std::unique_ptr<VirtualMemory>(VirtualMemory::Allocate(
146 sizeof(intptr_t) * kBacktrackStackSize, /*is_executable=*/false,
147 "regexp-backtrack-stack"));
148 }
149 if (memory_ == nullptr) {
150 OUT_OF_MEMORY();
151 }
152 }
153
154 ~BacktrackStack() {
155 Isolate::Current()->CacheRegexpBacktrackStack(std::move(memory_));
156 }
157
158 intptr_t* data() const {
159 return reinterpret_cast<intptr_t*>(memory_->address());
160 }
161
162 intptr_t max_size() const { return kBacktrackStackSize; }
163
164 private:
165 static const intptr_t kBacktrackStackSize = 1 << 16;
166
167 std::unique_ptr<VirtualMemory> memory_;
168
169 DISALLOW_COPY_AND_ASSIGN(BacktrackStack);
170};
171
172template <typename Char>
173static IrregexpInterpreter::IrregexpResult RawMatch(const uint8_t* code_base,
174 const String& subject,
175 int32_t* registers,
176 intptr_t current,
177 uint32_t current_char,
178 Zone* zone) {
179 const uint8_t* pc = code_base;
180 // BacktrackStack ensures that the memory allocated for the backtracking stack
181 // is returned to the system or cached if there is no stack being cached at
182 // the moment.
183 BacktrackStack backtrack_stack(zone);
184 intptr_t* backtrack_stack_base = backtrack_stack.data();
185 intptr_t* backtrack_sp = backtrack_stack_base;
186 intptr_t backtrack_stack_space = backtrack_stack.max_size();
187
188 // TODO(zerny): Optimize as single instance. V8 has this as an
189 // isolate member.
190 unibrow::Mapping<unibrow::Ecma262Canonicalize> canonicalize;
191
192 intptr_t subject_length = subject.Length();
193
194#ifdef DEBUG
195 if (FLAG_trace_regexp_bytecodes) {
196 OS::PrintErr("Start irregexp bytecode interpreter\n");
197 }
198#endif
199 while (true) {
200 int32_t insn = Load32Aligned(pc);
201 switch (insn & BYTECODE_MASK) {
202 BYTECODE(BREAK)
203 UNREACHABLE();
204 return IrregexpInterpreter::RE_FAILURE;
205 BYTECODE(PUSH_CP)
206 if (--backtrack_stack_space < 0) {
207 return IrregexpInterpreter::RE_EXCEPTION;
208 }
209 *backtrack_sp++ = current;
210 pc += BC_PUSH_CP_LENGTH;
211 break;
212 BYTECODE(PUSH_BT)
213 if (--backtrack_stack_space < 0) {
214 return IrregexpInterpreter::RE_EXCEPTION;
215 }
216 *backtrack_sp++ = Load32Aligned(pc + 4);
217 pc += BC_PUSH_BT_LENGTH;
218 break;
219 BYTECODE(PUSH_REGISTER)
220 if (--backtrack_stack_space < 0) {
221 return IrregexpInterpreter::RE_EXCEPTION;
222 }
223 *backtrack_sp++ = registers[insn >> BYTECODE_SHIFT];
224 pc += BC_PUSH_REGISTER_LENGTH;
225 break;
226 BYTECODE(SET_REGISTER)
227 registers[insn >> BYTECODE_SHIFT] = Load32Aligned(pc + 4);
228 pc += BC_SET_REGISTER_LENGTH;
229 break;
230 BYTECODE(ADVANCE_REGISTER)
231 registers[insn >> BYTECODE_SHIFT] += Load32Aligned(pc + 4);
232 pc += BC_ADVANCE_REGISTER_LENGTH;
233 break;
234 BYTECODE(SET_REGISTER_TO_CP)
235 registers[insn >> BYTECODE_SHIFT] = current + Load32Aligned(pc + 4);
236 pc += BC_SET_REGISTER_TO_CP_LENGTH;
237 break;
238 BYTECODE(SET_CP_TO_REGISTER)
239 current = registers[insn >> BYTECODE_SHIFT];
240 pc += BC_SET_CP_TO_REGISTER_LENGTH;
241 break;
242 BYTECODE(SET_REGISTER_TO_SP)
243 registers[insn >> BYTECODE_SHIFT] =
244 static_cast<int>(backtrack_sp - backtrack_stack_base);
245 pc += BC_SET_REGISTER_TO_SP_LENGTH;
246 break;
247 BYTECODE(SET_SP_TO_REGISTER)
248 backtrack_sp = backtrack_stack_base + registers[insn >> BYTECODE_SHIFT];
249 backtrack_stack_space =
250 backtrack_stack.max_size() -
251 static_cast<int>(backtrack_sp - backtrack_stack_base);
252 pc += BC_SET_SP_TO_REGISTER_LENGTH;
253 break;
254 BYTECODE(POP_CP)
255 backtrack_stack_space++;
256 --backtrack_sp;
257 current = *backtrack_sp;
258 pc += BC_POP_CP_LENGTH;
259 break;
260 BYTECODE(POP_BT)
261 backtrack_stack_space++;
262 --backtrack_sp;
263 pc = code_base + *backtrack_sp;
264 break;
265 BYTECODE(POP_REGISTER)
266 backtrack_stack_space++;
267 --backtrack_sp;
268 registers[insn >> BYTECODE_SHIFT] = *backtrack_sp;
269 pc += BC_POP_REGISTER_LENGTH;
270 break;
271 BYTECODE(FAIL)
272 return IrregexpInterpreter::RE_FAILURE;
273 BYTECODE(SUCCEED)
274 return IrregexpInterpreter::RE_SUCCESS;
275 BYTECODE(ADVANCE_CP)
276 current += insn >> BYTECODE_SHIFT;
277 pc += BC_ADVANCE_CP_LENGTH;
278 break;
279 BYTECODE(GOTO)
280 pc = code_base + Load32Aligned(pc + 4);
281 break;
282 BYTECODE(ADVANCE_CP_AND_GOTO)
283 current += insn >> BYTECODE_SHIFT;
284 pc = code_base + Load32Aligned(pc + 4);
285 break;
286 BYTECODE(CHECK_GREEDY)
287 if (current == backtrack_sp[-1]) {
288 backtrack_sp--;
289 backtrack_stack_space++;
290 pc = code_base + Load32Aligned(pc + 4);
291 } else {
292 pc += BC_CHECK_GREEDY_LENGTH;
293 }
294 break;
295 BYTECODE(LOAD_CURRENT_CHAR) {
296 int pos = current + (insn >> BYTECODE_SHIFT);
297 if (pos < 0 || pos >= subject_length) {
298 pc = code_base + Load32Aligned(pc + 4);
299 } else {
300 current_char = subject.CharAt(pos);
301 pc += BC_LOAD_CURRENT_CHAR_LENGTH;
302 }
303 break;
304 }
305 BYTECODE(LOAD_CURRENT_CHAR_UNCHECKED) {
306 int pos = current + (insn >> BYTECODE_SHIFT);
307 current_char = subject.CharAt(pos);
308 pc += BC_LOAD_CURRENT_CHAR_UNCHECKED_LENGTH;
309 break;
310 }
311 BYTECODE(LOAD_2_CURRENT_CHARS) {
312 int pos = current + (insn >> BYTECODE_SHIFT);
313 if (pos + 2 > subject_length) {
314 pc = code_base + Load32Aligned(pc + 4);
315 } else {
316 Char next = subject.CharAt(pos + 1);
317 current_char =
318 subject.CharAt(pos) | (next << (kBitsPerByte * sizeof(Char)));
319 pc += BC_LOAD_2_CURRENT_CHARS_LENGTH;
320 }
321 break;
322 }
323 BYTECODE(LOAD_2_CURRENT_CHARS_UNCHECKED) {
324 int pos = current + (insn >> BYTECODE_SHIFT);
325 Char next = subject.CharAt(pos + 1);
326 current_char =
327 subject.CharAt(pos) | (next << (kBitsPerByte * sizeof(Char)));
328 pc += BC_LOAD_2_CURRENT_CHARS_UNCHECKED_LENGTH;
329 break;
330 }
331 BYTECODE(LOAD_4_CURRENT_CHARS) {
332 ASSERT(sizeof(Char) == 1);
333 int pos = current + (insn >> BYTECODE_SHIFT);
334 if (pos + 4 > subject_length) {
335 pc = code_base + Load32Aligned(pc + 4);
336 } else {
337 Char next1 = subject.CharAt(pos + 1);
338 Char next2 = subject.CharAt(pos + 2);
339 Char next3 = subject.CharAt(pos + 3);
340 current_char = (subject.CharAt(pos) | (next1 << 8) | (next2 << 16) |
341 (next3 << 24));
342 pc += BC_LOAD_4_CURRENT_CHARS_LENGTH;
343 }
344 break;
345 }
346 BYTECODE(LOAD_4_CURRENT_CHARS_UNCHECKED) {
347 ASSERT(sizeof(Char) == 1);
348 int pos = current + (insn >> BYTECODE_SHIFT);
349 Char next1 = subject.CharAt(pos + 1);
350 Char next2 = subject.CharAt(pos + 2);
351 Char next3 = subject.CharAt(pos + 3);
352 current_char = (subject.CharAt(pos) | (next1 << 8) | (next2 << 16) |
353 (next3 << 24));
354 pc += BC_LOAD_4_CURRENT_CHARS_UNCHECKED_LENGTH;
355 break;
356 }
357 BYTECODE(CHECK_4_CHARS) {
358 uint32_t c = Load32Aligned(pc + 4);
359 if (c == current_char) {
360 pc = code_base + Load32Aligned(pc + 8);
361 } else {
362 pc += BC_CHECK_4_CHARS_LENGTH;
363 }
364 break;
365 }
366 BYTECODE(CHECK_CHAR) {
367 uint32_t c = (insn >> BYTECODE_SHIFT);
368 if (c == current_char) {
369 pc = code_base + Load32Aligned(pc + 4);
370 } else {
371 pc += BC_CHECK_CHAR_LENGTH;
372 }
373 break;
374 }
375 BYTECODE(CHECK_NOT_4_CHARS) {
376 uint32_t c = Load32Aligned(pc + 4);
377 if (c != current_char) {
378 pc = code_base + Load32Aligned(pc + 8);
379 } else {
380 pc += BC_CHECK_NOT_4_CHARS_LENGTH;
381 }
382 break;
383 }
384 BYTECODE(CHECK_NOT_CHAR) {
385 uint32_t c = (insn >> BYTECODE_SHIFT);
386 if (c != current_char) {
387 pc = code_base + Load32Aligned(pc + 4);
388 } else {
389 pc += BC_CHECK_NOT_CHAR_LENGTH;
390 }
391 break;
392 }
393 BYTECODE(AND_CHECK_4_CHARS) {
394 uint32_t c = Load32Aligned(pc + 4);
395 if (c == (current_char & Load32Aligned(pc + 8))) {
396 pc = code_base + Load32Aligned(pc + 12);
397 } else {
398 pc += BC_AND_CHECK_4_CHARS_LENGTH;
399 }
400 break;
401 }
402 BYTECODE(AND_CHECK_CHAR) {
403 uint32_t c = (insn >> BYTECODE_SHIFT);
404 if (c == (current_char & Load32Aligned(pc + 4))) {
405 pc = code_base + Load32Aligned(pc + 8);
406 } else {
407 pc += BC_AND_CHECK_CHAR_LENGTH;
408 }
409 break;
410 }
411 BYTECODE(AND_CHECK_NOT_4_CHARS) {
412 uint32_t c = Load32Aligned(pc + 4);
413 if (c != (current_char & Load32Aligned(pc + 8))) {
414 pc = code_base + Load32Aligned(pc + 12);
415 } else {
416 pc += BC_AND_CHECK_NOT_4_CHARS_LENGTH;
417 }
418 break;
419 }
420 BYTECODE(AND_CHECK_NOT_CHAR) {
421 uint32_t c = (insn >> BYTECODE_SHIFT);
422 if (c != (current_char & Load32Aligned(pc + 4))) {
423 pc = code_base + Load32Aligned(pc + 8);
424 } else {
425 pc += BC_AND_CHECK_NOT_CHAR_LENGTH;
426 }
427 break;
428 }
429 BYTECODE(MINUS_AND_CHECK_NOT_CHAR) {
430 uint32_t c = (insn >> BYTECODE_SHIFT);
431 uint32_t minus = Load16Aligned(pc + 4);
432 uint32_t mask = Load16Aligned(pc + 6);
433 if (c != ((current_char - minus) & mask)) {
434 pc = code_base + Load32Aligned(pc + 8);
435 } else {
436 pc += BC_MINUS_AND_CHECK_NOT_CHAR_LENGTH;
437 }
438 break;
439 }
440 BYTECODE(CHECK_CHAR_IN_RANGE) {
441 uint32_t from = Load16Aligned(pc + 4);
442 uint32_t to = Load16Aligned(pc + 6);
443 if (from <= current_char && current_char <= to) {
444 pc = code_base + Load32Aligned(pc + 8);
445 } else {
446 pc += BC_CHECK_CHAR_IN_RANGE_LENGTH;
447 }
448 break;
449 }
450 BYTECODE(CHECK_CHAR_NOT_IN_RANGE) {
451 uint32_t from = Load16Aligned(pc + 4);
452 uint32_t to = Load16Aligned(pc + 6);
453 if (from > current_char || current_char > to) {
454 pc = code_base + Load32Aligned(pc + 8);
455 } else {
456 pc += BC_CHECK_CHAR_NOT_IN_RANGE_LENGTH;
457 }
458 break;
459 }
460 BYTECODE(CHECK_BIT_IN_TABLE) {
461 int mask = RegExpMacroAssembler::kTableMask;
462 uint8_t b = pc[8 + ((current_char & mask) >> kBitsPerByteLog2)];
463 int bit = (current_char & (kBitsPerByte - 1));
464 if ((b & (1 << bit)) != 0) {
465 pc = code_base + Load32Aligned(pc + 4);
466 } else {
467 pc += BC_CHECK_BIT_IN_TABLE_LENGTH;
468 }
469 break;
470 }
471 BYTECODE(CHECK_LT) {
472 uint32_t limit = (insn >> BYTECODE_SHIFT);
473 if (current_char < limit) {
474 pc = code_base + Load32Aligned(pc + 4);
475 } else {
476 pc += BC_CHECK_LT_LENGTH;
477 }
478 break;
479 }
480 BYTECODE(CHECK_GT) {
481 uint32_t limit = (insn >> BYTECODE_SHIFT);
482 if (current_char > limit) {
483 pc = code_base + Load32Aligned(pc + 4);
484 } else {
485 pc += BC_CHECK_GT_LENGTH;
486 }
487 break;
488 }
489 BYTECODE(CHECK_REGISTER_LT)
490 if (registers[insn >> BYTECODE_SHIFT] < Load32Aligned(pc + 4)) {
491 pc = code_base + Load32Aligned(pc + 8);
492 } else {
493 pc += BC_CHECK_REGISTER_LT_LENGTH;
494 }
495 break;
496 BYTECODE(CHECK_REGISTER_GE)
497 if (registers[insn >> BYTECODE_SHIFT] >= Load32Aligned(pc + 4)) {
498 pc = code_base + Load32Aligned(pc + 8);
499 } else {
500 pc += BC_CHECK_REGISTER_GE_LENGTH;
501 }
502 break;
503 BYTECODE(CHECK_REGISTER_EQ_POS)
504 if (registers[insn >> BYTECODE_SHIFT] == current) {
505 pc = code_base + Load32Aligned(pc + 4);
506 } else {
507 pc += BC_CHECK_REGISTER_EQ_POS_LENGTH;
508 }
509 break;
510 BYTECODE(CHECK_NOT_REGS_EQUAL)
511 if (registers[insn >> BYTECODE_SHIFT] ==
512 registers[Load32Aligned(pc + 4)]) {
513 pc += BC_CHECK_NOT_REGS_EQUAL_LENGTH;
514 } else {
515 pc = code_base + Load32Aligned(pc + 8);
516 }
517 break;
518 BYTECODE(CHECK_NOT_BACK_REF) {
519 int from = registers[insn >> BYTECODE_SHIFT];
520 int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from;
521 if (from < 0 || len <= 0) {
522 pc += BC_CHECK_NOT_BACK_REF_LENGTH;
523 break;
524 }
525 if (current + len > subject_length) {
526 pc = code_base + Load32Aligned(pc + 4);
527 break;
528 } else {
529 int i;
530 for (i = 0; i < len; i++) {
531 if (subject.CharAt(from + i) != subject.CharAt(current + i)) {
532 pc = code_base + Load32Aligned(pc + 4);
533 break;
534 }
535 }
536 if (i < len) break;
537 current += len;
538 }
539 pc += BC_CHECK_NOT_BACK_REF_LENGTH;
540 break;
541 }
542 BYTECODE(CHECK_NOT_BACK_REF_NO_CASE_UNICODE)
543 FALL_THROUGH;
544 BYTECODE(CHECK_NOT_BACK_REF_NO_CASE) {
545 const bool unicode =
546 (insn & BYTECODE_MASK) == BC_CHECK_NOT_BACK_REF_NO_CASE_UNICODE;
547 int from = registers[insn >> BYTECODE_SHIFT];
548 int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from;
549 if (from < 0 || len <= 0) {
550 pc += BC_CHECK_NOT_BACK_REF_NO_CASE_LENGTH;
551 break;
552 }
553 if (current + len > subject_length) {
554 pc = code_base + Load32Aligned(pc + 4);
555 break;
556 } else {
557 if (BackRefMatchesNoCase<Char>(&canonicalize, from, current, len,
558 subject, unicode)) {
559 current += len;
560 pc += BC_CHECK_NOT_BACK_REF_NO_CASE_LENGTH;
561 } else {
562 pc = code_base + Load32Aligned(pc + 4);
563 }
564 }
565 break;
566 }
567 BYTECODE(CHECK_NOT_BACK_REF_BACKWARD) {
568 const int from = registers[insn >> BYTECODE_SHIFT];
569 const int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from;
570 if (from < 0 || len <= 0) {
571 pc += BC_CHECK_NOT_BACK_REF_BACKWARD_LENGTH;
572 break;
573 }
574 if ((current - len) < 0) {
575 pc = code_base + Load32Aligned(pc + 4);
576 break;
577 } else {
578 // When looking behind, the string to match (if it is there) lies
579 // before the current position, so we will check the [len] characters
580 // before the current position, excluding the current position itself.
581 const int start = current - len;
582 int i;
583 for (i = 0; i < len; i++) {
584 if (subject.CharAt(from + i) != subject.CharAt(start + i)) {
585 pc = code_base + Load32Aligned(pc + 4);
586 break;
587 }
588 }
589 if (i < len) break;
590 current -= len;
591 }
592 pc += BC_CHECK_NOT_BACK_REF_BACKWARD_LENGTH;
593 break;
594 }
595 BYTECODE(CHECK_NOT_BACK_REF_NO_CASE_UNICODE_BACKWARD)
596 FALL_THROUGH;
597 BYTECODE(CHECK_NOT_BACK_REF_NO_CASE_BACKWARD) {
598 bool unicode = (insn & BYTECODE_MASK) ==
599 BC_CHECK_NOT_BACK_REF_NO_CASE_UNICODE_BACKWARD;
600 int from = registers[insn >> BYTECODE_SHIFT];
601 int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from;
602 if (from < 0 || len <= 0) {
603 pc += BC_CHECK_NOT_BACK_REF_NO_CASE_BACKWARD_LENGTH;
604 break;
605 }
606 if (current < len) {
607 pc = code_base + Load32Aligned(pc + 4);
608 break;
609 } else {
610 if (BackRefMatchesNoCase<Char>(&canonicalize, from, current - len,
611 len, subject, unicode)) {
612 current -= len;
613 pc += BC_CHECK_NOT_BACK_REF_NO_CASE_BACKWARD_LENGTH;
614 } else {
615 pc = code_base + Load32Aligned(pc + 4);
616 }
617 }
618 break;
619 }
620 BYTECODE(CHECK_AT_START)
621 if (current == 0) {
622 pc = code_base + Load32Aligned(pc + 4);
623 } else {
624 pc += BC_CHECK_AT_START_LENGTH;
625 }
626 break;
627 BYTECODE(CHECK_NOT_AT_START) {
628 const int32_t cp_offset = insn >> BYTECODE_SHIFT;
629 if (current + cp_offset == 0) {
630 pc += BC_CHECK_NOT_AT_START_LENGTH;
631 } else {
632 pc = code_base + Load32Aligned(pc + 4);
633 }
634 break;
635 }
636 BYTECODE(SET_CURRENT_POSITION_FROM_END) {
637 int by = static_cast<uint32_t>(insn) >> BYTECODE_SHIFT;
638 if (subject_length - current > by) {
639 current = subject_length - by;
640 current_char = subject.CharAt(current - 1);
641 }
642 pc += BC_SET_CURRENT_POSITION_FROM_END_LENGTH;
643 break;
644 }
645 default:
646 UNREACHABLE();
647 break;
648 }
649 }
650}
651
652IrregexpInterpreter::IrregexpResult IrregexpInterpreter::Match(
653 const TypedData& bytecode,
654 const String& subject,
655 int32_t* registers,
656 intptr_t start_position,
657 Zone* zone) {
658 NoSafepointScope no_safepoint;
659 const uint8_t* code_base = reinterpret_cast<uint8_t*>(bytecode.DataAddr(0));
660
661 uint16_t previous_char = '\n';
662 if (start_position != 0) {
663 previous_char = subject.CharAt(start_position - 1);
664 }
665
666 if (subject.IsOneByteString() || subject.IsExternalOneByteString()) {
667 return RawMatch<uint8_t>(code_base, subject, registers, start_position,
668 previous_char, zone);
669 } else if (subject.IsTwoByteString() || subject.IsExternalTwoByteString()) {
670 return RawMatch<uint16_t>(code_base, subject, registers, start_position,
671 previous_char, zone);
672 } else {
673 UNREACHABLE();
674 return IrregexpInterpreter::RE_FAILURE;
675 }
676}
677
678} // namespace dart
679