1 | // Copyright (c) 2015, the Dart project authors. Please see the AUTHORS file |
2 | // for details. All rights reserved. Use of this source code is governed by a |
3 | // BSD-style license that can be found in the LICENSE file. |
4 | |
5 | // A simple interpreter for the Irregexp byte code. |
6 | |
7 | #include "vm/regexp_interpreter.h" |
8 | |
9 | #include <memory> |
10 | #include <utility> |
11 | |
12 | #include "platform/unicode.h" |
13 | #include "vm/object.h" |
14 | #include "vm/regexp_assembler.h" |
15 | #include "vm/regexp_bytecodes.h" |
16 | #include "vm/unibrow-inl.h" |
17 | #include "vm/unibrow.h" |
18 | |
19 | namespace dart { |
20 | |
21 | DEFINE_FLAG(bool, trace_regexp_bytecodes, false, "trace_regexp_bytecodes" ); |
22 | |
23 | typedef unibrow::Mapping<unibrow::Ecma262Canonicalize> Canonicalize; |
24 | |
25 | template <typename Char> |
26 | static bool BackRefMatchesNoCase(Canonicalize* interp_canonicalize, |
27 | intptr_t from, |
28 | intptr_t current, |
29 | intptr_t len, |
30 | const String& subject, |
31 | bool unicode); |
32 | |
33 | template <> |
34 | bool BackRefMatchesNoCase<uint16_t>(Canonicalize* interp_canonicalize, |
35 | intptr_t from, |
36 | intptr_t current, |
37 | intptr_t len, |
38 | const String& subject, |
39 | bool unicode) { |
40 | Bool& ret = Bool::Handle(); |
41 | if (unicode) { |
42 | ret = static_cast<BoolPtr>(CaseInsensitiveCompareUTF16( |
43 | static_cast<uword>(subject.raw()), static_cast<uword>(Smi::New(from)), |
44 | static_cast<uword>(Smi::New(current)), |
45 | static_cast<uword>(Smi::New(len)))); |
46 | } else { |
47 | ret = static_cast<BoolPtr>(CaseInsensitiveCompareUCS2( |
48 | static_cast<uword>(subject.raw()), static_cast<uword>(Smi::New(from)), |
49 | static_cast<uword>(Smi::New(current)), |
50 | static_cast<uword>(Smi::New(len)))); |
51 | } |
52 | return ret.value(); |
53 | } |
54 | |
55 | template <> |
56 | bool BackRefMatchesNoCase<uint8_t>(Canonicalize* interp_canonicalize, |
57 | intptr_t from, |
58 | intptr_t current, |
59 | intptr_t len, |
60 | const String& subject, |
61 | bool unicode) { |
62 | // For Latin1 characters the unicode flag makes no difference. |
63 | for (int i = 0; i < len; i++) { |
64 | unsigned int old_char = subject.CharAt(from++); |
65 | unsigned int new_char = subject.CharAt(current++); |
66 | if (old_char == new_char) continue; |
67 | // Convert both characters to lower case. |
68 | old_char |= 0x20; |
69 | new_char |= 0x20; |
70 | if (old_char != new_char) return false; |
71 | // Not letters in the ASCII range and Latin-1 range. |
72 | if (!(old_char - 'a' <= 'z' - 'a') && |
73 | !(old_char - 224 <= 254 - 224 && old_char != 247)) { |
74 | return false; |
75 | } |
76 | } |
77 | return true; |
78 | } |
79 | |
80 | #ifdef DEBUG |
81 | static void TraceInterpreter(const uint8_t* code_base, |
82 | const uint8_t* pc, |
83 | int stack_depth, |
84 | int current_position, |
85 | uint32_t current_char, |
86 | int bytecode_length, |
87 | const char* bytecode_name) { |
88 | if (FLAG_trace_regexp_bytecodes) { |
89 | bool printable = (current_char < 127 && current_char >= 32); |
90 | const char* format = |
91 | printable |
92 | ? "pc = %02x, sp = %d, curpos = %d, curchar = %08x (%c), bc = %s" |
93 | : "pc = %02x, sp = %d, curpos = %d, curchar = %08x .%c., bc = %s" ; |
94 | OS::PrintErr(format, pc - code_base, stack_depth, current_position, |
95 | current_char, printable ? current_char : '.', bytecode_name); |
96 | for (int i = 0; i < bytecode_length; i++) { |
97 | OS::PrintErr(", %02x" , pc[i]); |
98 | } |
99 | OS::PrintErr(" " ); |
100 | for (int i = 1; i < bytecode_length; i++) { |
101 | unsigned char b = pc[i]; |
102 | if (b < 127 && b >= 32) { |
103 | OS::PrintErr("%c" , b); |
104 | } else { |
105 | OS::PrintErr("." ); |
106 | } |
107 | } |
108 | OS::PrintErr("\n" ); |
109 | } |
110 | } |
111 | |
112 | #define BYTECODE(name) \ |
113 | case BC_##name: \ |
114 | TraceInterpreter(code_base, pc, \ |
115 | static_cast<int>(backtrack_sp - backtrack_stack_base), \ |
116 | current, current_char, BC_##name##_LENGTH, #name); |
117 | #else |
118 | #define BYTECODE(name) case BC_##name: |
119 | #endif |
120 | |
121 | static int32_t Load32Aligned(const uint8_t* pc) { |
122 | ASSERT((reinterpret_cast<intptr_t>(pc) & 3) == 0); |
123 | return *reinterpret_cast<const int32_t*>(pc); |
124 | } |
125 | |
126 | static int32_t Load16Aligned(const uint8_t* pc) { |
127 | ASSERT((reinterpret_cast<intptr_t>(pc) & 1) == 0); |
128 | return *reinterpret_cast<const uint16_t*>(pc); |
129 | } |
130 | |
131 | // A simple abstraction over the backtracking stack used by the interpreter. |
132 | // This backtracking stack does not grow automatically, but it ensures that the |
133 | // the memory held by the stack is released or remembered in a cache if the |
134 | // matching terminates. |
135 | class BacktrackStack { |
136 | public: |
137 | explicit BacktrackStack(Zone* zone) { |
138 | memory_ = Isolate::Current()->TakeRegexpBacktrackStack(); |
139 | // Note: using malloc here has a potential of triggering jemalloc/tcmalloc |
140 | // bugs which cause application to leak memory and eventually OOM. |
141 | // See https://github.com/dart-lang/sdk/issues/38820 and |
142 | // https://github.com/flutter/flutter/issues/29007 for examples. |
143 | // So intead we directly ask OS to provide us memory. |
144 | if (memory_ == nullptr) { |
145 | memory_ = std::unique_ptr<VirtualMemory>(VirtualMemory::Allocate( |
146 | sizeof(intptr_t) * kBacktrackStackSize, /*is_executable=*/false, |
147 | "regexp-backtrack-stack" )); |
148 | } |
149 | if (memory_ == nullptr) { |
150 | OUT_OF_MEMORY(); |
151 | } |
152 | } |
153 | |
154 | ~BacktrackStack() { |
155 | Isolate::Current()->CacheRegexpBacktrackStack(std::move(memory_)); |
156 | } |
157 | |
158 | intptr_t* data() const { |
159 | return reinterpret_cast<intptr_t*>(memory_->address()); |
160 | } |
161 | |
162 | intptr_t max_size() const { return kBacktrackStackSize; } |
163 | |
164 | private: |
165 | static const intptr_t kBacktrackStackSize = 1 << 16; |
166 | |
167 | std::unique_ptr<VirtualMemory> memory_; |
168 | |
169 | DISALLOW_COPY_AND_ASSIGN(BacktrackStack); |
170 | }; |
171 | |
172 | template <typename Char> |
173 | static IrregexpInterpreter::IrregexpResult RawMatch(const uint8_t* code_base, |
174 | const String& subject, |
175 | int32_t* registers, |
176 | intptr_t current, |
177 | uint32_t current_char, |
178 | Zone* zone) { |
179 | const uint8_t* pc = code_base; |
180 | // BacktrackStack ensures that the memory allocated for the backtracking stack |
181 | // is returned to the system or cached if there is no stack being cached at |
182 | // the moment. |
183 | BacktrackStack backtrack_stack(zone); |
184 | intptr_t* backtrack_stack_base = backtrack_stack.data(); |
185 | intptr_t* backtrack_sp = backtrack_stack_base; |
186 | intptr_t backtrack_stack_space = backtrack_stack.max_size(); |
187 | |
188 | // TODO(zerny): Optimize as single instance. V8 has this as an |
189 | // isolate member. |
190 | unibrow::Mapping<unibrow::Ecma262Canonicalize> canonicalize; |
191 | |
192 | intptr_t subject_length = subject.Length(); |
193 | |
194 | #ifdef DEBUG |
195 | if (FLAG_trace_regexp_bytecodes) { |
196 | OS::PrintErr("Start irregexp bytecode interpreter\n" ); |
197 | } |
198 | #endif |
199 | while (true) { |
200 | int32_t insn = Load32Aligned(pc); |
201 | switch (insn & BYTECODE_MASK) { |
202 | BYTECODE(BREAK) |
203 | UNREACHABLE(); |
204 | return IrregexpInterpreter::RE_FAILURE; |
205 | BYTECODE(PUSH_CP) |
206 | if (--backtrack_stack_space < 0) { |
207 | return IrregexpInterpreter::RE_EXCEPTION; |
208 | } |
209 | *backtrack_sp++ = current; |
210 | pc += BC_PUSH_CP_LENGTH; |
211 | break; |
212 | BYTECODE(PUSH_BT) |
213 | if (--backtrack_stack_space < 0) { |
214 | return IrregexpInterpreter::RE_EXCEPTION; |
215 | } |
216 | *backtrack_sp++ = Load32Aligned(pc + 4); |
217 | pc += BC_PUSH_BT_LENGTH; |
218 | break; |
219 | BYTECODE(PUSH_REGISTER) |
220 | if (--backtrack_stack_space < 0) { |
221 | return IrregexpInterpreter::RE_EXCEPTION; |
222 | } |
223 | *backtrack_sp++ = registers[insn >> BYTECODE_SHIFT]; |
224 | pc += BC_PUSH_REGISTER_LENGTH; |
225 | break; |
226 | BYTECODE(SET_REGISTER) |
227 | registers[insn >> BYTECODE_SHIFT] = Load32Aligned(pc + 4); |
228 | pc += BC_SET_REGISTER_LENGTH; |
229 | break; |
230 | BYTECODE(ADVANCE_REGISTER) |
231 | registers[insn >> BYTECODE_SHIFT] += Load32Aligned(pc + 4); |
232 | pc += BC_ADVANCE_REGISTER_LENGTH; |
233 | break; |
234 | BYTECODE(SET_REGISTER_TO_CP) |
235 | registers[insn >> BYTECODE_SHIFT] = current + Load32Aligned(pc + 4); |
236 | pc += BC_SET_REGISTER_TO_CP_LENGTH; |
237 | break; |
238 | BYTECODE(SET_CP_TO_REGISTER) |
239 | current = registers[insn >> BYTECODE_SHIFT]; |
240 | pc += BC_SET_CP_TO_REGISTER_LENGTH; |
241 | break; |
242 | BYTECODE(SET_REGISTER_TO_SP) |
243 | registers[insn >> BYTECODE_SHIFT] = |
244 | static_cast<int>(backtrack_sp - backtrack_stack_base); |
245 | pc += BC_SET_REGISTER_TO_SP_LENGTH; |
246 | break; |
247 | BYTECODE(SET_SP_TO_REGISTER) |
248 | backtrack_sp = backtrack_stack_base + registers[insn >> BYTECODE_SHIFT]; |
249 | backtrack_stack_space = |
250 | backtrack_stack.max_size() - |
251 | static_cast<int>(backtrack_sp - backtrack_stack_base); |
252 | pc += BC_SET_SP_TO_REGISTER_LENGTH; |
253 | break; |
254 | BYTECODE(POP_CP) |
255 | backtrack_stack_space++; |
256 | --backtrack_sp; |
257 | current = *backtrack_sp; |
258 | pc += BC_POP_CP_LENGTH; |
259 | break; |
260 | BYTECODE(POP_BT) |
261 | backtrack_stack_space++; |
262 | --backtrack_sp; |
263 | pc = code_base + *backtrack_sp; |
264 | break; |
265 | BYTECODE(POP_REGISTER) |
266 | backtrack_stack_space++; |
267 | --backtrack_sp; |
268 | registers[insn >> BYTECODE_SHIFT] = *backtrack_sp; |
269 | pc += BC_POP_REGISTER_LENGTH; |
270 | break; |
271 | BYTECODE(FAIL) |
272 | return IrregexpInterpreter::RE_FAILURE; |
273 | BYTECODE(SUCCEED) |
274 | return IrregexpInterpreter::RE_SUCCESS; |
275 | BYTECODE(ADVANCE_CP) |
276 | current += insn >> BYTECODE_SHIFT; |
277 | pc += BC_ADVANCE_CP_LENGTH; |
278 | break; |
279 | BYTECODE(GOTO) |
280 | pc = code_base + Load32Aligned(pc + 4); |
281 | break; |
282 | BYTECODE(ADVANCE_CP_AND_GOTO) |
283 | current += insn >> BYTECODE_SHIFT; |
284 | pc = code_base + Load32Aligned(pc + 4); |
285 | break; |
286 | BYTECODE(CHECK_GREEDY) |
287 | if (current == backtrack_sp[-1]) { |
288 | backtrack_sp--; |
289 | backtrack_stack_space++; |
290 | pc = code_base + Load32Aligned(pc + 4); |
291 | } else { |
292 | pc += BC_CHECK_GREEDY_LENGTH; |
293 | } |
294 | break; |
295 | BYTECODE(LOAD_CURRENT_CHAR) { |
296 | int pos = current + (insn >> BYTECODE_SHIFT); |
297 | if (pos < 0 || pos >= subject_length) { |
298 | pc = code_base + Load32Aligned(pc + 4); |
299 | } else { |
300 | current_char = subject.CharAt(pos); |
301 | pc += BC_LOAD_CURRENT_CHAR_LENGTH; |
302 | } |
303 | break; |
304 | } |
305 | BYTECODE(LOAD_CURRENT_CHAR_UNCHECKED) { |
306 | int pos = current + (insn >> BYTECODE_SHIFT); |
307 | current_char = subject.CharAt(pos); |
308 | pc += BC_LOAD_CURRENT_CHAR_UNCHECKED_LENGTH; |
309 | break; |
310 | } |
311 | BYTECODE(LOAD_2_CURRENT_CHARS) { |
312 | int pos = current + (insn >> BYTECODE_SHIFT); |
313 | if (pos + 2 > subject_length) { |
314 | pc = code_base + Load32Aligned(pc + 4); |
315 | } else { |
316 | Char next = subject.CharAt(pos + 1); |
317 | current_char = |
318 | subject.CharAt(pos) | (next << (kBitsPerByte * sizeof(Char))); |
319 | pc += BC_LOAD_2_CURRENT_CHARS_LENGTH; |
320 | } |
321 | break; |
322 | } |
323 | BYTECODE(LOAD_2_CURRENT_CHARS_UNCHECKED) { |
324 | int pos = current + (insn >> BYTECODE_SHIFT); |
325 | Char next = subject.CharAt(pos + 1); |
326 | current_char = |
327 | subject.CharAt(pos) | (next << (kBitsPerByte * sizeof(Char))); |
328 | pc += BC_LOAD_2_CURRENT_CHARS_UNCHECKED_LENGTH; |
329 | break; |
330 | } |
331 | BYTECODE(LOAD_4_CURRENT_CHARS) { |
332 | ASSERT(sizeof(Char) == 1); |
333 | int pos = current + (insn >> BYTECODE_SHIFT); |
334 | if (pos + 4 > subject_length) { |
335 | pc = code_base + Load32Aligned(pc + 4); |
336 | } else { |
337 | Char next1 = subject.CharAt(pos + 1); |
338 | Char next2 = subject.CharAt(pos + 2); |
339 | Char next3 = subject.CharAt(pos + 3); |
340 | current_char = (subject.CharAt(pos) | (next1 << 8) | (next2 << 16) | |
341 | (next3 << 24)); |
342 | pc += BC_LOAD_4_CURRENT_CHARS_LENGTH; |
343 | } |
344 | break; |
345 | } |
346 | BYTECODE(LOAD_4_CURRENT_CHARS_UNCHECKED) { |
347 | ASSERT(sizeof(Char) == 1); |
348 | int pos = current + (insn >> BYTECODE_SHIFT); |
349 | Char next1 = subject.CharAt(pos + 1); |
350 | Char next2 = subject.CharAt(pos + 2); |
351 | Char next3 = subject.CharAt(pos + 3); |
352 | current_char = (subject.CharAt(pos) | (next1 << 8) | (next2 << 16) | |
353 | (next3 << 24)); |
354 | pc += BC_LOAD_4_CURRENT_CHARS_UNCHECKED_LENGTH; |
355 | break; |
356 | } |
357 | BYTECODE(CHECK_4_CHARS) { |
358 | uint32_t c = Load32Aligned(pc + 4); |
359 | if (c == current_char) { |
360 | pc = code_base + Load32Aligned(pc + 8); |
361 | } else { |
362 | pc += BC_CHECK_4_CHARS_LENGTH; |
363 | } |
364 | break; |
365 | } |
366 | BYTECODE(CHECK_CHAR) { |
367 | uint32_t c = (insn >> BYTECODE_SHIFT); |
368 | if (c == current_char) { |
369 | pc = code_base + Load32Aligned(pc + 4); |
370 | } else { |
371 | pc += BC_CHECK_CHAR_LENGTH; |
372 | } |
373 | break; |
374 | } |
375 | BYTECODE(CHECK_NOT_4_CHARS) { |
376 | uint32_t c = Load32Aligned(pc + 4); |
377 | if (c != current_char) { |
378 | pc = code_base + Load32Aligned(pc + 8); |
379 | } else { |
380 | pc += BC_CHECK_NOT_4_CHARS_LENGTH; |
381 | } |
382 | break; |
383 | } |
384 | BYTECODE(CHECK_NOT_CHAR) { |
385 | uint32_t c = (insn >> BYTECODE_SHIFT); |
386 | if (c != current_char) { |
387 | pc = code_base + Load32Aligned(pc + 4); |
388 | } else { |
389 | pc += BC_CHECK_NOT_CHAR_LENGTH; |
390 | } |
391 | break; |
392 | } |
393 | BYTECODE(AND_CHECK_4_CHARS) { |
394 | uint32_t c = Load32Aligned(pc + 4); |
395 | if (c == (current_char & Load32Aligned(pc + 8))) { |
396 | pc = code_base + Load32Aligned(pc + 12); |
397 | } else { |
398 | pc += BC_AND_CHECK_4_CHARS_LENGTH; |
399 | } |
400 | break; |
401 | } |
402 | BYTECODE(AND_CHECK_CHAR) { |
403 | uint32_t c = (insn >> BYTECODE_SHIFT); |
404 | if (c == (current_char & Load32Aligned(pc + 4))) { |
405 | pc = code_base + Load32Aligned(pc + 8); |
406 | } else { |
407 | pc += BC_AND_CHECK_CHAR_LENGTH; |
408 | } |
409 | break; |
410 | } |
411 | BYTECODE(AND_CHECK_NOT_4_CHARS) { |
412 | uint32_t c = Load32Aligned(pc + 4); |
413 | if (c != (current_char & Load32Aligned(pc + 8))) { |
414 | pc = code_base + Load32Aligned(pc + 12); |
415 | } else { |
416 | pc += BC_AND_CHECK_NOT_4_CHARS_LENGTH; |
417 | } |
418 | break; |
419 | } |
420 | BYTECODE(AND_CHECK_NOT_CHAR) { |
421 | uint32_t c = (insn >> BYTECODE_SHIFT); |
422 | if (c != (current_char & Load32Aligned(pc + 4))) { |
423 | pc = code_base + Load32Aligned(pc + 8); |
424 | } else { |
425 | pc += BC_AND_CHECK_NOT_CHAR_LENGTH; |
426 | } |
427 | break; |
428 | } |
429 | BYTECODE(MINUS_AND_CHECK_NOT_CHAR) { |
430 | uint32_t c = (insn >> BYTECODE_SHIFT); |
431 | uint32_t minus = Load16Aligned(pc + 4); |
432 | uint32_t mask = Load16Aligned(pc + 6); |
433 | if (c != ((current_char - minus) & mask)) { |
434 | pc = code_base + Load32Aligned(pc + 8); |
435 | } else { |
436 | pc += BC_MINUS_AND_CHECK_NOT_CHAR_LENGTH; |
437 | } |
438 | break; |
439 | } |
440 | BYTECODE(CHECK_CHAR_IN_RANGE) { |
441 | uint32_t from = Load16Aligned(pc + 4); |
442 | uint32_t to = Load16Aligned(pc + 6); |
443 | if (from <= current_char && current_char <= to) { |
444 | pc = code_base + Load32Aligned(pc + 8); |
445 | } else { |
446 | pc += BC_CHECK_CHAR_IN_RANGE_LENGTH; |
447 | } |
448 | break; |
449 | } |
450 | BYTECODE(CHECK_CHAR_NOT_IN_RANGE) { |
451 | uint32_t from = Load16Aligned(pc + 4); |
452 | uint32_t to = Load16Aligned(pc + 6); |
453 | if (from > current_char || current_char > to) { |
454 | pc = code_base + Load32Aligned(pc + 8); |
455 | } else { |
456 | pc += BC_CHECK_CHAR_NOT_IN_RANGE_LENGTH; |
457 | } |
458 | break; |
459 | } |
460 | BYTECODE(CHECK_BIT_IN_TABLE) { |
461 | int mask = RegExpMacroAssembler::kTableMask; |
462 | uint8_t b = pc[8 + ((current_char & mask) >> kBitsPerByteLog2)]; |
463 | int bit = (current_char & (kBitsPerByte - 1)); |
464 | if ((b & (1 << bit)) != 0) { |
465 | pc = code_base + Load32Aligned(pc + 4); |
466 | } else { |
467 | pc += BC_CHECK_BIT_IN_TABLE_LENGTH; |
468 | } |
469 | break; |
470 | } |
471 | BYTECODE(CHECK_LT) { |
472 | uint32_t limit = (insn >> BYTECODE_SHIFT); |
473 | if (current_char < limit) { |
474 | pc = code_base + Load32Aligned(pc + 4); |
475 | } else { |
476 | pc += BC_CHECK_LT_LENGTH; |
477 | } |
478 | break; |
479 | } |
480 | BYTECODE(CHECK_GT) { |
481 | uint32_t limit = (insn >> BYTECODE_SHIFT); |
482 | if (current_char > limit) { |
483 | pc = code_base + Load32Aligned(pc + 4); |
484 | } else { |
485 | pc += BC_CHECK_GT_LENGTH; |
486 | } |
487 | break; |
488 | } |
489 | BYTECODE(CHECK_REGISTER_LT) |
490 | if (registers[insn >> BYTECODE_SHIFT] < Load32Aligned(pc + 4)) { |
491 | pc = code_base + Load32Aligned(pc + 8); |
492 | } else { |
493 | pc += BC_CHECK_REGISTER_LT_LENGTH; |
494 | } |
495 | break; |
496 | BYTECODE(CHECK_REGISTER_GE) |
497 | if (registers[insn >> BYTECODE_SHIFT] >= Load32Aligned(pc + 4)) { |
498 | pc = code_base + Load32Aligned(pc + 8); |
499 | } else { |
500 | pc += BC_CHECK_REGISTER_GE_LENGTH; |
501 | } |
502 | break; |
503 | BYTECODE(CHECK_REGISTER_EQ_POS) |
504 | if (registers[insn >> BYTECODE_SHIFT] == current) { |
505 | pc = code_base + Load32Aligned(pc + 4); |
506 | } else { |
507 | pc += BC_CHECK_REGISTER_EQ_POS_LENGTH; |
508 | } |
509 | break; |
510 | BYTECODE(CHECK_NOT_REGS_EQUAL) |
511 | if (registers[insn >> BYTECODE_SHIFT] == |
512 | registers[Load32Aligned(pc + 4)]) { |
513 | pc += BC_CHECK_NOT_REGS_EQUAL_LENGTH; |
514 | } else { |
515 | pc = code_base + Load32Aligned(pc + 8); |
516 | } |
517 | break; |
518 | BYTECODE(CHECK_NOT_BACK_REF) { |
519 | int from = registers[insn >> BYTECODE_SHIFT]; |
520 | int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from; |
521 | if (from < 0 || len <= 0) { |
522 | pc += BC_CHECK_NOT_BACK_REF_LENGTH; |
523 | break; |
524 | } |
525 | if (current + len > subject_length) { |
526 | pc = code_base + Load32Aligned(pc + 4); |
527 | break; |
528 | } else { |
529 | int i; |
530 | for (i = 0; i < len; i++) { |
531 | if (subject.CharAt(from + i) != subject.CharAt(current + i)) { |
532 | pc = code_base + Load32Aligned(pc + 4); |
533 | break; |
534 | } |
535 | } |
536 | if (i < len) break; |
537 | current += len; |
538 | } |
539 | pc += BC_CHECK_NOT_BACK_REF_LENGTH; |
540 | break; |
541 | } |
542 | BYTECODE(CHECK_NOT_BACK_REF_NO_CASE_UNICODE) |
543 | FALL_THROUGH; |
544 | BYTECODE(CHECK_NOT_BACK_REF_NO_CASE) { |
545 | const bool unicode = |
546 | (insn & BYTECODE_MASK) == BC_CHECK_NOT_BACK_REF_NO_CASE_UNICODE; |
547 | int from = registers[insn >> BYTECODE_SHIFT]; |
548 | int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from; |
549 | if (from < 0 || len <= 0) { |
550 | pc += BC_CHECK_NOT_BACK_REF_NO_CASE_LENGTH; |
551 | break; |
552 | } |
553 | if (current + len > subject_length) { |
554 | pc = code_base + Load32Aligned(pc + 4); |
555 | break; |
556 | } else { |
557 | if (BackRefMatchesNoCase<Char>(&canonicalize, from, current, len, |
558 | subject, unicode)) { |
559 | current += len; |
560 | pc += BC_CHECK_NOT_BACK_REF_NO_CASE_LENGTH; |
561 | } else { |
562 | pc = code_base + Load32Aligned(pc + 4); |
563 | } |
564 | } |
565 | break; |
566 | } |
567 | BYTECODE(CHECK_NOT_BACK_REF_BACKWARD) { |
568 | const int from = registers[insn >> BYTECODE_SHIFT]; |
569 | const int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from; |
570 | if (from < 0 || len <= 0) { |
571 | pc += BC_CHECK_NOT_BACK_REF_BACKWARD_LENGTH; |
572 | break; |
573 | } |
574 | if ((current - len) < 0) { |
575 | pc = code_base + Load32Aligned(pc + 4); |
576 | break; |
577 | } else { |
578 | // When looking behind, the string to match (if it is there) lies |
579 | // before the current position, so we will check the [len] characters |
580 | // before the current position, excluding the current position itself. |
581 | const int start = current - len; |
582 | int i; |
583 | for (i = 0; i < len; i++) { |
584 | if (subject.CharAt(from + i) != subject.CharAt(start + i)) { |
585 | pc = code_base + Load32Aligned(pc + 4); |
586 | break; |
587 | } |
588 | } |
589 | if (i < len) break; |
590 | current -= len; |
591 | } |
592 | pc += BC_CHECK_NOT_BACK_REF_BACKWARD_LENGTH; |
593 | break; |
594 | } |
595 | BYTECODE(CHECK_NOT_BACK_REF_NO_CASE_UNICODE_BACKWARD) |
596 | FALL_THROUGH; |
597 | BYTECODE(CHECK_NOT_BACK_REF_NO_CASE_BACKWARD) { |
598 | bool unicode = (insn & BYTECODE_MASK) == |
599 | BC_CHECK_NOT_BACK_REF_NO_CASE_UNICODE_BACKWARD; |
600 | int from = registers[insn >> BYTECODE_SHIFT]; |
601 | int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from; |
602 | if (from < 0 || len <= 0) { |
603 | pc += BC_CHECK_NOT_BACK_REF_NO_CASE_BACKWARD_LENGTH; |
604 | break; |
605 | } |
606 | if (current < len) { |
607 | pc = code_base + Load32Aligned(pc + 4); |
608 | break; |
609 | } else { |
610 | if (BackRefMatchesNoCase<Char>(&canonicalize, from, current - len, |
611 | len, subject, unicode)) { |
612 | current -= len; |
613 | pc += BC_CHECK_NOT_BACK_REF_NO_CASE_BACKWARD_LENGTH; |
614 | } else { |
615 | pc = code_base + Load32Aligned(pc + 4); |
616 | } |
617 | } |
618 | break; |
619 | } |
620 | BYTECODE(CHECK_AT_START) |
621 | if (current == 0) { |
622 | pc = code_base + Load32Aligned(pc + 4); |
623 | } else { |
624 | pc += BC_CHECK_AT_START_LENGTH; |
625 | } |
626 | break; |
627 | BYTECODE(CHECK_NOT_AT_START) { |
628 | const int32_t cp_offset = insn >> BYTECODE_SHIFT; |
629 | if (current + cp_offset == 0) { |
630 | pc += BC_CHECK_NOT_AT_START_LENGTH; |
631 | } else { |
632 | pc = code_base + Load32Aligned(pc + 4); |
633 | } |
634 | break; |
635 | } |
636 | BYTECODE(SET_CURRENT_POSITION_FROM_END) { |
637 | int by = static_cast<uint32_t>(insn) >> BYTECODE_SHIFT; |
638 | if (subject_length - current > by) { |
639 | current = subject_length - by; |
640 | current_char = subject.CharAt(current - 1); |
641 | } |
642 | pc += BC_SET_CURRENT_POSITION_FROM_END_LENGTH; |
643 | break; |
644 | } |
645 | default: |
646 | UNREACHABLE(); |
647 | break; |
648 | } |
649 | } |
650 | } |
651 | |
652 | IrregexpInterpreter::IrregexpResult IrregexpInterpreter::Match( |
653 | const TypedData& bytecode, |
654 | const String& subject, |
655 | int32_t* registers, |
656 | intptr_t start_position, |
657 | Zone* zone) { |
658 | NoSafepointScope no_safepoint; |
659 | const uint8_t* code_base = reinterpret_cast<uint8_t*>(bytecode.DataAddr(0)); |
660 | |
661 | uint16_t previous_char = '\n'; |
662 | if (start_position != 0) { |
663 | previous_char = subject.CharAt(start_position - 1); |
664 | } |
665 | |
666 | if (subject.IsOneByteString() || subject.IsExternalOneByteString()) { |
667 | return RawMatch<uint8_t>(code_base, subject, registers, start_position, |
668 | previous_char, zone); |
669 | } else if (subject.IsTwoByteString() || subject.IsExternalTwoByteString()) { |
670 | return RawMatch<uint16_t>(code_base, subject, registers, start_position, |
671 | previous_char, zone); |
672 | } else { |
673 | UNREACHABLE(); |
674 | return IrregexpInterpreter::RE_FAILURE; |
675 | } |
676 | } |
677 | |
678 | } // namespace dart |
679 | |