1// Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file
2// for details. All rights reserved. Use of this source code is governed by a
3// BSD-style license that can be found in the LICENSE file.
4
5#include "vm/regexp_assembler.h"
6
7#include "unicode/uchar.h"
8
9#include "platform/unicode.h"
10
11#include "vm/flags.h"
12#include "vm/regexp.h"
13#include "vm/runtime_entry.h"
14#include "vm/unibrow-inl.h"
15
16namespace dart {
17
18void PrintUtf16(uint16_t c) {
19 const char* format =
20 (0x20 <= c && c <= 0x7F) ? "%c" : (c <= 0xff) ? "\\x%02x" : "\\u%04x";
21 OS::PrintErr(format, c);
22}
23
24uword /*BoolPtr*/ CaseInsensitiveCompareUCS2(uword /*StringPtr*/ str_raw,
25 uword /*SmiPtr*/ lhs_index_raw,
26 uword /*SmiPtr*/ rhs_index_raw,
27 uword /*SmiPtr*/ length_raw) {
28 const String& str = String::Handle(static_cast<StringPtr>(str_raw));
29 const Smi& lhs_index = Smi::Handle(static_cast<SmiPtr>(lhs_index_raw));
30 const Smi& rhs_index = Smi::Handle(static_cast<SmiPtr>(rhs_index_raw));
31 const Smi& length = Smi::Handle(static_cast<SmiPtr>(length_raw));
32
33 // TODO(zerny): Optimize as single instance. V8 has this as an
34 // isolate member.
35 unibrow::Mapping<unibrow::Ecma262Canonicalize> canonicalize;
36
37 for (intptr_t i = 0; i < length.Value(); i++) {
38 int32_t c1 = str.CharAt(lhs_index.Value() + i);
39 int32_t c2 = str.CharAt(rhs_index.Value() + i);
40 if (c1 != c2) {
41 int32_t s1[1] = {c1};
42 canonicalize.get(c1, '\0', s1);
43 if (s1[0] != c2) {
44 int32_t s2[1] = {c2};
45 canonicalize.get(c2, '\0', s2);
46 if (s1[0] != s2[0]) {
47 return static_cast<uword>(Bool::False().raw());
48 }
49 }
50 }
51 }
52 return static_cast<uword>(Bool::True().raw());
53}
54
55uword /*BoolPtr*/ CaseInsensitiveCompareUTF16(uword /*StringPtr*/ str_raw,
56 uword /*SmiPtr*/ lhs_index_raw,
57 uword /*SmiPtr*/ rhs_index_raw,
58 uword /*SmiPtr*/ length_raw) {
59 const String& str = String::Handle(static_cast<StringPtr>(str_raw));
60 const Smi& lhs_index = Smi::Handle(static_cast<SmiPtr>(lhs_index_raw));
61 const Smi& rhs_index = Smi::Handle(static_cast<SmiPtr>(rhs_index_raw));
62 const Smi& length = Smi::Handle(static_cast<SmiPtr>(length_raw));
63
64 for (intptr_t i = 0; i < length.Value(); i++) {
65 int32_t c1 = str.CharAt(lhs_index.Value() + i);
66 int32_t c2 = str.CharAt(rhs_index.Value() + i);
67 if (Utf16::IsLeadSurrogate(c1)) {
68 // Non-BMP characters do not have case-equivalents in the BMP.
69 // Both have to be non-BMP for them to be able to match.
70 if (!Utf16::IsLeadSurrogate(c2))
71 return static_cast<uword>(Bool::False().raw());
72 if (i + 1 < length.Value()) {
73 uint16_t c1t = str.CharAt(lhs_index.Value() + i + 1);
74 uint16_t c2t = str.CharAt(rhs_index.Value() + i + 1);
75 if (Utf16::IsTrailSurrogate(c1t) && Utf16::IsTrailSurrogate(c2t)) {
76 c1 = Utf16::Decode(c1, c1t);
77 c2 = Utf16::Decode(c2, c2t);
78 i++;
79 }
80 }
81 }
82 c1 = u_foldCase(c1, U_FOLD_CASE_DEFAULT);
83 c2 = u_foldCase(c2, U_FOLD_CASE_DEFAULT);
84 if (c1 != c2) return static_cast<uword>(Bool::False().raw());
85 }
86 return static_cast<uword>(Bool::True().raw());
87}
88
89DEFINE_RAW_LEAF_RUNTIME_ENTRY(
90 CaseInsensitiveCompareUCS2,
91 4,
92 false /* is_float */,
93 reinterpret_cast<RuntimeFunction>(&CaseInsensitiveCompareUCS2));
94
95DEFINE_RAW_LEAF_RUNTIME_ENTRY(
96 CaseInsensitiveCompareUTF16,
97 4,
98 false /* is_float */,
99 reinterpret_cast<RuntimeFunction>(&CaseInsensitiveCompareUTF16));
100
101BlockLabel::BlockLabel() {
102#if !defined(DART_PRECOMPILED_RUNTIME)
103 if (!FLAG_interpret_irregexp) {
104 // Only needed by the compiled IR backend.
105 block_ =
106 new JoinEntryInstr(-1, -1, CompilerState::Current().GetNextDeoptId());
107 }
108#endif
109}
110
111RegExpMacroAssembler::RegExpMacroAssembler(Zone* zone)
112 : slow_safe_compiler_(false), global_mode_(NOT_GLOBAL), zone_(zone) {}
113
114RegExpMacroAssembler::~RegExpMacroAssembler() {}
115
116void RegExpMacroAssembler::CheckNotInSurrogatePair(intptr_t cp_offset,
117 BlockLabel* on_failure) {
118 BlockLabel ok;
119 // Check that current character is not a trail surrogate.
120 LoadCurrentCharacter(cp_offset, &ok);
121 CheckCharacterNotInRange(Utf16::kTrailSurrogateStart,
122 Utf16::kTrailSurrogateEnd, &ok);
123 // Check that previous character is not a lead surrogate.
124 LoadCurrentCharacter(cp_offset - 1, &ok);
125 CheckCharacterInRange(Utf16::kLeadSurrogateStart, Utf16::kLeadSurrogateEnd,
126 on_failure);
127 BindBlock(&ok);
128}
129
130} // namespace dart
131