1// Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file
2// for details. All rights reserved. Use of this source code is governed by a
3// BSD-style license that can be found in the LICENSE file.
4
5#ifndef RUNTIME_VM_REGEXP_ASSEMBLER_H_
6#define RUNTIME_VM_REGEXP_ASSEMBLER_H_
7
8#include "vm/object.h"
9
10#if !defined(DART_PRECOMPILED_RUNTIME)
11#include "vm/compiler/assembler/assembler.h"
12#include "vm/compiler/backend/il.h"
13#endif // !defined(DART_PRECOMPILED_RUNTIME)
14
15namespace dart {
16
17// Utility function for the DotPrinter
18void PrintUtf16(uint16_t c);
19
20extern "C" {
21// Compares two-byte strings case insensitively as UCS2.
22// Called from generated RegExp code.
23uword /*BoolPtr*/ CaseInsensitiveCompareUCS2(uword /*StringPtr*/ str_raw,
24 uword /*SmiPtr*/ lhs_index_raw,
25 uword /*SmiPtr*/ rhs_index_raw,
26 uword /*SmiPtr*/ length_raw);
27
28// Compares two-byte strings case insensitively as UTF16.
29// Called from generated RegExp code.
30uword /*BoolPtr*/ CaseInsensitiveCompareUTF16(uword /*StringPtr*/ str_raw,
31 uword /*SmiPtr*/ lhs_index_raw,
32 uword /*SmiPtr*/ rhs_index_raw,
33 uword /*SmiPtr*/ length_raw);
34}
35
36/// Convenience wrapper around a BlockEntryInstr pointer.
37class BlockLabel : public ValueObject {
38 // Used by the IR assembler.
39 public:
40 BlockLabel();
41 ~BlockLabel() { ASSERT(!is_linked()); }
42
43 intptr_t pos() const { return pos_; }
44 bool is_bound() const { return is_bound_; }
45 bool is_linked() const { return !is_bound_ && is_linked_; }
46#if !defined(DART_PRECOMPILED_RUNTIME)
47 JoinEntryInstr* block() const { return block_; }
48#endif // !defined(DART_PRECOMPILED_RUNTIME)
49
50 void Unuse() {
51 pos_ = -1;
52 is_bound_ = false;
53 is_linked_ = false;
54 }
55
56 void BindTo(intptr_t pos) {
57 pos_ = pos;
58#if !defined(DART_PRECOMPILED_RUNTIME)
59 if (block_ != nullptr) block_->set_block_id(pos);
60#endif // !defined(DART_PRECOMPILED_RUNTIME)
61 is_bound_ = true;
62 is_linked_ = false;
63 ASSERT(is_bound());
64 }
65
66 // Used by bytecode assembler to form a linked list out of
67 // forward jumps to an unbound label.
68 void LinkTo(intptr_t pos) {
69#if !defined(DART_PRECOMPILED_RUNTIME)
70 ASSERT(block_ == nullptr);
71#endif
72 ASSERT(!is_bound_);
73 pos_ = pos;
74 is_linked_ = true;
75 }
76
77 // Used by IR builder to mark block label as used.
78 void SetLinked() {
79#if !defined(DART_PRECOMPILED_RUNTIME)
80 ASSERT(block_ != nullptr);
81#endif
82 if (!is_bound_) {
83 is_linked_ = true;
84 }
85 }
86
87 private:
88 bool is_bound_ = false;
89 bool is_linked_ = false;
90 intptr_t pos_ = -1;
91#if !defined(DART_PRECOMPILED_RUNTIME)
92 JoinEntryInstr* block_ = nullptr;
93#endif // !defined(DART_PRECOMPILED_RUNTIME)
94};
95
96class RegExpMacroAssembler : public ZoneAllocated {
97 public:
98 // The implementation must be able to handle at least:
99 static const intptr_t kMaxRegister = (1 << 16) - 1;
100 static const intptr_t kMaxCPOffset = (1 << 15) - 1;
101 static const intptr_t kMinCPOffset = -(1 << 15);
102
103 static const intptr_t kTableSizeBits = 7;
104 static const intptr_t kTableSize = 1 << kTableSizeBits;
105 static const intptr_t kTableMask = kTableSize - 1;
106
107 enum {
108 kParamRegExpIndex = 0,
109 kParamStringIndex,
110 kParamStartOffsetIndex,
111 kParamCount
112 };
113
114 enum IrregexpImplementation { kBytecodeImplementation, kIRImplementation };
115
116 explicit RegExpMacroAssembler(Zone* zone);
117 virtual ~RegExpMacroAssembler();
118 // The maximal number of pushes between stack checks. Users must supply
119 // kCheckStackLimit flag to push operations (instead of kNoStackLimitCheck)
120 // at least once for every stack_limit() pushes that are executed.
121 virtual intptr_t stack_limit_slack() = 0;
122 virtual bool CanReadUnaligned() = 0;
123 virtual void AdvanceCurrentPosition(intptr_t by) = 0; // Signed cp change.
124 virtual void AdvanceRegister(intptr_t reg, intptr_t by) = 0; // r[reg] += by.
125 // Continues execution from the position pushed on the top of the backtrack
126 // stack by an earlier PushBacktrack(BlockLabel*).
127 virtual void Backtrack() = 0;
128 virtual void BindBlock(BlockLabel* label) = 0;
129 virtual void CheckAtStart(BlockLabel* on_at_start) = 0;
130 // Dispatch after looking the current character up in a 2-bits-per-entry
131 // map. The destinations vector has up to 4 labels.
132 virtual void CheckCharacter(unsigned c, BlockLabel* on_equal) = 0;
133 // Bitwise and the current character with the given constant and then
134 // check for a match with c.
135 virtual void CheckCharacterAfterAnd(unsigned c,
136 unsigned and_with,
137 BlockLabel* on_equal) = 0;
138 virtual void CheckCharacterGT(uint16_t limit, BlockLabel* on_greater) = 0;
139 virtual void CheckCharacterLT(uint16_t limit, BlockLabel* on_less) = 0;
140 virtual void CheckGreedyLoop(BlockLabel* on_tos_equals_current_position) = 0;
141 virtual void CheckNotAtStart(intptr_t cp_offset,
142 BlockLabel* on_not_at_start) = 0;
143 virtual void CheckNotBackReference(intptr_t start_reg,
144 bool read_backward,
145 BlockLabel* on_no_match) = 0;
146 virtual void CheckNotBackReferenceIgnoreCase(intptr_t start_reg,
147 bool read_backward,
148 bool unicode,
149 BlockLabel* on_no_match) = 0;
150 // Check the current character for a match with a literal character. If we
151 // fail to match then goto the on_failure label. End of input always
152 // matches. If the label is NULL then we should pop a backtrack address off
153 // the stack and go to that.
154 virtual void CheckNotCharacter(unsigned c, BlockLabel* on_not_equal) = 0;
155 virtual void CheckNotCharacterAfterAnd(unsigned c,
156 unsigned and_with,
157 BlockLabel* on_not_equal) = 0;
158 // Subtract a constant from the current character, then and with the given
159 // constant and then check for a match with c.
160 virtual void CheckNotCharacterAfterMinusAnd(uint16_t c,
161 uint16_t minus,
162 uint16_t and_with,
163 BlockLabel* on_not_equal) = 0;
164 virtual void CheckCharacterInRange(uint16_t from,
165 uint16_t to, // Both inclusive.
166 BlockLabel* on_in_range) = 0;
167 virtual void CheckCharacterNotInRange(uint16_t from,
168 uint16_t to, // Both inclusive.
169 BlockLabel* on_not_in_range) = 0;
170
171 // The current character (modulus the kTableSize) is looked up in the byte
172 // array, and if the found byte is non-zero, we jump to the on_bit_set label.
173 virtual void CheckBitInTable(const TypedData& table,
174 BlockLabel* on_bit_set) = 0;
175
176 // Checks for preemption and serves as an OSR entry.
177 virtual void CheckPreemption(bool is_backtrack) {}
178
179 // Checks whether the given offset from the current position is before
180 // the end of the string. May overwrite the current character.
181 virtual void CheckPosition(intptr_t cp_offset, BlockLabel* on_outside_input) {
182 LoadCurrentCharacter(cp_offset, on_outside_input, true);
183 }
184 // Check whether a standard/default character class matches the current
185 // character. Returns false if the type of special character class does
186 // not have custom support.
187 // May clobber the current loaded character.
188 virtual bool CheckSpecialCharacterClass(uint16_t type,
189 BlockLabel* on_no_match) {
190 return false;
191 }
192 virtual void Fail() = 0;
193 // Check whether a register is >= a given constant and go to a label if it
194 // is. Backtracks instead if the label is NULL.
195 virtual void IfRegisterGE(intptr_t reg,
196 intptr_t comparand,
197 BlockLabel* if_ge) = 0;
198 // Check whether a register is < a given constant and go to a label if it is.
199 // Backtracks instead if the label is NULL.
200 virtual void IfRegisterLT(intptr_t reg,
201 intptr_t comparand,
202 BlockLabel* if_lt) = 0;
203 // Check whether a register is == to the current position and go to a
204 // label if it is.
205 virtual void IfRegisterEqPos(intptr_t reg, BlockLabel* if_eq) = 0;
206 virtual IrregexpImplementation Implementation() = 0;
207 // The assembler is closed, iff there is no current instruction assigned.
208 virtual bool IsClosed() const = 0;
209 // Jump to the target label without setting it as the current instruction.
210 virtual void GoTo(BlockLabel* to) = 0;
211 virtual void LoadCurrentCharacter(intptr_t cp_offset,
212 BlockLabel* on_end_of_input,
213 bool check_bounds = true,
214 intptr_t characters = 1) = 0;
215 virtual void PopCurrentPosition() = 0;
216 virtual void PopRegister(intptr_t register_index) = 0;
217 // Prints string within the generated code. Used for debugging.
218 virtual void Print(const char* str) = 0;
219 // Prints all emitted blocks.
220 virtual void PrintBlocks() = 0;
221 // Pushes the label on the backtrack stack, so that a following Backtrack
222 // will go to this label. Always checks the backtrack stack limit.
223 virtual void PushBacktrack(BlockLabel* label) = 0;
224 virtual void PushCurrentPosition() = 0;
225 virtual void PushRegister(intptr_t register_index) = 0;
226 virtual void ReadCurrentPositionFromRegister(intptr_t reg) = 0;
227 virtual void ReadStackPointerFromRegister(intptr_t reg) = 0;
228 virtual void SetCurrentPositionFromEnd(intptr_t by) = 0;
229 virtual void SetRegister(intptr_t register_index, intptr_t to) = 0;
230 // Return whether the matching (with a global regexp) will be restarted.
231 virtual bool Succeed() = 0;
232 virtual void WriteCurrentPositionToRegister(intptr_t reg,
233 intptr_t cp_offset) = 0;
234 virtual void ClearRegisters(intptr_t reg_from, intptr_t reg_to) = 0;
235 virtual void WriteStackPointerToRegister(intptr_t reg) = 0;
236
237 // Check that we are not in the middle of a surrogate pair.
238 void CheckNotInSurrogatePair(intptr_t cp_offset, BlockLabel* on_failure);
239
240 // Controls the generation of large inlined constants in the code.
241 void set_slow_safe(bool ssc) { slow_safe_compiler_ = ssc; }
242 bool slow_safe() { return slow_safe_compiler_; }
243
244 enum GlobalMode {
245 NOT_GLOBAL,
246 GLOBAL,
247 GLOBAL_NO_ZERO_LENGTH_CHECK,
248 GLOBAL_UNICODE
249 };
250 // Set whether the regular expression has the global flag. Exiting due to
251 // a failure in a global regexp may still mean success overall.
252 inline void set_global_mode(GlobalMode mode) { global_mode_ = mode; }
253 inline bool global() { return global_mode_ != NOT_GLOBAL; }
254 inline bool global_with_zero_length_check() {
255 return global_mode_ == GLOBAL || global_mode_ == GLOBAL_UNICODE;
256 }
257 inline bool global_unicode() { return global_mode_ == GLOBAL_UNICODE; }
258
259 Zone* zone() const { return zone_; }
260
261 private:
262 bool slow_safe_compiler_;
263 GlobalMode global_mode_;
264 Zone* zone_;
265};
266
267} // namespace dart
268
269#endif // RUNTIME_VM_REGEXP_ASSEMBLER_H_
270