1// Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file
2// for details. All rights reserved. Use of this source code is governed by a
3// BSD-style license that can be found in the LICENSE file.
4
5#ifndef RUNTIME_VM_REGEXP_ASSEMBLER_IR_H_
6#define RUNTIME_VM_REGEXP_ASSEMBLER_IR_H_
7
8#include "vm/compiler/assembler/assembler.h"
9#include "vm/compiler/backend/il.h"
10#include "vm/object.h"
11#include "vm/regexp_assembler.h"
12
13namespace dart {
14
15class IRRegExpMacroAssembler : public RegExpMacroAssembler {
16 public:
17 // Type of input string to generate code for.
18 enum Mode { ASCII = 1, UC16 = 2 };
19
20 // Result of calling generated native RegExp code.
21 // RETRY: Something significant changed during execution, and the matching
22 // should be retried from scratch.
23 // EXCEPTION: Something failed during execution. If no exception has been
24 // thrown, it's an internal out-of-memory, and the caller should
25 // throw the exception.
26 // FAILURE: Matching failed.
27 // SUCCESS: Matching succeeded, and the output array has been filled with
28 // capture positions.
29 enum Result { RETRY = -2, EXCEPTION = -1, FAILURE = 0, SUCCESS = 1 };
30
31 IRRegExpMacroAssembler(intptr_t specialization_cid,
32 intptr_t capture_count,
33 const ParsedFunction* parsed_function,
34 const ZoneGrowableArray<const ICData*>& ic_data_array,
35 intptr_t osr_id,
36 Zone* zone);
37 virtual ~IRRegExpMacroAssembler();
38
39 virtual bool CanReadUnaligned();
40
41 static ArrayPtr Execute(const RegExp& regexp,
42 const String& input,
43 const Smi& start_offset,
44 bool sticky,
45 Zone* zone);
46
47 virtual bool IsClosed() const { return (current_instruction_ == NULL); }
48
49 virtual intptr_t stack_limit_slack();
50 virtual void AdvanceCurrentPosition(intptr_t by);
51 virtual void AdvanceRegister(intptr_t reg, intptr_t by);
52 virtual void Backtrack();
53 virtual void BindBlock(BlockLabel* label);
54 virtual void CheckAtStart(BlockLabel* on_at_start);
55 virtual void CheckCharacter(uint32_t c, BlockLabel* on_equal);
56 virtual void CheckCharacterAfterAnd(uint32_t c,
57 uint32_t mask,
58 BlockLabel* on_equal);
59 virtual void CheckCharacterGT(uint16_t limit, BlockLabel* on_greater);
60 virtual void CheckCharacterLT(uint16_t limit, BlockLabel* on_less);
61 // A "greedy loop" is a loop that is both greedy and with a simple
62 // body. It has a particularly simple implementation.
63 virtual void CheckGreedyLoop(BlockLabel* on_tos_equals_current_position);
64 virtual void CheckNotAtStart(intptr_t cp_offset, BlockLabel* on_not_at_start);
65 virtual void CheckNotBackReference(intptr_t start_reg,
66 bool read_backward,
67 BlockLabel* on_no_match);
68 virtual void CheckNotBackReferenceIgnoreCase(intptr_t start_reg,
69 bool read_backward,
70 bool unicode,
71 BlockLabel* on_no_match);
72 virtual void CheckNotCharacter(uint32_t c, BlockLabel* on_not_equal);
73 virtual void CheckNotCharacterAfterAnd(uint32_t c,
74 uint32_t mask,
75 BlockLabel* on_not_equal);
76 virtual void CheckNotCharacterAfterMinusAnd(uint16_t c,
77 uint16_t minus,
78 uint16_t mask,
79 BlockLabel* on_not_equal);
80 virtual void CheckCharacterInRange(uint16_t from,
81 uint16_t to,
82 BlockLabel* on_in_range);
83 virtual void CheckCharacterNotInRange(uint16_t from,
84 uint16_t to,
85 BlockLabel* on_not_in_range);
86 virtual void CheckBitInTable(const TypedData& table, BlockLabel* on_bit_set);
87
88 // Checks whether the given offset from the current position is before
89 // the end of the string.
90 virtual void CheckPosition(intptr_t cp_offset, BlockLabel* on_outside_input);
91 virtual bool CheckSpecialCharacterClass(uint16_t type,
92 BlockLabel* on_no_match);
93 virtual void Fail();
94 virtual void IfRegisterGE(intptr_t reg,
95 intptr_t comparand,
96 BlockLabel* if_ge);
97 virtual void IfRegisterLT(intptr_t reg,
98 intptr_t comparand,
99 BlockLabel* if_lt);
100 virtual void IfRegisterEqPos(intptr_t reg, BlockLabel* if_eq);
101 virtual IrregexpImplementation Implementation();
102 virtual void GoTo(BlockLabel* to);
103 virtual void LoadCurrentCharacter(intptr_t cp_offset,
104 BlockLabel* on_end_of_input,
105 bool check_bounds = true,
106 intptr_t characters = 1);
107 virtual void PopCurrentPosition();
108 virtual void PopRegister(intptr_t register_index);
109 virtual void Print(const char* str);
110 virtual void PushBacktrack(BlockLabel* label);
111 virtual void PushCurrentPosition();
112 virtual void PushRegister(intptr_t register_index);
113 virtual void ReadCurrentPositionFromRegister(intptr_t reg);
114 virtual void ReadStackPointerFromRegister(intptr_t reg);
115 virtual void SetCurrentPositionFromEnd(intptr_t by);
116 virtual void SetRegister(intptr_t register_index, intptr_t to);
117 virtual bool Succeed();
118 virtual void WriteCurrentPositionToRegister(intptr_t reg, intptr_t cp_offset);
119 virtual void ClearRegisters(intptr_t reg_from, intptr_t reg_to);
120 virtual void WriteStackPointerToRegister(intptr_t reg);
121
122 virtual void PrintBlocks();
123
124 IndirectGotoInstr* backtrack_goto() const { return backtrack_goto_; }
125 GraphEntryInstr* graph_entry() const { return entry_block_; }
126
127 intptr_t num_stack_locals() const { return local_id_.Count(); }
128 intptr_t num_blocks() const { return block_id_.Count(); }
129
130 // Generate a dispatch block implementing backtracking. Must be done after
131 // graph construction.
132 void GenerateBacktrackBlock();
133
134 // Allocate the actual registers array once its size is known. Must be done
135 // after graph construction.
136 void FinalizeRegistersArray();
137
138 private:
139 intptr_t GetNextDeoptId() const {
140 return thread_->compiler_state().GetNextDeoptId();
141 }
142
143 // Generate the contents of preset blocks. The entry block is the entry point
144 // of the generated code.
145 void GenerateEntryBlock();
146 // Copies capture indices into the result area and returns true.
147 void GenerateSuccessBlock();
148 // Returns false.
149 void GenerateExitBlock();
150
151 enum ComparisonKind {
152 kEQ,
153 kNE,
154 kLT,
155 kGT,
156 kLTE,
157 kGTE,
158 };
159
160 struct InstanceCallDescriptor {
161 // Standard (i.e. most non-Smi) functions.
162 explicit InstanceCallDescriptor(const String& name)
163 : name(name), token_kind(Token::kILLEGAL), checked_argument_count(1) {}
164
165 InstanceCallDescriptor(const String& name,
166 Token::Kind token_kind,
167 intptr_t checked_argument_count)
168 : name(name),
169 token_kind(token_kind),
170 checked_argument_count(checked_argument_count) {}
171
172 // Special cases for Smi and indexing functions.
173 static InstanceCallDescriptor FromToken(Token::Kind token_kind) {
174 switch (token_kind) {
175 case Token::kEQ:
176 return InstanceCallDescriptor(Symbols::EqualOperator(), token_kind,
177 2);
178 case Token::kADD:
179 return InstanceCallDescriptor(Symbols::Plus(), token_kind, 2);
180 case Token::kSUB:
181 return InstanceCallDescriptor(Symbols::Minus(), token_kind, 2);
182 case Token::kBIT_OR:
183 return InstanceCallDescriptor(Symbols::BitOr(), token_kind, 2);
184 case Token::kBIT_AND:
185 return InstanceCallDescriptor(Symbols::BitAnd(), token_kind, 2);
186 case Token::kLT:
187 return InstanceCallDescriptor(Symbols::LAngleBracket(), token_kind,
188 2);
189 case Token::kLTE:
190 return InstanceCallDescriptor(Symbols::LessEqualOperator(),
191 token_kind, 2);
192 case Token::kGT:
193 return InstanceCallDescriptor(Symbols::RAngleBracket(), token_kind,
194 2);
195 case Token::kGTE:
196 return InstanceCallDescriptor(Symbols::GreaterEqualOperator(),
197 token_kind, 2);
198 case Token::kNEGATE:
199 return InstanceCallDescriptor(Symbols::UnaryMinus(), token_kind, 1);
200 case Token::kINDEX:
201 return InstanceCallDescriptor(Symbols::IndexToken(), token_kind, 2);
202 case Token::kASSIGN_INDEX:
203 return InstanceCallDescriptor(Symbols::AssignIndexToken(), token_kind,
204 2);
205 default:
206 UNREACHABLE();
207 }
208 UNREACHABLE();
209 return InstanceCallDescriptor(Symbols::Empty());
210 }
211
212 const String& name;
213 Token::Kind token_kind;
214 intptr_t checked_argument_count;
215 };
216
217 LocalVariable* Local(const String& name);
218 LocalVariable* Parameter(const String& name, intptr_t index) const;
219
220 ConstantInstr* Int64Constant(int64_t value) const;
221 ConstantInstr* Uint64Constant(uint64_t value) const;
222 ConstantInstr* BoolConstant(bool value) const;
223 ConstantInstr* StringConstant(const char* value) const;
224
225 // The word character map static member of the RegExp class.
226 // Byte map of one byte characters with a 0xff if the character is a word
227 // character (digit, letter or underscore) and 0x00 otherwise.
228 // Used by generated RegExp code.
229 ConstantInstr* WordCharacterMapConstant() const;
230
231 ComparisonInstr* Comparison(ComparisonKind kind, Value* lhs, Value* rhs);
232 ComparisonInstr* Comparison(ComparisonKind kind,
233 Definition* lhs,
234 Definition* rhs);
235
236 InstanceCallInstr* InstanceCall(const InstanceCallDescriptor& desc,
237 Value* arg1) const;
238 InstanceCallInstr* InstanceCall(const InstanceCallDescriptor& desc,
239 Value* arg1,
240 Value* arg2) const;
241 InstanceCallInstr* InstanceCall(const InstanceCallDescriptor& desc,
242 Value* arg1,
243 Value* arg2,
244 Value* arg3) const;
245 InstanceCallInstr* InstanceCall(const InstanceCallDescriptor& desc,
246 InputsArray* arguments) const;
247
248 StaticCallInstr* StaticCall(const Function& function,
249 ICData::RebindRule rebind_rule) const;
250 StaticCallInstr* StaticCall(const Function& function,
251 Value* arg1,
252 ICData::RebindRule rebind_rule) const;
253 StaticCallInstr* StaticCall(const Function& function,
254 Value* arg1,
255 Value* arg2,
256 ICData::RebindRule rebind_rule) const;
257 StaticCallInstr* StaticCall(const Function& function,
258 InputsArray* arguments,
259 ICData::RebindRule rebind_rule) const;
260
261 // Creates a new block consisting simply of a goto to dst.
262 TargetEntryInstr* TargetWithJoinGoto(JoinEntryInstr* dst);
263 IndirectEntryInstr* IndirectWithJoinGoto(JoinEntryInstr* dst);
264
265 // Adds, respectively subtracts lhs and rhs and returns the result.
266 Definition* Add(Value* lhs, Value* rhs);
267 Definition* Sub(Value* lhs, Value* rhs);
268
269 LoadLocalInstr* LoadLocal(LocalVariable* local) const;
270 void StoreLocal(LocalVariable* local, Value* value);
271
272 Value* PushLocal(LocalVariable* local);
273
274 Value* PushRegisterIndex(intptr_t reg);
275 Value* LoadRegister(intptr_t reg);
276 void StoreRegister(intptr_t reg, intptr_t value);
277 void StoreRegister(Value* registers, Value* index, Value* value);
278
279 // Load a number of characters at the given offset from the
280 // current position, into the current-character register.
281 void LoadCurrentCharacterUnchecked(intptr_t cp_offset,
282 intptr_t character_count);
283
284 // Returns the character within the passed string at the specified index.
285 Value* CharacterAt(LocalVariable* index);
286
287 // Load a number of characters starting from index in the pattern string.
288 Value* LoadCodeUnitsAt(LocalVariable* index, intptr_t character_count);
289
290 // Check whether preemption has been requested. Also serves as an OSR entry.
291 void CheckPreemption(bool is_backtrack);
292
293 // Byte size of chars in the string to match (decided by the Mode argument)
294 inline intptr_t char_size() { return static_cast<int>(mode_); }
295
296 // Equivalent to a conditional branch to the label, unless the label
297 // is NULL, in which case it is a conditional Backtrack.
298 void BranchOrBacktrack(ComparisonInstr* comparison,
299 BlockLabel* true_successor);
300
301 // Set up all local variables and parameters.
302 void InitializeLocals();
303
304 // Allocates a new local, and returns the appropriate id for placing it
305 // on the stack.
306 intptr_t GetNextLocalIndex();
307
308 // We never have any copied parameters.
309 intptr_t num_copied_params() const { return 0; }
310
311 // Return the position register at the specified index, creating it if
312 // necessary. Note that the number of such registers can exceed the amount
313 // required by the number of output captures.
314 LocalVariable* position_register(intptr_t index);
315
316 void set_current_instruction(Instruction* instruction);
317
318 // The following functions are responsible for appending instructions
319 // to the current instruction in various ways. The most simple one
320 // is AppendInstruction, which simply appends an instruction and performs
321 // bookkeeping.
322 void AppendInstruction(Instruction* instruction);
323 // Similar to AppendInstruction, but closes the current block by
324 // setting current_instruction_ to NULL.
325 void CloseBlockWith(Instruction* instruction);
326 // Appends definition and allocates a temp index for the result.
327 Value* Bind(Definition* definition);
328 // Loads and binds a local variable.
329 Value* BindLoadLocal(const LocalVariable& local);
330
331 // Appends the definition.
332 void Do(Definition* definition);
333 // Closes the current block with a jump to the specified block.
334 void GoTo(JoinEntryInstr* to);
335
336 // Accessors for our local stack_.
337 void PushStack(Definition* definition);
338 Definition* PopStack();
339 Definition* PeekStack();
340 void CheckStackLimit();
341 void GrowStack();
342
343 // Prints the specified argument. Used for debugging.
344 void Print(Value* argument);
345
346 // A utility class tracking ids of various objects such as blocks, temps, etc.
347 class IdAllocator : public ValueObject {
348 public:
349 explicit IdAllocator(intptr_t first_id = 0) : next_id(first_id) {}
350
351 intptr_t Count() const { return next_id; }
352 intptr_t Alloc(intptr_t count = 1) {
353 ASSERT(count >= 0);
354 intptr_t current_id = next_id;
355 next_id += count;
356 return current_id;
357 }
358 void Dealloc(intptr_t count = 1) {
359 ASSERT(count <= next_id);
360 next_id -= count;
361 }
362
363 private:
364 intptr_t next_id;
365 };
366
367 Thread* thread_;
368
369 // Which mode to generate code for (ASCII or UC16).
370 Mode mode_;
371
372 // Which specific string class to generate code for.
373 intptr_t specialization_cid_;
374
375 // Block entries used internally.
376 GraphEntryInstr* entry_block_;
377 JoinEntryInstr* start_block_;
378 JoinEntryInstr* success_block_;
379 JoinEntryInstr* exit_block_;
380
381 // Shared backtracking block.
382 JoinEntryInstr* backtrack_block_;
383 // Single indirect goto instruction which performs all backtracking.
384 IndirectGotoInstr* backtrack_goto_;
385
386 const ParsedFunction* parsed_function_;
387 const ZoneGrowableArray<const ICData*>& ic_data_array_;
388
389 // All created blocks are contained within this set. Used for printing
390 // the generated code.
391 GrowableArray<BlockEntryInstr*> blocks_;
392
393 // The current instruction to link to when new code is emitted.
394 Instruction* current_instruction_;
395
396 // A list, acting as the runtime stack for both backtrack locations and
397 // stored positions within the string.
398 LocalVariable* stack_;
399 LocalVariable* stack_pointer_;
400
401 // Stores the current character within the string.
402 LocalVariable* current_character_;
403
404 // Stores the current location within the string as a negative offset
405 // from the end of the string.
406 LocalVariable* current_position_;
407
408 // The string being processed, passed as a function parameter.
409 LocalVariable* string_param_;
410
411 // Stores the length of string_param_.
412 LocalVariable* string_param_length_;
413
414 // The start index within the string, passed as a function parameter.
415 LocalVariable* start_index_param_;
416
417 // An assortment of utility variables.
418 LocalVariable* capture_length_;
419 LocalVariable* match_start_index_;
420 LocalVariable* capture_start_index_;
421 LocalVariable* match_end_index_;
422 LocalVariable* char_in_capture_;
423 LocalVariable* char_in_match_;
424 LocalVariable* index_temp_;
425
426 LocalVariable* result_;
427
428 // Stored positions containing group bounds. Generated as needed.
429 LocalVariable* registers_;
430 intptr_t registers_count_;
431 const intptr_t saved_registers_count_;
432
433 // The actual array objects used for the stack and registers.
434 Array& stack_array_cell_;
435 TypedData& registers_array_;
436
437 IdAllocator block_id_;
438 IdAllocator temp_id_;
439 IdAllocator local_id_;
440 IdAllocator indirect_id_;
441};
442
443} // namespace dart
444
445#endif // RUNTIME_VM_REGEXP_ASSEMBLER_IR_H_
446