| 1 | // Copyright (c) 2014, the Dart project authors.  Please see the AUTHORS file | 
| 2 | // for details. All rights reserved. Use of this source code is governed by a | 
| 3 | // BSD-style license that can be found in the LICENSE file. | 
| 4 |  | 
| 5 | #ifndef RUNTIME_VM_REGEXP_ASSEMBLER_IR_H_ | 
| 6 | #define RUNTIME_VM_REGEXP_ASSEMBLER_IR_H_ | 
| 7 |  | 
| 8 | #include "vm/compiler/assembler/assembler.h" | 
| 9 | #include "vm/compiler/backend/il.h" | 
| 10 | #include "vm/object.h" | 
| 11 | #include "vm/regexp_assembler.h" | 
| 12 |  | 
| 13 | namespace dart { | 
| 14 |  | 
| 15 | class IRRegExpMacroAssembler : public RegExpMacroAssembler { | 
| 16 |  public: | 
| 17 |   // Type of input string to generate code for. | 
| 18 |   enum Mode { ASCII = 1, UC16 = 2 }; | 
| 19 |  | 
| 20 |   // Result of calling generated native RegExp code. | 
| 21 |   // RETRY: Something significant changed during execution, and the matching | 
| 22 |   //        should be retried from scratch. | 
| 23 |   // EXCEPTION: Something failed during execution. If no exception has been | 
| 24 |   //        thrown, it's an internal out-of-memory, and the caller should | 
| 25 |   //        throw the exception. | 
| 26 |   // FAILURE: Matching failed. | 
| 27 |   // SUCCESS: Matching succeeded, and the output array has been filled with | 
| 28 |   //        capture positions. | 
| 29 |   enum Result { RETRY = -2, EXCEPTION = -1, FAILURE = 0, SUCCESS = 1 }; | 
| 30 |  | 
| 31 |   IRRegExpMacroAssembler(intptr_t specialization_cid, | 
| 32 |                          intptr_t capture_count, | 
| 33 |                          const ParsedFunction* parsed_function, | 
| 34 |                          const ZoneGrowableArray<const ICData*>& ic_data_array, | 
| 35 |                          intptr_t osr_id, | 
| 36 |                          Zone* zone); | 
| 37 |   virtual ~IRRegExpMacroAssembler(); | 
| 38 |  | 
| 39 |   virtual bool CanReadUnaligned(); | 
| 40 |  | 
| 41 |   static ArrayPtr Execute(const RegExp& regexp, | 
| 42 |                           const String& input, | 
| 43 |                           const Smi& start_offset, | 
| 44 |                           bool sticky, | 
| 45 |                           Zone* zone); | 
| 46 |  | 
| 47 |   virtual bool IsClosed() const { return (current_instruction_ == NULL); } | 
| 48 |  | 
| 49 |   virtual intptr_t stack_limit_slack(); | 
| 50 |   virtual void AdvanceCurrentPosition(intptr_t by); | 
| 51 |   virtual void AdvanceRegister(intptr_t reg, intptr_t by); | 
| 52 |   virtual void Backtrack(); | 
| 53 |   virtual void BindBlock(BlockLabel* label); | 
| 54 |   virtual void CheckAtStart(BlockLabel* on_at_start); | 
| 55 |   virtual void CheckCharacter(uint32_t c, BlockLabel* on_equal); | 
| 56 |   virtual void CheckCharacterAfterAnd(uint32_t c, | 
| 57 |                                       uint32_t mask, | 
| 58 |                                       BlockLabel* on_equal); | 
| 59 |   virtual void CheckCharacterGT(uint16_t limit, BlockLabel* on_greater); | 
| 60 |   virtual void CheckCharacterLT(uint16_t limit, BlockLabel* on_less); | 
| 61 |   // A "greedy loop" is a loop that is both greedy and with a simple | 
| 62 |   // body. It has a particularly simple implementation. | 
| 63 |   virtual void CheckGreedyLoop(BlockLabel* on_tos_equals_current_position); | 
| 64 |   virtual void CheckNotAtStart(intptr_t cp_offset, BlockLabel* on_not_at_start); | 
| 65 |   virtual void CheckNotBackReference(intptr_t start_reg, | 
| 66 |                                      bool read_backward, | 
| 67 |                                      BlockLabel* on_no_match); | 
| 68 |   virtual void CheckNotBackReferenceIgnoreCase(intptr_t start_reg, | 
| 69 |                                                bool read_backward, | 
| 70 |                                                bool unicode, | 
| 71 |                                                BlockLabel* on_no_match); | 
| 72 |   virtual void CheckNotCharacter(uint32_t c, BlockLabel* on_not_equal); | 
| 73 |   virtual void CheckNotCharacterAfterAnd(uint32_t c, | 
| 74 |                                          uint32_t mask, | 
| 75 |                                          BlockLabel* on_not_equal); | 
| 76 |   virtual void CheckNotCharacterAfterMinusAnd(uint16_t c, | 
| 77 |                                               uint16_t minus, | 
| 78 |                                               uint16_t mask, | 
| 79 |                                               BlockLabel* on_not_equal); | 
| 80 |   virtual void CheckCharacterInRange(uint16_t from, | 
| 81 |                                      uint16_t to, | 
| 82 |                                      BlockLabel* on_in_range); | 
| 83 |   virtual void CheckCharacterNotInRange(uint16_t from, | 
| 84 |                                         uint16_t to, | 
| 85 |                                         BlockLabel* on_not_in_range); | 
| 86 |   virtual void CheckBitInTable(const TypedData& table, BlockLabel* on_bit_set); | 
| 87 |  | 
| 88 |   // Checks whether the given offset from the current position is before | 
| 89 |   // the end of the string. | 
| 90 |   virtual void CheckPosition(intptr_t cp_offset, BlockLabel* on_outside_input); | 
| 91 |   virtual bool CheckSpecialCharacterClass(uint16_t type, | 
| 92 |                                           BlockLabel* on_no_match); | 
| 93 |   virtual void Fail(); | 
| 94 |   virtual void IfRegisterGE(intptr_t reg, | 
| 95 |                             intptr_t comparand, | 
| 96 |                             BlockLabel* if_ge); | 
| 97 |   virtual void IfRegisterLT(intptr_t reg, | 
| 98 |                             intptr_t comparand, | 
| 99 |                             BlockLabel* if_lt); | 
| 100 |   virtual void IfRegisterEqPos(intptr_t reg, BlockLabel* if_eq); | 
| 101 |   virtual IrregexpImplementation Implementation(); | 
| 102 |   virtual void GoTo(BlockLabel* to); | 
| 103 |   virtual void LoadCurrentCharacter(intptr_t cp_offset, | 
| 104 |                                     BlockLabel* on_end_of_input, | 
| 105 |                                     bool check_bounds = true, | 
| 106 |                                     intptr_t characters = 1); | 
| 107 |   virtual void PopCurrentPosition(); | 
| 108 |   virtual void PopRegister(intptr_t register_index); | 
| 109 |   virtual void Print(const char* str); | 
| 110 |   virtual void PushBacktrack(BlockLabel* label); | 
| 111 |   virtual void PushCurrentPosition(); | 
| 112 |   virtual void PushRegister(intptr_t register_index); | 
| 113 |   virtual void ReadCurrentPositionFromRegister(intptr_t reg); | 
| 114 |   virtual void ReadStackPointerFromRegister(intptr_t reg); | 
| 115 |   virtual void SetCurrentPositionFromEnd(intptr_t by); | 
| 116 |   virtual void SetRegister(intptr_t register_index, intptr_t to); | 
| 117 |   virtual bool Succeed(); | 
| 118 |   virtual void WriteCurrentPositionToRegister(intptr_t reg, intptr_t cp_offset); | 
| 119 |   virtual void ClearRegisters(intptr_t reg_from, intptr_t reg_to); | 
| 120 |   virtual void WriteStackPointerToRegister(intptr_t reg); | 
| 121 |  | 
| 122 |   virtual void PrintBlocks(); | 
| 123 |  | 
| 124 |   IndirectGotoInstr* backtrack_goto() const { return backtrack_goto_; } | 
| 125 |   GraphEntryInstr* graph_entry() const { return entry_block_; } | 
| 126 |  | 
| 127 |   intptr_t num_stack_locals() const { return local_id_.Count(); } | 
| 128 |   intptr_t num_blocks() const { return block_id_.Count(); } | 
| 129 |  | 
| 130 |   // Generate a dispatch block implementing backtracking. Must be done after | 
| 131 |   // graph construction. | 
| 132 |   void GenerateBacktrackBlock(); | 
| 133 |  | 
| 134 |   // Allocate the actual registers array once its size is known. Must be done | 
| 135 |   // after graph construction. | 
| 136 |   void FinalizeRegistersArray(); | 
| 137 |  | 
| 138 |  private: | 
| 139 |   intptr_t GetNextDeoptId() const { | 
| 140 |     return thread_->compiler_state().GetNextDeoptId(); | 
| 141 |   } | 
| 142 |  | 
| 143 |   // Generate the contents of preset blocks. The entry block is the entry point | 
| 144 |   // of the generated code. | 
| 145 |   void GenerateEntryBlock(); | 
| 146 |   // Copies capture indices into the result area and returns true. | 
| 147 |   void GenerateSuccessBlock(); | 
| 148 |   // Returns false. | 
| 149 |   void GenerateExitBlock(); | 
| 150 |  | 
| 151 |   enum ComparisonKind { | 
| 152 |     kEQ, | 
| 153 |     kNE, | 
| 154 |     kLT, | 
| 155 |     kGT, | 
| 156 |     kLTE, | 
| 157 |     kGTE, | 
| 158 |   }; | 
| 159 |  | 
| 160 |   struct InstanceCallDescriptor { | 
| 161 |     // Standard (i.e. most non-Smi) functions. | 
| 162 |     explicit InstanceCallDescriptor(const String& name) | 
| 163 |         : name(name), token_kind(Token::kILLEGAL), checked_argument_count(1) {} | 
| 164 |  | 
| 165 |     InstanceCallDescriptor(const String& name, | 
| 166 |                            Token::Kind token_kind, | 
| 167 |                            intptr_t checked_argument_count) | 
| 168 |         : name(name), | 
| 169 |           token_kind(token_kind), | 
| 170 |           checked_argument_count(checked_argument_count) {} | 
| 171 |  | 
| 172 |     // Special cases for Smi and indexing functions. | 
| 173 |     static InstanceCallDescriptor FromToken(Token::Kind token_kind) { | 
| 174 |       switch (token_kind) { | 
| 175 |         case Token::kEQ: | 
| 176 |           return InstanceCallDescriptor(Symbols::EqualOperator(), token_kind, | 
| 177 |                                         2); | 
| 178 |         case Token::kADD: | 
| 179 |           return InstanceCallDescriptor(Symbols::Plus(), token_kind, 2); | 
| 180 |         case Token::kSUB: | 
| 181 |           return InstanceCallDescriptor(Symbols::Minus(), token_kind, 2); | 
| 182 |         case Token::kBIT_OR: | 
| 183 |           return InstanceCallDescriptor(Symbols::BitOr(), token_kind, 2); | 
| 184 |         case Token::kBIT_AND: | 
| 185 |           return InstanceCallDescriptor(Symbols::BitAnd(), token_kind, 2); | 
| 186 |         case Token::kLT: | 
| 187 |           return InstanceCallDescriptor(Symbols::LAngleBracket(), token_kind, | 
| 188 |                                         2); | 
| 189 |         case Token::kLTE: | 
| 190 |           return InstanceCallDescriptor(Symbols::LessEqualOperator(), | 
| 191 |                                         token_kind, 2); | 
| 192 |         case Token::kGT: | 
| 193 |           return InstanceCallDescriptor(Symbols::RAngleBracket(), token_kind, | 
| 194 |                                         2); | 
| 195 |         case Token::kGTE: | 
| 196 |           return InstanceCallDescriptor(Symbols::GreaterEqualOperator(), | 
| 197 |                                         token_kind, 2); | 
| 198 |         case Token::kNEGATE: | 
| 199 |           return InstanceCallDescriptor(Symbols::UnaryMinus(), token_kind, 1); | 
| 200 |         case Token::kINDEX: | 
| 201 |           return InstanceCallDescriptor(Symbols::IndexToken(), token_kind, 2); | 
| 202 |         case Token::kASSIGN_INDEX: | 
| 203 |           return InstanceCallDescriptor(Symbols::AssignIndexToken(), token_kind, | 
| 204 |                                         2); | 
| 205 |         default: | 
| 206 |           UNREACHABLE(); | 
| 207 |       } | 
| 208 |       UNREACHABLE(); | 
| 209 |       return InstanceCallDescriptor(Symbols::Empty()); | 
| 210 |     } | 
| 211 |  | 
| 212 |     const String& name; | 
| 213 |     Token::Kind token_kind; | 
| 214 |     intptr_t checked_argument_count; | 
| 215 |   }; | 
| 216 |  | 
| 217 |   LocalVariable* Local(const String& name); | 
| 218 |   LocalVariable* Parameter(const String& name, intptr_t index) const; | 
| 219 |  | 
| 220 |   ConstantInstr* Int64Constant(int64_t value) const; | 
| 221 |   ConstantInstr* Uint64Constant(uint64_t value) const; | 
| 222 |   ConstantInstr* BoolConstant(bool value) const; | 
| 223 |   ConstantInstr* StringConstant(const char* value) const; | 
| 224 |  | 
| 225 |   // The word character map static member of the RegExp class. | 
| 226 |   // Byte map of one byte characters with a 0xff if the character is a word | 
| 227 |   // character (digit, letter or underscore) and 0x00 otherwise. | 
| 228 |   // Used by generated RegExp code. | 
| 229 |   ConstantInstr* WordCharacterMapConstant() const; | 
| 230 |  | 
| 231 |   ComparisonInstr* Comparison(ComparisonKind kind, Value* lhs, Value* rhs); | 
| 232 |   ComparisonInstr* Comparison(ComparisonKind kind, | 
| 233 |                               Definition* lhs, | 
| 234 |                               Definition* rhs); | 
| 235 |  | 
| 236 |   InstanceCallInstr* InstanceCall(const InstanceCallDescriptor& desc, | 
| 237 |                                   Value* arg1) const; | 
| 238 |   InstanceCallInstr* InstanceCall(const InstanceCallDescriptor& desc, | 
| 239 |                                   Value* arg1, | 
| 240 |                                   Value* arg2) const; | 
| 241 |   InstanceCallInstr* InstanceCall(const InstanceCallDescriptor& desc, | 
| 242 |                                   Value* arg1, | 
| 243 |                                   Value* arg2, | 
| 244 |                                   Value* arg3) const; | 
| 245 |   InstanceCallInstr* InstanceCall(const InstanceCallDescriptor& desc, | 
| 246 |                                   InputsArray* arguments) const; | 
| 247 |  | 
| 248 |   StaticCallInstr* StaticCall(const Function& function, | 
| 249 |                               ICData::RebindRule rebind_rule) const; | 
| 250 |   StaticCallInstr* StaticCall(const Function& function, | 
| 251 |                               Value* arg1, | 
| 252 |                               ICData::RebindRule rebind_rule) const; | 
| 253 |   StaticCallInstr* StaticCall(const Function& function, | 
| 254 |                               Value* arg1, | 
| 255 |                               Value* arg2, | 
| 256 |                               ICData::RebindRule rebind_rule) const; | 
| 257 |   StaticCallInstr* StaticCall(const Function& function, | 
| 258 |                               InputsArray* arguments, | 
| 259 |                               ICData::RebindRule rebind_rule) const; | 
| 260 |  | 
| 261 |   // Creates a new block consisting simply of a goto to dst. | 
| 262 |   TargetEntryInstr* TargetWithJoinGoto(JoinEntryInstr* dst); | 
| 263 |   IndirectEntryInstr* IndirectWithJoinGoto(JoinEntryInstr* dst); | 
| 264 |  | 
| 265 |   // Adds, respectively subtracts lhs and rhs and returns the result. | 
| 266 |   Definition* Add(Value* lhs, Value* rhs); | 
| 267 |   Definition* Sub(Value* lhs, Value* rhs); | 
| 268 |  | 
| 269 |   LoadLocalInstr* LoadLocal(LocalVariable* local) const; | 
| 270 |   void StoreLocal(LocalVariable* local, Value* value); | 
| 271 |  | 
| 272 |   Value* PushLocal(LocalVariable* local); | 
| 273 |  | 
| 274 |   Value* PushRegisterIndex(intptr_t reg); | 
| 275 |   Value* LoadRegister(intptr_t reg); | 
| 276 |   void StoreRegister(intptr_t reg, intptr_t value); | 
| 277 |   void StoreRegister(Value* registers, Value* index, Value* value); | 
| 278 |  | 
| 279 |   // Load a number of characters at the given offset from the | 
| 280 |   // current position, into the current-character register. | 
| 281 |   void LoadCurrentCharacterUnchecked(intptr_t cp_offset, | 
| 282 |                                      intptr_t character_count); | 
| 283 |  | 
| 284 |   // Returns the character within the passed string at the specified index. | 
| 285 |   Value* CharacterAt(LocalVariable* index); | 
| 286 |  | 
| 287 |   // Load a number of characters starting from index in the pattern string. | 
| 288 |   Value* LoadCodeUnitsAt(LocalVariable* index, intptr_t character_count); | 
| 289 |  | 
| 290 |   // Check whether preemption has been requested. Also serves as an OSR entry. | 
| 291 |   void CheckPreemption(bool is_backtrack); | 
| 292 |  | 
| 293 |   // Byte size of chars in the string to match (decided by the Mode argument) | 
| 294 |   inline intptr_t char_size() { return static_cast<int>(mode_); } | 
| 295 |  | 
| 296 |   // Equivalent to a conditional branch to the label, unless the label | 
| 297 |   // is NULL, in which case it is a conditional Backtrack. | 
| 298 |   void BranchOrBacktrack(ComparisonInstr* comparison, | 
| 299 |                          BlockLabel* true_successor); | 
| 300 |  | 
| 301 |   // Set up all local variables and parameters. | 
| 302 |   void InitializeLocals(); | 
| 303 |  | 
| 304 |   // Allocates a new local, and returns the appropriate id for placing it | 
| 305 |   // on the stack. | 
| 306 |   intptr_t GetNextLocalIndex(); | 
| 307 |  | 
| 308 |   // We never have any copied parameters. | 
| 309 |   intptr_t num_copied_params() const { return 0; } | 
| 310 |  | 
| 311 |   // Return the position register at the specified index, creating it if | 
| 312 |   // necessary. Note that the number of such registers can exceed the amount | 
| 313 |   // required by the number of output captures. | 
| 314 |   LocalVariable* position_register(intptr_t index); | 
| 315 |  | 
| 316 |   void set_current_instruction(Instruction* instruction); | 
| 317 |  | 
| 318 |   // The following functions are responsible for appending instructions | 
| 319 |   // to the current instruction in various ways. The most simple one | 
| 320 |   // is AppendInstruction, which simply appends an instruction and performs | 
| 321 |   // bookkeeping. | 
| 322 |   void AppendInstruction(Instruction* instruction); | 
| 323 |   // Similar to AppendInstruction, but closes the current block by | 
| 324 |   // setting current_instruction_ to NULL. | 
| 325 |   void CloseBlockWith(Instruction* instruction); | 
| 326 |   // Appends definition and allocates a temp index for the result. | 
| 327 |   Value* Bind(Definition* definition); | 
| 328 |   // Loads and binds a local variable. | 
| 329 |   Value* BindLoadLocal(const LocalVariable& local); | 
| 330 |  | 
| 331 |   // Appends the definition. | 
| 332 |   void Do(Definition* definition); | 
| 333 |   // Closes the current block with a jump to the specified block. | 
| 334 |   void GoTo(JoinEntryInstr* to); | 
| 335 |  | 
| 336 |   // Accessors for our local stack_. | 
| 337 |   void PushStack(Definition* definition); | 
| 338 |   Definition* PopStack(); | 
| 339 |   Definition* PeekStack(); | 
| 340 |   void CheckStackLimit(); | 
| 341 |   void GrowStack(); | 
| 342 |  | 
| 343 |   // Prints the specified argument. Used for debugging. | 
| 344 |   void Print(Value* argument); | 
| 345 |  | 
| 346 |   // A utility class tracking ids of various objects such as blocks, temps, etc. | 
| 347 |   class IdAllocator : public ValueObject { | 
| 348 |    public: | 
| 349 |     explicit IdAllocator(intptr_t first_id = 0) : next_id(first_id) {} | 
| 350 |  | 
| 351 |     intptr_t Count() const { return next_id; } | 
| 352 |     intptr_t Alloc(intptr_t count = 1) { | 
| 353 |       ASSERT(count >= 0); | 
| 354 |       intptr_t current_id = next_id; | 
| 355 |       next_id += count; | 
| 356 |       return current_id; | 
| 357 |     } | 
| 358 |     void Dealloc(intptr_t count = 1) { | 
| 359 |       ASSERT(count <= next_id); | 
| 360 |       next_id -= count; | 
| 361 |     } | 
| 362 |  | 
| 363 |    private: | 
| 364 |     intptr_t next_id; | 
| 365 |   }; | 
| 366 |  | 
| 367 |   Thread* thread_; | 
| 368 |  | 
| 369 |   // Which mode to generate code for (ASCII or UC16). | 
| 370 |   Mode mode_; | 
| 371 |  | 
| 372 |   // Which specific string class to generate code for. | 
| 373 |   intptr_t specialization_cid_; | 
| 374 |  | 
| 375 |   // Block entries used internally. | 
| 376 |   GraphEntryInstr* entry_block_; | 
| 377 |   JoinEntryInstr* start_block_; | 
| 378 |   JoinEntryInstr* success_block_; | 
| 379 |   JoinEntryInstr* exit_block_; | 
| 380 |  | 
| 381 |   // Shared backtracking block. | 
| 382 |   JoinEntryInstr* backtrack_block_; | 
| 383 |   // Single indirect goto instruction which performs all backtracking. | 
| 384 |   IndirectGotoInstr* backtrack_goto_; | 
| 385 |  | 
| 386 |   const ParsedFunction* parsed_function_; | 
| 387 |   const ZoneGrowableArray<const ICData*>& ic_data_array_; | 
| 388 |  | 
| 389 |   // All created blocks are contained within this set. Used for printing | 
| 390 |   // the generated code. | 
| 391 |   GrowableArray<BlockEntryInstr*> blocks_; | 
| 392 |  | 
| 393 |   // The current instruction to link to when new code is emitted. | 
| 394 |   Instruction* current_instruction_; | 
| 395 |  | 
| 396 |   // A list, acting as the runtime stack for both backtrack locations and | 
| 397 |   // stored positions within the string. | 
| 398 |   LocalVariable* stack_; | 
| 399 |   LocalVariable* stack_pointer_; | 
| 400 |  | 
| 401 |   // Stores the current character within the string. | 
| 402 |   LocalVariable* current_character_; | 
| 403 |  | 
| 404 |   // Stores the current location within the string as a negative offset | 
| 405 |   // from the end of the string. | 
| 406 |   LocalVariable* current_position_; | 
| 407 |  | 
| 408 |   // The string being processed, passed as a function parameter. | 
| 409 |   LocalVariable* string_param_; | 
| 410 |  | 
| 411 |   // Stores the length of string_param_. | 
| 412 |   LocalVariable* string_param_length_; | 
| 413 |  | 
| 414 |   // The start index within the string, passed as a function parameter. | 
| 415 |   LocalVariable* start_index_param_; | 
| 416 |  | 
| 417 |   // An assortment of utility variables. | 
| 418 |   LocalVariable* capture_length_; | 
| 419 |   LocalVariable* match_start_index_; | 
| 420 |   LocalVariable* capture_start_index_; | 
| 421 |   LocalVariable* match_end_index_; | 
| 422 |   LocalVariable* char_in_capture_; | 
| 423 |   LocalVariable* char_in_match_; | 
| 424 |   LocalVariable* index_temp_; | 
| 425 |  | 
| 426 |   LocalVariable* result_; | 
| 427 |  | 
| 428 |   // Stored positions containing group bounds. Generated as needed. | 
| 429 |   LocalVariable* registers_; | 
| 430 |   intptr_t registers_count_; | 
| 431 |   const intptr_t saved_registers_count_; | 
| 432 |  | 
| 433 |   // The actual array objects used for the stack and registers. | 
| 434 |   Array& stack_array_cell_; | 
| 435 |   TypedData& registers_array_; | 
| 436 |  | 
| 437 |   IdAllocator block_id_; | 
| 438 |   IdAllocator temp_id_; | 
| 439 |   IdAllocator local_id_; | 
| 440 |   IdAllocator indirect_id_; | 
| 441 | }; | 
| 442 |  | 
| 443 | }  // namespace dart | 
| 444 |  | 
| 445 | #endif  // RUNTIME_VM_REGEXP_ASSEMBLER_IR_H_ | 
| 446 |  |