| 1 | /* |
| 2 | * Copyright (c) 2015-2018, Intel Corporation |
| 3 | * |
| 4 | * Redistribution and use in source and binary forms, with or without |
| 5 | * modification, are permitted provided that the following conditions are met: |
| 6 | * |
| 7 | * * Redistributions of source code must retain the above copyright notice, |
| 8 | * this list of conditions and the following disclaimer. |
| 9 | * * Redistributions in binary form must reproduce the above copyright |
| 10 | * notice, this list of conditions and the following disclaimer in the |
| 11 | * documentation and/or other materials provided with the distribution. |
| 12 | * * Neither the name of Intel Corporation nor the names of its contributors |
| 13 | * may be used to endorse or promote products derived from this software |
| 14 | * without specific prior written permission. |
| 15 | * |
| 16 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
| 17 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| 18 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
| 19 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE |
| 20 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
| 21 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
| 22 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
| 23 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
| 24 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
| 25 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
| 26 | * POSSIBILITY OF SUCH DAMAGE. |
| 27 | */ |
| 28 | |
| 29 | /** \file |
| 30 | * \brief Compiler front-end interface. |
| 31 | */ |
| 32 | #include "allocator.h" |
| 33 | #include "asserts.h" |
| 34 | #include "compiler.h" |
| 35 | #include "crc32.h" |
| 36 | #include "database.h" |
| 37 | #include "grey.h" |
| 38 | #include "hs_internal.h" |
| 39 | #include "hs_runtime.h" |
| 40 | #include "ue2common.h" |
| 41 | #include "nfagraph/ng_builder.h" |
| 42 | #include "nfagraph/ng_dump.h" |
| 43 | #include "nfagraph/ng.h" |
| 44 | #include "nfagraph/ng_util.h" |
| 45 | #include "parser/buildstate.h" |
| 46 | #include "parser/dump.h" |
| 47 | #include "parser/Component.h" |
| 48 | #include "parser/logical_combination.h" |
| 49 | #include "parser/parse_error.h" |
| 50 | #include "parser/Parser.h" // for flags |
| 51 | #include "parser/position.h" |
| 52 | #include "parser/position_dump.h" |
| 53 | #include "parser/position_info.h" |
| 54 | #include "parser/prefilter.h" |
| 55 | #include "parser/shortcut_literal.h" |
| 56 | #include "parser/unsupported.h" |
| 57 | #include "parser/utf8_validate.h" |
| 58 | #include "rose/rose_build.h" |
| 59 | #include "som/slot_manager_dump.h" |
| 60 | #include "util/bytecode_ptr.h" |
| 61 | #include "util/compile_error.h" |
| 62 | #include "util/target_info.h" |
| 63 | #include "util/verify_types.h" |
| 64 | |
| 65 | #include <algorithm> |
| 66 | #include <cassert> |
| 67 | #include <cstdlib> |
| 68 | #include <cstring> |
| 69 | #include <fstream> |
| 70 | #include <memory> |
| 71 | #include <sstream> |
| 72 | |
| 73 | using namespace std; |
| 74 | |
| 75 | namespace ue2 { |
| 76 | |
| 77 | static |
| 78 | void validateExt(const hs_expr_ext &ext) { |
| 79 | static const unsigned long long ALL_EXT_FLAGS = HS_EXT_FLAG_MIN_OFFSET | |
| 80 | HS_EXT_FLAG_MAX_OFFSET | |
| 81 | HS_EXT_FLAG_MIN_LENGTH | |
| 82 | HS_EXT_FLAG_EDIT_DISTANCE | |
| 83 | HS_EXT_FLAG_HAMMING_DISTANCE; |
| 84 | if (ext.flags & ~ALL_EXT_FLAGS) { |
| 85 | throw CompileError("Invalid hs_expr_ext flag set." ); |
| 86 | } |
| 87 | |
| 88 | if ((ext.flags & HS_EXT_FLAG_MIN_OFFSET) && |
| 89 | (ext.flags & HS_EXT_FLAG_MAX_OFFSET) && |
| 90 | (ext.min_offset > ext.max_offset)) { |
| 91 | throw CompileError("In hs_expr_ext, min_offset must be less than or " |
| 92 | "equal to max_offset." ); |
| 93 | } |
| 94 | |
| 95 | if ((ext.flags & HS_EXT_FLAG_MIN_LENGTH) && |
| 96 | (ext.flags & HS_EXT_FLAG_MAX_OFFSET) && |
| 97 | (ext.min_length > ext.max_offset)) { |
| 98 | throw CompileError("In hs_expr_ext, min_length must be less than or " |
| 99 | "equal to max_offset." ); |
| 100 | } |
| 101 | |
| 102 | if ((ext.flags & HS_EXT_FLAG_EDIT_DISTANCE) && |
| 103 | (ext.flags & HS_EXT_FLAG_HAMMING_DISTANCE)) { |
| 104 | throw CompileError("In hs_expr_ext, cannot have both edit distance and " |
| 105 | "Hamming distance." ); |
| 106 | } |
| 107 | |
| 108 | } |
| 109 | |
| 110 | ParsedExpression::ParsedExpression(unsigned index_in, const char *expression, |
| 111 | unsigned flags, ReportID report, |
| 112 | const hs_expr_ext *ext) |
| 113 | : expr(index_in, flags & HS_FLAG_ALLOWEMPTY, flags & HS_FLAG_SINGLEMATCH, |
| 114 | false, flags & HS_FLAG_PREFILTER, SOM_NONE, report, 0, MAX_OFFSET, |
| 115 | 0, 0, 0, flags & HS_FLAG_QUIET) { |
| 116 | // We disallow SOM + Quiet. |
| 117 | if ((flags & HS_FLAG_QUIET) && (flags & HS_FLAG_SOM_LEFTMOST)) { |
| 118 | throw CompileError("HS_FLAG_QUIET is not supported in " |
| 119 | "combination with HS_FLAG_SOM_LEFTMOST." ); |
| 120 | } |
| 121 | flags &= ~HS_FLAG_QUIET; |
| 122 | ParseMode mode(flags); |
| 123 | |
| 124 | component = parse(expression, mode); |
| 125 | |
| 126 | expr.utf8 = mode.utf8; /* utf8 may be set by parse() */ |
| 127 | |
| 128 | const size_t len = strlen(expression); |
| 129 | if (expr.utf8 && !isValidUtf8(expression, len)) { |
| 130 | throw ParseError("Expression is not valid UTF-8." ); |
| 131 | } |
| 132 | |
| 133 | if (!component) { |
| 134 | assert(0); // parse() should have thrown a ParseError. |
| 135 | throw ParseError("Parse error." ); |
| 136 | } |
| 137 | |
| 138 | if (flags & ~HS_FLAG_ALL) { |
| 139 | DEBUG_PRINTF("Unrecognised flag, flags=%u.\n" , flags); |
| 140 | throw CompileError("Unrecognised flag." ); |
| 141 | } |
| 142 | |
| 143 | // FIXME: we disallow highlander + SOM, see UE-1850. |
| 144 | if ((flags & HS_FLAG_SINGLEMATCH) && (flags & HS_FLAG_SOM_LEFTMOST)) { |
| 145 | throw CompileError("HS_FLAG_SINGLEMATCH is not supported in " |
| 146 | "combination with HS_FLAG_SOM_LEFTMOST." ); |
| 147 | } |
| 148 | |
| 149 | // FIXME: we disallow prefilter + SOM, see UE-1899. |
| 150 | if ((flags & HS_FLAG_PREFILTER) && (flags & HS_FLAG_SOM_LEFTMOST)) { |
| 151 | throw CompileError("HS_FLAG_PREFILTER is not supported in " |
| 152 | "combination with HS_FLAG_SOM_LEFTMOST." ); |
| 153 | } |
| 154 | |
| 155 | // Set SOM type. |
| 156 | if (flags & HS_FLAG_SOM_LEFTMOST) { |
| 157 | expr.som = SOM_LEFT; |
| 158 | } |
| 159 | |
| 160 | // Set extended parameters, if we have them. |
| 161 | if (ext) { |
| 162 | // Ensure that the given parameters make sense. |
| 163 | validateExt(*ext); |
| 164 | |
| 165 | if (ext->flags & HS_EXT_FLAG_MIN_OFFSET) { |
| 166 | expr.min_offset = ext->min_offset; |
| 167 | } |
| 168 | if (ext->flags & HS_EXT_FLAG_MAX_OFFSET) { |
| 169 | expr.max_offset = ext->max_offset; |
| 170 | } |
| 171 | if (ext->flags & HS_EXT_FLAG_MIN_LENGTH) { |
| 172 | expr.min_length = ext->min_length; |
| 173 | } |
| 174 | if (ext->flags & HS_EXT_FLAG_EDIT_DISTANCE) { |
| 175 | expr.edit_distance = ext->edit_distance; |
| 176 | } |
| 177 | if (ext->flags & HS_EXT_FLAG_HAMMING_DISTANCE) { |
| 178 | expr.hamm_distance = ext->hamming_distance; |
| 179 | } |
| 180 | } |
| 181 | |
| 182 | // These are validated in validateExt, so an error will already have been |
| 183 | // thrown if these conditions don't hold. |
| 184 | assert(expr.max_offset >= expr.min_offset); |
| 185 | assert(expr.max_offset >= expr.min_length); |
| 186 | |
| 187 | // Since prefiltering and SOM aren't supported together, we must squash any |
| 188 | // min_length constraint as well. |
| 189 | if (flags & HS_FLAG_PREFILTER && expr.min_length) { |
| 190 | DEBUG_PRINTF("prefiltering mode: squashing min_length constraint\n" ); |
| 191 | expr.min_length = 0; |
| 192 | } |
| 193 | } |
| 194 | |
| 195 | #if defined(DUMP_SUPPORT) || defined(DEBUG) |
| 196 | /** |
| 197 | * \brief Dumps the parse tree to screen in debug mode and to disk in dump |
| 198 | * mode. |
| 199 | */ |
| 200 | void dumpExpression(UNUSED const ParsedExpression &pe, |
| 201 | UNUSED const char *stage, UNUSED const Grey &grey) { |
| 202 | #if defined(DEBUG) |
| 203 | DEBUG_PRINTF("===== Rule ID: %u (expression index: %u) =====\n" , |
| 204 | pe.expr.report, pe.expr.index); |
| 205 | ostringstream debug_tree; |
| 206 | dumpTree(debug_tree, pe.component.get()); |
| 207 | printf("%s\n" , debug_tree.str().c_str()); |
| 208 | #endif // DEBUG |
| 209 | |
| 210 | #if defined(DUMP_SUPPORT) |
| 211 | if (grey.dumpFlags & Grey::DUMP_PARSE) { |
| 212 | stringstream ss; |
| 213 | ss << grey.dumpPath << "Expr_" << pe.expr.index << "_componenttree_" |
| 214 | << stage << ".txt" ; |
| 215 | ofstream out(ss.str().c_str()); |
| 216 | out << "Component Tree for " << pe.expr.report << endl; |
| 217 | dumpTree(out, pe.component.get()); |
| 218 | if (pe.expr.utf8) { |
| 219 | out << "UTF8 mode" << endl; |
| 220 | } |
| 221 | } |
| 222 | #endif // DEBUG |
| 223 | } |
| 224 | #endif |
| 225 | |
| 226 | /** \brief Run Component tree optimisations on \a expr. */ |
| 227 | static |
| 228 | void optimise(ParsedExpression &pe) { |
| 229 | if (pe.expr.min_length || pe.expr.som) { |
| 230 | return; |
| 231 | } |
| 232 | |
| 233 | DEBUG_PRINTF("optimising\n" ); |
| 234 | pe.component->optimise(true /* root is connected to sds */); |
| 235 | } |
| 236 | |
| 237 | void addExpression(NG &ng, unsigned index, const char *expression, |
| 238 | unsigned flags, const hs_expr_ext *ext, ReportID id) { |
| 239 | assert(expression); |
| 240 | const CompileContext &cc = ng.cc; |
| 241 | DEBUG_PRINTF("index=%u, id=%u, flags=%u, expr='%s'\n" , index, id, flags, |
| 242 | expression); |
| 243 | |
| 244 | if (flags & HS_FLAG_COMBINATION) { |
| 245 | if (flags & ~(HS_FLAG_COMBINATION | HS_FLAG_QUIET | |
| 246 | HS_FLAG_SINGLEMATCH)) { |
| 247 | throw CompileError("only HS_FLAG_QUIET and HS_FLAG_SINGLEMATCH " |
| 248 | "are supported in combination " |
| 249 | "with HS_FLAG_COMBINATION." ); |
| 250 | } |
| 251 | if (flags & HS_FLAG_QUIET) { |
| 252 | DEBUG_PRINTF("skip QUIET logical combination expression %u\n" , id); |
| 253 | } else { |
| 254 | u32 ekey = INVALID_EKEY; |
| 255 | u64a min_offset = 0; |
| 256 | u64a max_offset = MAX_OFFSET; |
| 257 | if (flags & HS_FLAG_SINGLEMATCH) { |
| 258 | ekey = ng.rm.getExhaustibleKey(id); |
| 259 | } |
| 260 | if (ext) { |
| 261 | validateExt(*ext); |
| 262 | if (ext->flags & ~(HS_EXT_FLAG_MIN_OFFSET | |
| 263 | HS_EXT_FLAG_MAX_OFFSET)) { |
| 264 | throw CompileError("only HS_EXT_FLAG_MIN_OFFSET and " |
| 265 | "HS_EXT_FLAG_MAX_OFFSET extra flags " |
| 266 | "are supported in combination " |
| 267 | "with HS_FLAG_COMBINATION." ); |
| 268 | } |
| 269 | if (ext->flags & HS_EXT_FLAG_MIN_OFFSET) { |
| 270 | min_offset = ext->min_offset; |
| 271 | } |
| 272 | if (ext->flags & HS_EXT_FLAG_MAX_OFFSET) { |
| 273 | max_offset = ext->max_offset; |
| 274 | } |
| 275 | } |
| 276 | ng.rm.pl.parseLogicalCombination(id, expression, ekey, min_offset, |
| 277 | max_offset); |
| 278 | DEBUG_PRINTF("parsed logical combination expression %u\n" , id); |
| 279 | } |
| 280 | return; |
| 281 | } |
| 282 | |
| 283 | // Ensure that our pattern isn't too long (in characters). |
| 284 | if (strlen(expression) > cc.grey.limitPatternLength) { |
| 285 | throw CompileError("Pattern length exceeds limit." ); |
| 286 | } |
| 287 | |
| 288 | // Do per-expression processing: errors here will result in an exception |
| 289 | // being thrown up to our caller |
| 290 | ParsedExpression pe(index, expression, flags, id, ext); |
| 291 | dumpExpression(pe, "orig" , cc.grey); |
| 292 | |
| 293 | // Apply prefiltering transformations if desired. |
| 294 | if (pe.expr.prefilter) { |
| 295 | prefilterTree(pe.component, ParseMode(flags)); |
| 296 | dumpExpression(pe, "prefiltered" , cc.grey); |
| 297 | } |
| 298 | |
| 299 | // Expressions containing zero-width assertions and other extended pcre |
| 300 | // types aren't supported yet. This call will throw a ParseError exception |
| 301 | // if the component tree contains such a construct. |
| 302 | checkUnsupported(*pe.component); |
| 303 | |
| 304 | pe.component->checkEmbeddedStartAnchor(true); |
| 305 | pe.component->checkEmbeddedEndAnchor(true); |
| 306 | |
| 307 | if (cc.grey.optimiseComponentTree) { |
| 308 | optimise(pe); |
| 309 | dumpExpression(pe, "opt" , cc.grey); |
| 310 | } |
| 311 | |
| 312 | DEBUG_PRINTF("component=%p, nfaId=%u, reportId=%u\n" , |
| 313 | pe.component.get(), pe.expr.index, pe.expr.report); |
| 314 | |
| 315 | // You can only use the SOM flags if you've also specified an SOM |
| 316 | // precision mode. |
| 317 | if (pe.expr.som != SOM_NONE && cc.streaming && !ng.ssm.somPrecision()) { |
| 318 | throw CompileError("To use a SOM expression flag in streaming mode, " |
| 319 | "an SOM precision mode (e.g. " |
| 320 | "HS_MODE_SOM_HORIZON_LARGE) must be specified." ); |
| 321 | } |
| 322 | |
| 323 | // If this expression is a literal, we can feed it directly to Rose rather |
| 324 | // than building the NFA graph. |
| 325 | if (shortcutLiteral(ng, pe)) { |
| 326 | DEBUG_PRINTF("took literal short cut\n" ); |
| 327 | return; |
| 328 | } |
| 329 | |
| 330 | auto built_expr = buildGraph(ng.rm, cc, pe); |
| 331 | if (!built_expr.g) { |
| 332 | DEBUG_PRINTF("NFA build failed on ID %u, but no exception was " |
| 333 | "thrown.\n" , pe.expr.report); |
| 334 | throw CompileError("Internal error." ); |
| 335 | } |
| 336 | |
| 337 | if (!pe.expr.allow_vacuous && matches_everywhere(*built_expr.g)) { |
| 338 | throw CompileError("Pattern matches empty buffer; use " |
| 339 | "HS_FLAG_ALLOWEMPTY to enable support." ); |
| 340 | } |
| 341 | |
| 342 | if (!ng.addGraph(built_expr.expr, std::move(built_expr.g))) { |
| 343 | DEBUG_PRINTF("NFA addGraph failed on ID %u.\n" , pe.expr.report); |
| 344 | throw CompileError("Error compiling expression." ); |
| 345 | } |
| 346 | } |
| 347 | |
| 348 | static |
| 349 | bytecode_ptr<RoseEngine> generateRoseEngine(NG &ng) { |
| 350 | const u32 minWidth = |
| 351 | ng.minWidth.is_finite() ? verify_u32(ng.minWidth) : ROSE_BOUND_INF; |
| 352 | auto rose = ng.rose->buildRose(minWidth); |
| 353 | |
| 354 | if (!rose) { |
| 355 | DEBUG_PRINTF("error building rose\n" ); |
| 356 | assert(0); |
| 357 | return nullptr; |
| 358 | } |
| 359 | |
| 360 | dumpReportManager(ng.rm, ng.cc.grey); |
| 361 | dumpSomSlotManager(ng.ssm, ng.cc.grey); |
| 362 | dumpSmallWrite(rose.get(), ng.cc.grey); |
| 363 | |
| 364 | return rose; |
| 365 | } |
| 366 | |
| 367 | platform_t target_to_platform(const target_t &target_info) { |
| 368 | platform_t p; |
| 369 | p = 0; |
| 370 | |
| 371 | if (!target_info.has_avx2()) { |
| 372 | p |= HS_PLATFORM_NOAVX2; |
| 373 | } |
| 374 | if (!target_info.has_avx512()) { |
| 375 | p |= HS_PLATFORM_NOAVX512; |
| 376 | } |
| 377 | return p; |
| 378 | } |
| 379 | |
| 380 | /** \brief Encapsulate the given bytecode (RoseEngine) in a newly-allocated |
| 381 | * \ref hs_database, ensuring that it is padded correctly to give cacheline |
| 382 | * alignment. */ |
| 383 | static |
| 384 | hs_database_t *dbCreate(const char *in_bytecode, size_t len, u64a platform) { |
| 385 | size_t db_len = sizeof(struct hs_database) + len; |
| 386 | DEBUG_PRINTF("db size %zu\n" , db_len); |
| 387 | DEBUG_PRINTF("db platform %llx\n" , platform); |
| 388 | |
| 389 | struct hs_database *db = (struct hs_database *)hs_database_alloc(db_len); |
| 390 | if (hs_check_alloc(db) != HS_SUCCESS) { |
| 391 | hs_database_free(db); |
| 392 | return nullptr; |
| 393 | } |
| 394 | |
| 395 | // So that none of our database is uninitialized |
| 396 | memset(db, 0, db_len); |
| 397 | |
| 398 | // we need to align things manually |
| 399 | size_t shift = (uintptr_t)db->bytes & 0x3f; |
| 400 | DEBUG_PRINTF("shift is %zu\n" , shift); |
| 401 | |
| 402 | db->bytecode = offsetof(struct hs_database, bytes) - shift; |
| 403 | char *bytecode = (char *)db + db->bytecode; |
| 404 | assert(ISALIGNED_CL(bytecode)); |
| 405 | |
| 406 | db->magic = HS_DB_MAGIC; |
| 407 | db->version = HS_DB_VERSION; |
| 408 | db->length = len; |
| 409 | db->platform = platform; |
| 410 | |
| 411 | // Copy bytecode |
| 412 | memcpy(bytecode, in_bytecode, len); |
| 413 | |
| 414 | db->crc32 = Crc32c_ComputeBuf(0, bytecode, db->length); |
| 415 | return db; |
| 416 | } |
| 417 | |
| 418 | |
| 419 | struct hs_database *build(NG &ng, unsigned int *length) { |
| 420 | assert(length); |
| 421 | |
| 422 | auto rose = generateRoseEngine(ng); |
| 423 | if (!rose) { |
| 424 | throw CompileError("Unable to generate bytecode." ); |
| 425 | } |
| 426 | *length = rose.size(); |
| 427 | if (!*length) { |
| 428 | DEBUG_PRINTF("RoseEngine has zero length\n" ); |
| 429 | assert(0); |
| 430 | throw CompileError("Internal error." ); |
| 431 | } |
| 432 | |
| 433 | const char *bytecode = (const char *)(rose.get()); |
| 434 | const platform_t p = target_to_platform(ng.cc.target_info); |
| 435 | struct hs_database *db = dbCreate(bytecode, *length, p); |
| 436 | if (!db) { |
| 437 | throw CompileError("Could not allocate memory for bytecode." ); |
| 438 | } |
| 439 | |
| 440 | return db; |
| 441 | } |
| 442 | |
| 443 | static |
| 444 | void stripFromPositions(vector<PositionInfo> &v, Position pos) { |
| 445 | auto removed = remove(v.begin(), v.end(), PositionInfo(pos)); |
| 446 | v.erase(removed, v.end()); |
| 447 | } |
| 448 | |
| 449 | static |
| 450 | void connectInitialStates(GlushkovBuildState &bs, |
| 451 | const ParsedExpression &expr) { |
| 452 | vector<PositionInfo> initials = expr.component->first(); |
| 453 | const NFABuilder &builder = bs.getBuilder(); |
| 454 | const Position startState = builder.getStart(); |
| 455 | const Position startDotStarState = builder.getStartDotStar(); |
| 456 | |
| 457 | DEBUG_PRINTF("wiring initials = %s\n" , |
| 458 | dumpPositions(initials.begin(), initials.end()).c_str()); |
| 459 | |
| 460 | vector<PositionInfo> starts = {startState, startDotStarState}; |
| 461 | |
| 462 | // strip start and startDs, which can be present due to boundaries |
| 463 | stripFromPositions(initials, startState); |
| 464 | stripFromPositions(initials, startDotStarState); |
| 465 | |
| 466 | // replace epsilons with accepts |
| 467 | for (const auto &s : initials) { |
| 468 | if (s.pos != GlushkovBuildState::POS_EPSILON) { |
| 469 | continue; |
| 470 | } |
| 471 | |
| 472 | assert(starts.size() == 2); /* start, startds */ |
| 473 | vector<PositionInfo> starts_temp = starts; |
| 474 | starts_temp[0].flags = s.flags; |
| 475 | starts_temp[1].flags = s.flags; |
| 476 | bs.connectAccepts(starts_temp); |
| 477 | } |
| 478 | |
| 479 | if (!initials.empty()) { |
| 480 | bs.connectRegions(starts, initials); |
| 481 | } |
| 482 | } |
| 483 | |
| 484 | static |
| 485 | void connectFinalStates(GlushkovBuildState &bs, const ParsedExpression &expr) { |
| 486 | vector<PositionInfo> finals = expr.component->last(); |
| 487 | |
| 488 | DEBUG_PRINTF("wiring finals = %s\n" , |
| 489 | dumpPositions(finals.begin(), finals.end()).c_str()); |
| 490 | |
| 491 | bs.connectAccepts(finals); |
| 492 | } |
| 493 | |
| 494 | #ifndef NDEBUG |
| 495 | static |
| 496 | bool isSupported(const Component &c) { |
| 497 | try { |
| 498 | checkUnsupported(c); |
| 499 | return true; |
| 500 | } |
| 501 | catch (ParseError &) { |
| 502 | return false; |
| 503 | } |
| 504 | } |
| 505 | #endif |
| 506 | |
| 507 | BuiltExpression buildGraph(ReportManager &rm, const CompileContext &cc, |
| 508 | const ParsedExpression &pe) { |
| 509 | assert(isSupported(*pe.component)); |
| 510 | |
| 511 | const auto builder = makeNFABuilder(rm, cc, pe); |
| 512 | assert(builder); |
| 513 | |
| 514 | // Set up START and ACCEPT states; retrieve the special states |
| 515 | const auto bs = makeGlushkovBuildState(*builder, pe.expr.prefilter); |
| 516 | |
| 517 | // Map position IDs to characters/components |
| 518 | pe.component->notePositions(*bs); |
| 519 | |
| 520 | // Wire the start dotstar state to the firsts |
| 521 | connectInitialStates(*bs, pe); |
| 522 | |
| 523 | DEBUG_PRINTF("wire up body of expr\n" ); |
| 524 | // Build the rest of the FOLLOW set |
| 525 | vector<PositionInfo> initials = {builder->getStartDotStar(), |
| 526 | builder->getStart()}; |
| 527 | pe.component->buildFollowSet(*bs, initials); |
| 528 | |
| 529 | // Wire the lasts to the accept state |
| 530 | connectFinalStates(*bs, pe); |
| 531 | |
| 532 | // Create our edges |
| 533 | bs->buildEdges(); |
| 534 | |
| 535 | BuiltExpression built_expr = builder->getGraph(); |
| 536 | assert(built_expr.g); |
| 537 | |
| 538 | dumpDotWrapper(*built_expr.g, built_expr.expr, "00_before_asserts" , |
| 539 | cc.grey); |
| 540 | removeAssertVertices(rm, *built_expr.g, built_expr.expr); |
| 541 | |
| 542 | return built_expr; |
| 543 | } |
| 544 | |
| 545 | } // namespace ue2 |
| 546 | |