| 1 | /* |
| 2 | * Copyright (c) 2015-2018, Intel Corporation |
| 3 | * |
| 4 | * Redistribution and use in source and binary forms, with or without |
| 5 | * modification, are permitted provided that the following conditions are met: |
| 6 | * |
| 7 | * * Redistributions of source code must retain the above copyright notice, |
| 8 | * this list of conditions and the following disclaimer. |
| 9 | * * Redistributions in binary form must reproduce the above copyright |
| 10 | * notice, this list of conditions and the following disclaimer in the |
| 11 | * documentation and/or other materials provided with the distribution. |
| 12 | * * Neither the name of Intel Corporation nor the names of its contributors |
| 13 | * may be used to endorse or promote products derived from this software |
| 14 | * without specific prior written permission. |
| 15 | * |
| 16 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
| 17 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| 18 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
| 19 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE |
| 20 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
| 21 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
| 22 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
| 23 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
| 24 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
| 25 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
| 26 | * POSSIBILITY OF SUCH DAMAGE. |
| 27 | */ |
| 28 | |
| 29 | /** \file |
| 30 | * \brief Rose data structures. |
| 31 | */ |
| 32 | |
| 33 | #ifndef ROSE_INTERNAL_H |
| 34 | #define ROSE_INTERNAL_H |
| 35 | |
| 36 | #include "ue2common.h" |
| 37 | #include "rose_common.h" |
| 38 | #include "util/scatter.h" |
| 39 | |
| 40 | #define ROSE_OFFSET_INVALID 0xffffffff |
| 41 | |
| 42 | // Group constants |
| 43 | typedef u64a rose_group; |
| 44 | |
| 45 | // Delayed literal stuff |
| 46 | #define DELAY_BITS 5 |
| 47 | #define DELAY_SLOT_COUNT (1U << DELAY_BITS) |
| 48 | #define MAX_DELAY (DELAY_SLOT_COUNT - 1) |
| 49 | #define DELAY_MASK (DELAY_SLOT_COUNT - 1) |
| 50 | |
| 51 | /* Allocation of Rose literal ids |
| 52 | * |
| 53 | * The rose literal id space is segmented: |
| 54 | * |
| 55 | * ---- 0 |
| 56 | * | | 'Normal' undelayed literals in either e or f tables |
| 57 | * | | |
| 58 | * | | |
| 59 | * | | |
| 60 | * ---- anchored_base_id |
| 61 | * | | literals from the a table |
| 62 | * | | |
| 63 | * ---- delay_base_id |
| 64 | * | | Delayed version of normal literals |
| 65 | * | | |
| 66 | * ---- literalCount |
| 67 | */ |
| 68 | |
| 69 | /* Rose Literal Sources |
| 70 | * |
| 71 | * Rose currently gets events (mainly roseProcessMatch calls) from a number of |
| 72 | * sources: |
| 73 | * 1) The floating table |
| 74 | * 2) The anchored table |
| 75 | * 3) Delayed literals |
| 76 | * 4) Suffix NFAs |
| 77 | * 5) Literal masks |
| 78 | * 5) End anchored table |
| 79 | * 6) Prefix / Infix nfas |
| 80 | * |
| 81 | * Care is required to ensure that events appear to come into Rose in order |
| 82 | * (or sufficiently ordered for Rose to cope). Generally the progress of the |
| 83 | * floating table is considered the canonical position in the buffer. |
| 84 | * |
| 85 | * Anchored table: |
| 86 | * The anchored table is run before the floating table as nothing in it can |
| 87 | * depend on a floating literal. Order is achieved by two measures: |
| 88 | * a) user matches^1 are logged and held until the floating matcher passes that |
| 89 | * point; |
| 90 | * b) any floating role with an anchored predecessor has a history relationship |
| 91 | * to enforce the ordering. |
| 92 | * |
| 93 | * Delayed literals: |
| 94 | * Delayed literal ordering is handled by delivering any pending delayed |
| 95 | * literals before processing any floating match. |
| 96 | * |
| 97 | * Suffix: |
| 98 | * Suffixes are always pure terminal roles. Prior to raising a match^2, pending |
| 99 | * NFA queues are run to the current point (floating or delayed literal) as |
| 100 | * appropriate. |
| 101 | * |
| 102 | * Literal Masks: |
| 103 | * These are triggered from either floating literals or delayed literals and |
| 104 | * inspect the data behind them. Matches are raised at the same location as the |
| 105 | * trigger literal so there are no ordering issues. Masks are always pure |
| 106 | * terminal roles. |
| 107 | * |
| 108 | * Lookaround: |
| 109 | * These are tests run on receipt of a role that "look around" the match, |
| 110 | * checking characters at nearby offsets against reachability masks. Each role |
| 111 | * can have a list of these lookaround offset/reach pairs, ordered in offset |
| 112 | * order, and any failure will prevent the role from being switched on. Offsets |
| 113 | * are relative to the byte after a literal match, and can be negative. |
| 114 | * |
| 115 | * Prefix / Infix: |
| 116 | * TODO: remember / discuss |
| 117 | * |
| 118 | * End anchored table: |
| 119 | * All user matches occur at the last byte. We do this last, so no problems |
| 120 | * (yippee) |
| 121 | * |
| 122 | * ^1 User matches which occur before any possible match from the other tables |
| 123 | * are not delayed. |
| 124 | * ^2 Queues may also be run to the current location if a queue is full and |
| 125 | * needs to be emptied. |
| 126 | * ^3 There is no need to catch up at the end of a block scan as it contains no |
| 127 | * terminals. |
| 128 | */ |
| 129 | |
| 130 | struct RoseCountingMiracle { |
| 131 | char shufti; /** 1: count shufti class; 0: count a single character */ |
| 132 | u8 count; /** minimum number of occurrences for the counting |
| 133 | * miracle char to kill the leftfix. */ |
| 134 | u8 c; /** character to look for if not shufti */ |
| 135 | u8 poison; /** character not in the shufti mask */ |
| 136 | m128 lo; /** shufti lo mask */ |
| 137 | m128 hi; /** shufti hi mask */ |
| 138 | }; |
| 139 | |
| 140 | struct LeftNfaInfo { |
| 141 | u32 maxQueueLen; |
| 142 | u32 maxLag; // maximum of successor roles' lag |
| 143 | u32 lagIndex; // iff lag != 0, index into leftfixLagTable |
| 144 | u32 stopTable; // stop table index, or ROSE_OFFSET_INVALID |
| 145 | u8 transient; /**< 0 if not transient, else max width of transient prefix */ |
| 146 | char infix; /* TODO: make flags */ |
| 147 | char eager; /**< nfa should be run eagerly to first match or death */ |
| 148 | char eod_check; /**< nfa is used by the event eod literal */ |
| 149 | u32 countingMiracleOffset; /** if not 0, offset to RoseCountingMiracle. */ |
| 150 | rose_group squash_mask; /* & mask applied when rose nfa dies */ |
| 151 | }; |
| 152 | |
| 153 | struct NfaInfo { |
| 154 | u32 nfaOffset; |
| 155 | u32 stateOffset; |
| 156 | u32 fullStateOffset; /* offset in scratch, relative to ??? */ |
| 157 | u32 ekeyListOffset; /* suffix, relative to base of rose, 0 if no ekeys */ |
| 158 | u8 no_retrigger; /* TODO */ |
| 159 | u8 in_sbmatcher; /**< this outfix should not be run in small-block |
| 160 | * execution, as it will be handled by the sbmatcher |
| 161 | * HWLM table. */ |
| 162 | u8 eod; /* suffix is triggered by the etable --> can only produce eod |
| 163 | * matches */ |
| 164 | }; |
| 165 | |
| 166 | #define MAX_STORED_LEFTFIX_LAG 127 /* max leftfix lag that we can store in one |
| 167 | * whole byte (OWB) (streaming only). Other |
| 168 | * values in OWB are reserved for zombie |
| 169 | * status */ |
| 170 | #define OWB_ZOMBIE_ALWAYS_YES 128 /* nfa will always answer yes to any rose |
| 171 | * prefix checks */ |
| 172 | |
| 173 | /* offset of the status flags in the stream state. */ |
| 174 | #define ROSE_STATE_OFFSET_STATUS_FLAGS 0 |
| 175 | |
| 176 | /* offset of role mmbit in stream state (just after the status flag byte). */ |
| 177 | #define ROSE_STATE_OFFSET_ROLE_MMBIT sizeof(u8) |
| 178 | |
| 179 | /** |
| 180 | * \brief Rose state offsets. |
| 181 | * |
| 182 | * Stores pre-calculated offsets (in bytes) to MOST of the state structures |
| 183 | * used by Rose, relative to the start of stream state. |
| 184 | * |
| 185 | * State not covered by this structure includes: |
| 186 | * |
| 187 | * -# the first byte, containing the status bitmask |
| 188 | * -# the role state multibit |
| 189 | */ |
| 190 | struct RoseStateOffsets { |
| 191 | /** History buffer. |
| 192 | * |
| 193 | * Max size of history is RoseEngine::historyRequired. */ |
| 194 | u32 history; |
| 195 | |
| 196 | /** Exhausted multibit. |
| 197 | * |
| 198 | * entry per exhaustible key (used by Highlander mode). If a bit is set, |
| 199 | * reports with that ekey should not be delivered to the user. */ |
| 200 | u32 exhausted; |
| 201 | |
| 202 | /** size in bytes of exhausted multibit */ |
| 203 | u32 exhausted_size; |
| 204 | |
| 205 | /** Logical multibit. |
| 206 | * |
| 207 | * entry per logical key(operand/operator) (used by Logical Combination). */ |
| 208 | u32 logicalVec; |
| 209 | |
| 210 | /** size in bytes of logical multibit */ |
| 211 | u32 logicalVec_size; |
| 212 | |
| 213 | /** Combination multibit. |
| 214 | * |
| 215 | * entry per combination key (used by Logical Combination). */ |
| 216 | u32 combVec; |
| 217 | |
| 218 | /** size in bytes of combination multibit */ |
| 219 | u32 combVec_size; |
| 220 | |
| 221 | /** Multibit for active suffix/outfix engines. */ |
| 222 | u32 activeLeafArray; |
| 223 | |
| 224 | /** Size of multibit for active suffix/outfix engines in bytes. */ |
| 225 | u32 activeLeafArray_size; |
| 226 | |
| 227 | /** Multibit for active leftfix (prefix/infix) engines. */ |
| 228 | u32 activeLeftArray; |
| 229 | |
| 230 | /** Size of multibit for active leftfix (prefix/infix) engines in bytes. */ |
| 231 | u32 activeLeftArray_size; |
| 232 | |
| 233 | /** Table of lag information (stored as one byte per engine) for active |
| 234 | * Rose leftfix engines. */ |
| 235 | u32 leftfixLagTable; |
| 236 | |
| 237 | /** State for anchored matchers (McClellan DFAs). */ |
| 238 | u32 anchorState; |
| 239 | |
| 240 | /** Packed Rose groups value. */ |
| 241 | u32 groups; |
| 242 | |
| 243 | /** Size of packed Rose groups value, in bytes. */ |
| 244 | u32 groups_size; |
| 245 | |
| 246 | /** State for long literal support. */ |
| 247 | u32 longLitState; |
| 248 | |
| 249 | /** Size of the long literal state. */ |
| 250 | u32 longLitState_size; |
| 251 | |
| 252 | /** Packed SOM location slots. */ |
| 253 | u32 somLocation; |
| 254 | |
| 255 | /** Multibit guarding SOM location slots. */ |
| 256 | u32 somValid; |
| 257 | |
| 258 | /** Multibit guarding SOM location slots. */ |
| 259 | u32 somWritable; |
| 260 | |
| 261 | /** Size of each of the somValid and somWritable multibits, in bytes. */ |
| 262 | u32 somMultibit_size; |
| 263 | |
| 264 | /** Begin of the region where NFA engine state is stored. |
| 265 | * The NFA state region extends to end. */ |
| 266 | u32 nfaStateBegin; |
| 267 | |
| 268 | /** Total size of Rose state, in bytes. */ |
| 269 | u32 end; |
| 270 | }; |
| 271 | |
| 272 | struct RoseBoundaryReports { |
| 273 | /** \brief 0 if no reports list, otherwise offset of program to run to |
| 274 | * deliver reports at EOD. */ |
| 275 | u32 reportEodOffset; |
| 276 | |
| 277 | /** \brief 0 if no reports list, otherwise offset of program to run to |
| 278 | * deliver reports at offset 0. */ |
| 279 | u32 reportZeroOffset; |
| 280 | |
| 281 | /** \brief 0 if no reports list, otherwise offset of program to run to |
| 282 | * deliver reports if EOD is at offset 0. Superset of other programs. */ |
| 283 | u32 reportZeroEodOffset; |
| 284 | }; |
| 285 | |
| 286 | /* NFA Queue Assignment |
| 287 | * |
| 288 | * --- 0 |
| 289 | * (|) chained mpv (if present) |
| 290 | * # |
| 291 | * --- outfixBeginQueue - |
| 292 | * | outfixes. enabled at offset 0. |
| 293 | * | |
| 294 | * # |
| 295 | * --- outfixEndQueue - |
| 296 | * | suffixes. enabled by rose roles. |
| 297 | * | |
| 298 | * # |
| 299 | * --- leftfixBeginQueue - |
| 300 | * | prefixes |
| 301 | * | |
| 302 | * # |
| 303 | * --- ? |
| 304 | * | infixes |
| 305 | * | |
| 306 | * # |
| 307 | */ |
| 308 | |
| 309 | #define ROSE_RUNTIME_FULL_ROSE 0 |
| 310 | #define ROSE_RUNTIME_PURE_LITERAL 1 |
| 311 | #define ROSE_RUNTIME_SINGLE_OUTFIX 2 |
| 312 | |
| 313 | /** |
| 314 | * \brief Runtime structure header for Rose. |
| 315 | * |
| 316 | * Runtime structure header for Rose. |
| 317 | * In memory, we follow this with: |
| 318 | * -# the "engine blob" |
| 319 | * -# anchored 'literal' matcher table |
| 320 | * -# floating literal matcher table |
| 321 | * -# eod-anchored literal matcher table |
| 322 | * -# small block table |
| 323 | * -# array of NFA offsets, one per queue |
| 324 | * -# array of state offsets, one per queue (+) |
| 325 | * |
| 326 | * (+) stateOffset array note: Offsets in the array are either into the stream |
| 327 | * state (normal case) or into the tstate region of scratch (for transient rose |
| 328 | * nfas). Rose nfa info table can distinguish the cases. |
| 329 | */ |
| 330 | struct RoseEngine { |
| 331 | u8 noFloatingRoots; /* only need to run the anchored table if something |
| 332 | * matched in the anchored table */ |
| 333 | u8 requiresEodCheck; /* stuff happens at eod time */ |
| 334 | u8 hasOutfixesInSmallBlock; /**< has at least one outfix that must run even |
| 335 | in small block scans. */ |
| 336 | u8 runtimeImpl; /**< can we just run the floating table or a single outfix? |
| 337 | * or do we need a full rose? */ |
| 338 | u8 mpvTriggeredByLeaf; /**< need to check (suf|out)fixes for mpv trigger */ |
| 339 | u8 canExhaust; /**< every pattern has an exhaustion key */ |
| 340 | u8 hasSom; /**< has at least one pattern which tracks SOM. */ |
| 341 | u8 somHorizon; /**< width in bytes of SOM offset storage (governed by |
| 342 | SOM precision) */ |
| 343 | u32 mode; /**< scanning mode, one of HS_MODE_{BLOCK,STREAM,VECTORED} */ |
| 344 | u32 historyRequired; /**< max amount of history required for streaming */ |
| 345 | u32 ekeyCount; /**< number of exhaustion keys */ |
| 346 | u32 lkeyCount; /**< number of logical keys */ |
| 347 | u32 lopCount; /**< number of logical ops */ |
| 348 | u32 ckeyCount; /**< number of combination keys */ |
| 349 | u32 logicalTreeOffset; /**< offset to mapping from lkey to LogicalOp */ |
| 350 | u32 combInfoMapOffset; /**< offset to mapping from ckey to combInfo */ |
| 351 | u32 dkeyCount; /**< number of dedupe keys */ |
| 352 | u32 dkeyLogSize; /**< size of fatbit for storing dkey log (bytes) */ |
| 353 | u32 invDkeyOffset; /**< offset to table mapping from dkeys to the external |
| 354 | * report ids */ |
| 355 | u32 somLocationCount; /**< number of som locations required */ |
| 356 | u32 somLocationFatbitSize; /**< size of SOM location fatbit (bytes) */ |
| 357 | u32 rolesWithStateCount; // number of roles with entries in state bitset |
| 358 | u32 stateSize; /* size of the state bitset |
| 359 | * WARNING: not the size of the rose state */ |
| 360 | u32 anchorStateSize; /* size of the state for the anchor dfas */ |
| 361 | u32 tStateSize; /* total size of the state for transient rose nfas */ |
| 362 | u32 scratchStateSize; /**< uncompressed state req'd for NFAs in scratch; |
| 363 | * used for sizing scratch only. */ |
| 364 | u32 smallWriteOffset; /**< offset of small-write matcher */ |
| 365 | u32 amatcherOffset; // offset of the anchored literal matcher (bytes) |
| 366 | u32 ematcherOffset; // offset of the eod-anchored literal matcher (bytes) |
| 367 | u32 fmatcherOffset; // offset of the floating literal matcher (bytes) |
| 368 | u32 drmatcherOffset; // offset of the delayed rebuild table (bytes) |
| 369 | u32 sbmatcherOffset; // offset of the small-block literal matcher (bytes) |
| 370 | u32 longLitTableOffset; // offset of the long literal table |
| 371 | u32 amatcherMinWidth; /**< minimum number of bytes required for a pattern |
| 372 | * involved with the anchored table to produce a full |
| 373 | * match. */ |
| 374 | u32 fmatcherMinWidth; /**< minimum number of bytes required for a pattern |
| 375 | * involved with the floating table to produce a full |
| 376 | * match. */ |
| 377 | u32 eodmatcherMinWidth; /**< minimum number of bytes required for a pattern |
| 378 | * involved with the eod table to produce a full |
| 379 | * match. */ |
| 380 | u32 amatcherMaxBiAnchoredWidth; /**< maximum number of bytes that can still |
| 381 | * produce a match for a pattern involved |
| 382 | * with the anchored table. */ |
| 383 | u32 fmatcherMaxBiAnchoredWidth; /**< maximum number of bytes that can still |
| 384 | * produce a match for a pattern involved |
| 385 | * with the anchored table. */ |
| 386 | |
| 387 | /** |
| 388 | * \brief Offset of u32 array of program offsets for reports used by |
| 389 | * output-exposed engines. |
| 390 | */ |
| 391 | u32 reportProgramOffset; |
| 392 | |
| 393 | /** |
| 394 | * \brief Number of programs for reports used by output-exposed engines. |
| 395 | */ |
| 396 | u32 reportProgramCount; |
| 397 | |
| 398 | /** |
| 399 | * \brief Offset of u32 array of program offsets for delayed replay of |
| 400 | * literals. |
| 401 | */ |
| 402 | u32 delayProgramOffset; |
| 403 | |
| 404 | /** |
| 405 | * \brief Offset of u32 array of program offsets for anchored literals. |
| 406 | */ |
| 407 | u32 anchoredProgramOffset; |
| 408 | |
| 409 | u32 activeArrayCount; //number of nfas tracked in the active array |
| 410 | u32 activeLeftCount; //number of nfas tracked in the active rose array |
| 411 | u32 queueCount; /**< number of nfa queues */ |
| 412 | u32 activeQueueArraySize; //!< size of fatbit for active queues (bytes) |
| 413 | |
| 414 | u32 eagerIterOffset; /**< offset to sparse iter for eager prefixes or 0 if |
| 415 | * none */ |
| 416 | |
| 417 | /** \brief Number of keys used by CHECK_SET_HANDLED instructions in role |
| 418 | * programs. */ |
| 419 | u32 handledKeyCount; |
| 420 | |
| 421 | /** \brief Size of the handled keys fatbit in scratch (bytes). */ |
| 422 | u32 handledKeyFatbitSize; |
| 423 | |
| 424 | u32 leftOffset; |
| 425 | u32 roseCount; |
| 426 | |
| 427 | u32 eodProgramOffset; //!< EOD program, otherwise 0. |
| 428 | u32 flushCombProgramOffset; /**< FlushCombination program, otherwise 0 */ |
| 429 | |
| 430 | u32 lastByteHistoryIterOffset; // if non-zero |
| 431 | |
| 432 | /** \brief Minimum number of bytes required to match. */ |
| 433 | u32 minWidth; |
| 434 | |
| 435 | /** \brief Minimum number of bytes required to match, excluding boundary |
| 436 | * reports. */ |
| 437 | u32 minWidthExcludingBoundaries; |
| 438 | |
| 439 | u32 maxBiAnchoredWidth; /* ROSE_BOUND_INF if any non bianchored patterns |
| 440 | * present */ |
| 441 | u32 anchoredDistance; // region to run the anchored table over |
| 442 | u32 anchoredMinDistance; /* start of region to run anchored table over */ |
| 443 | u32 floatingDistance; /* end of region to run the floating table over |
| 444 | ROSE_BOUND_INF if not bounded */ |
| 445 | u32 floatingMinDistance; /* start of region to run floating table over */ |
| 446 | u32 smallBlockDistance; /* end of region to run the floating table over |
| 447 | ROSE_BOUND_INF if not bounded */ |
| 448 | u32 floatingMinLiteralMatchOffset; /* the minimum offset that we can get a |
| 449 | * 'valid' match from the floating |
| 450 | * table */ |
| 451 | u32 nfaInfoOffset; /* offset to the nfa info offset array */ |
| 452 | rose_group initialGroups; |
| 453 | rose_group floating_group_mask; /* groups that are used by the ftable */ |
| 454 | u32 size; // (bytes) |
| 455 | u32 delay_count; /* number of delayed literal ids. */ |
| 456 | u32 delay_fatbit_size; //!< size of each delay fatbit in scratch (bytes) |
| 457 | u32 anchored_count; /* number of anchored literal ids */ |
| 458 | u32 anchored_fatbit_size; //!< size of each anch fatbit in scratch (bytes) |
| 459 | u32 maxFloatingDelayedMatch; /* max offset that a delayed literal can |
| 460 | * usefully be reported */ |
| 461 | u32 delayRebuildLength; /* length of the history region which needs to be |
| 462 | * rescanned when we are doing a delayed literal |
| 463 | * rebuild scan. */ |
| 464 | struct RoseStateOffsets stateOffsets; |
| 465 | struct RoseBoundaryReports boundary; |
| 466 | u32 totalNumLiterals; /* total number of literals including dr */ |
| 467 | u32 asize; /* size of the atable */ |
| 468 | u32 outfixBeginQueue; /* first outfix queue */ |
| 469 | u32 outfixEndQueue; /* one past the last outfix queue */ |
| 470 | u32 leftfixBeginQueue; /* first prefix/infix queue */ |
| 471 | u32 initMpvNfa; /* (allegedly chained) mpv to force on at init */ |
| 472 | u32 rosePrefixCount; /* number of rose prefixes */ |
| 473 | u32 activeLeftIterOffset; /* mmbit_sparse_iter over non-transient roses */ |
| 474 | u32 ematcherRegionSize; /* max region size to pass to ematcher */ |
| 475 | u32 somRevCount; /**< number of som reverse nfas */ |
| 476 | u32 somRevOffsetOffset; /**< offset to array of offsets to som rev nfas */ |
| 477 | u32 longLitStreamState; // size in bytes |
| 478 | |
| 479 | struct scatter_full_plan state_init; |
| 480 | }; |
| 481 | |
| 482 | struct ALIGN_CL_DIRECTIVE anchored_matcher_info { |
| 483 | u32 next_offset; /* relative to this, 0 for end */ |
| 484 | u32 state_offset; /* relative to anchorState */ |
| 485 | u32 anchoredMinDistance; /* start of region to run anchored table over */ |
| 486 | }; |
| 487 | |
| 488 | /** |
| 489 | * \brief Long literal subtable for a particular mode (caseful or nocase). |
| 490 | */ |
| 491 | struct RoseLongLitSubtable { |
| 492 | /** |
| 493 | * \brief Offset of the hash table (relative to RoseLongLitTable base). |
| 494 | * |
| 495 | * Offset is zero if no such table exists. |
| 496 | */ |
| 497 | u32 hashOffset; |
| 498 | |
| 499 | /** |
| 500 | * \brief Offset of the bloom filter (relative to RoseLongLitTable base). |
| 501 | * |
| 502 | * Offset is zero if no such table exists. |
| 503 | */ |
| 504 | u32 bloomOffset; |
| 505 | |
| 506 | /** \brief lg2 of the size of the hash table. */ |
| 507 | u8 hashBits; |
| 508 | |
| 509 | /** \brief Size of the bloom filter in bits. */ |
| 510 | u8 bloomBits; |
| 511 | |
| 512 | /** \brief Number of bits of packed stream state used. */ |
| 513 | u8 streamStateBits; |
| 514 | }; |
| 515 | |
| 516 | /** |
| 517 | * \brief Long literal table header. |
| 518 | */ |
| 519 | struct RoseLongLitTable { |
| 520 | /** |
| 521 | * \brief Total size of the whole table (including strings, bloom filters, |
| 522 | * hash tables). |
| 523 | */ |
| 524 | u32 size; |
| 525 | |
| 526 | /** \brief Caseful sub-table (hash table and bloom filter). */ |
| 527 | struct RoseLongLitSubtable caseful; |
| 528 | |
| 529 | /** \brief Caseless sub-table (hash table and bloom filter). */ |
| 530 | struct RoseLongLitSubtable nocase; |
| 531 | |
| 532 | /** \brief Total size of packed stream state in bytes. */ |
| 533 | u8 streamStateBytes; |
| 534 | |
| 535 | /** \brief Max length of literal prefixes. */ |
| 536 | u8 maxLen; |
| 537 | }; |
| 538 | |
| 539 | /** |
| 540 | * \brief One of these structures per hash table entry in our long literal |
| 541 | * table. |
| 542 | */ |
| 543 | struct RoseLongLitHashEntry { |
| 544 | /** |
| 545 | * \brief Offset of the literal string itself, relative to |
| 546 | * RoseLongLitTable base. Zero if this bucket is empty. |
| 547 | */ |
| 548 | u32 str_offset; |
| 549 | |
| 550 | /** \brief Length of the literal string. */ |
| 551 | u32 str_len; |
| 552 | }; |
| 553 | |
| 554 | static really_inline |
| 555 | const struct anchored_matcher_info *getALiteralMatcher( |
| 556 | const struct RoseEngine *t) { |
| 557 | if (!t->amatcherOffset) { |
| 558 | return NULL; |
| 559 | } |
| 560 | |
| 561 | const char *lt = (const char *)t + t->amatcherOffset; |
| 562 | assert(ISALIGNED_CL(lt)); |
| 563 | return (const struct anchored_matcher_info *)lt; |
| 564 | } |
| 565 | |
| 566 | struct HWLM; |
| 567 | |
| 568 | static really_inline |
| 569 | const struct HWLM *getFLiteralMatcher(const struct RoseEngine *t) { |
| 570 | if (!t->fmatcherOffset) { |
| 571 | return NULL; |
| 572 | } |
| 573 | |
| 574 | const char *lt = (const char *)t + t->fmatcherOffset; |
| 575 | assert(ISALIGNED_CL(lt)); |
| 576 | return (const struct HWLM *)lt; |
| 577 | } |
| 578 | |
| 579 | static really_inline |
| 580 | const void *getSBLiteralMatcher(const struct RoseEngine *t) { |
| 581 | if (!t->sbmatcherOffset) { |
| 582 | return NULL; |
| 583 | } |
| 584 | |
| 585 | const char *matcher = (const char *)t + t->sbmatcherOffset; |
| 586 | assert(ISALIGNED_N(matcher, 8)); |
| 587 | return matcher; |
| 588 | } |
| 589 | |
| 590 | static really_inline |
| 591 | const struct LeftNfaInfo *getLeftTable(const struct RoseEngine *t) { |
| 592 | const struct LeftNfaInfo *r |
| 593 | = (const struct LeftNfaInfo *)((const char *)t + t->leftOffset); |
| 594 | assert(ISALIGNED_N(r, 4)); |
| 595 | return r; |
| 596 | } |
| 597 | |
| 598 | struct mmbit_sparse_iter; // forward decl |
| 599 | |
| 600 | static really_inline |
| 601 | const struct mmbit_sparse_iter *getActiveLeftIter(const struct RoseEngine *t) { |
| 602 | assert(t->activeLeftIterOffset); |
| 603 | const struct mmbit_sparse_iter *it = (const struct mmbit_sparse_iter *) |
| 604 | ((const char *)t + t->activeLeftIterOffset); |
| 605 | assert(ISALIGNED_N(it, 4)); |
| 606 | return it; |
| 607 | } |
| 608 | |
| 609 | static really_inline |
| 610 | const struct NfaInfo *getNfaInfoByQueue(const struct RoseEngine *t, u32 qi) { |
| 611 | const struct NfaInfo *infos |
| 612 | = (const struct NfaInfo *)((const char *)t + t->nfaInfoOffset); |
| 613 | assert(ISALIGNED_N(infos, sizeof(u32))); |
| 614 | |
| 615 | return &infos[qi]; |
| 616 | } |
| 617 | |
| 618 | static really_inline |
| 619 | const struct NFA *getNfaByInfo(const struct RoseEngine *t, |
| 620 | const struct NfaInfo *info) { |
| 621 | return (const struct NFA *)((const char *)t + info->nfaOffset); |
| 622 | } |
| 623 | |
| 624 | static really_inline |
| 625 | const struct NFA *getNfaByQueue(const struct RoseEngine *t, u32 qi) { |
| 626 | const struct NfaInfo *info = getNfaInfoByQueue(t, qi); |
| 627 | return getNfaByInfo(t, info); |
| 628 | } |
| 629 | |
| 630 | static really_inline |
| 631 | u32 queueToLeftIndex(const struct RoseEngine *t, u32 qi) { |
| 632 | assert(qi >= t->leftfixBeginQueue); |
| 633 | return qi - t->leftfixBeginQueue; |
| 634 | } |
| 635 | |
| 636 | static really_inline |
| 637 | const struct LeftNfaInfo *getLeftInfoByQueue(const struct RoseEngine *t, |
| 638 | u32 qi) { |
| 639 | const struct LeftNfaInfo *infos = getLeftTable(t); |
| 640 | return &infos[queueToLeftIndex(t, qi)]; |
| 641 | } |
| 642 | |
| 643 | struct SmallWriteEngine; |
| 644 | |
| 645 | static really_inline |
| 646 | const struct SmallWriteEngine *getSmallWrite(const struct RoseEngine *t) { |
| 647 | if (!t->smallWriteOffset) { |
| 648 | return NULL; |
| 649 | } |
| 650 | |
| 651 | const struct SmallWriteEngine *smwr = |
| 652 | (const struct SmallWriteEngine *)((const char *)t + t->smallWriteOffset); |
| 653 | return smwr; |
| 654 | } |
| 655 | |
| 656 | #endif // ROSE_INTERNAL_H |
| 657 | |