/*
 * Copyright (c) 2015-2017, Intel Corporation
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 *  * Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *  * Neither the name of Intel Corporation nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/**
 * \file
 * \brief FDR literal matcher: Teddy build code.
 */
#include "teddy_compile.h"

#include "fdr.h"
#include "fdr_internal.h"
#include "fdr_compile_internal.h"
#include "fdr_confirm.h"
#include "fdr_engine_description.h"
#include "teddy_internal.h"
#include "teddy_engine_description.h"
#include "grey.h"
#include "ue2common.h"
#include "hwlm/hwlm_build.h"
#include "util/alloc.h"
#include "util/compare.h"
#include "util/container.h"
#include "util/make_unique.h"
#include "util/noncopyable.h"
#include "util/popcount.h"
#include "util/small_vector.h"
#include "util/target_info.h"
#include "util/verify_types.h"

#include <algorithm>
#include <cassert>
#include <cctype>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <map>
#include <memory>
#include <set>
#include <string>
#include <vector>

using namespace std;

namespace ue2 {

namespace {

//#define TEDDY_DEBUG

/** \brief Max number of Teddy masks we use. */
static constexpr size_t MAX_NUM_MASKS = 4;

class TeddyCompiler : noncopyable {
    const TeddyEngineDescription &eng;
    const Grey &grey;
    const vector<hwlmLiteral> &lits;
    map<BucketIndex, std::vector<LiteralIndex>> bucketToLits;
    bool make_small;

public:
    TeddyCompiler(const vector<hwlmLiteral> &lits_in,
                  map<BucketIndex, std::vector<LiteralIndex>> bucketToLits_in,
                  const TeddyEngineDescription &eng_in, bool make_small_in,
                  const Grey &grey_in)
        : eng(eng_in), grey(grey_in), lits(lits_in),
          bucketToLits(move(bucketToLits_in)), make_small(make_small_in) {}

    bytecode_ptr<FDR> build();
};

class TeddySet {
    /**
     * \brief Estimate of the max number of literals in a set, used to
     * minimise allocations.
     */
    static constexpr size_t LITS_PER_SET = 20;

    /** \brief Number of masks. */
    u32 len;

    /**
     * \brief A series of bitfields over 16 predicates that represent the
     * shufti nibble set.
     *
     * So for num_masks = 4 we will represent our strings by 8 u16s in the
     * vector that indicate what a shufti bucket would have to look like.
     */
    small_vector<u16, MAX_NUM_MASKS * 2> nibbleSets;
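    // Illustrative example (hypothetical values, assuming len == 2): adding
    // the caseful literal "ab" via addLiteral() below yields
    //   nibbleSets[0] = 1 << 0x2   (low nibble of 'b', the last character)
    //   nibbleSets[1] = 1 << 0x6   (high nibble of 'b')
    //   nibbleSets[2] = 1 << 0x1   (low nibble of 'a')
    //   nibbleSets[3] = 1 << 0x6   (high nibble of 'a')
    // i.e. entries come in (lo, hi) pairs, last character first; positions
    // beyond the end of a literal are filled with 0xffff (match anything).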

    /**
     * \brief Sorted, unique set of literals. We maintain our own set in a
     * sorted vector to minimise allocations.
     */
    small_vector<u32, LITS_PER_SET> litIds;

public:
    explicit TeddySet(u32 len_in) : len(len_in), nibbleSets(len_in * 2, 0) {}
    size_t litCount() const { return litIds.size(); }
    const small_vector<u32, LITS_PER_SET> &getLits() const { return litIds; }

    bool operator<(const TeddySet &s) const {
        return litIds < s.litIds;
    }

#ifdef TEDDY_DEBUG
    void dump() const {
        printf("TS: ");
        for (u32 i = 0; i < nibbleSets.size(); i++) {
            printf("%04x ", (u32)nibbleSets[i]);
        }
        printf("\nnlits: %zu\nLit ids: ", litCount());
        printf("Prob: %llu\n", probability());
        for (const auto &id : litIds) {
            printf("%u ", id);
        }
        printf("\n");
        printf("Flood prone: %s\n", isRunProne() ? "yes" : "no");
    }
#endif

    bool identicalTail(const TeddySet &ts) const {
        return nibbleSets == ts.nibbleSets;
    }

    void addLiteral(u32 lit_id, const hwlmLiteral &lit) {
        const string &s = lit.s;
        for (u32 i = 0; i < len; i++) {
            if (i < s.size()) {
                u8 c = s[s.size() - i - 1];
                u8 c_hi = (c >> 4) & 0xf;
                u8 c_lo = c & 0xf;
                nibbleSets[i * 2] = 1 << c_lo;
                if (lit.nocase && ourisalpha(c)) {
                    // For caseless letters, admit the high nibbles of both
                    // the upper-case (c_hi & 0xd) and lower-case (c_hi | 0x2)
                    // forms; the low nibble is the same for either case.
                    nibbleSets[i * 2 + 1] =
                        (1 << (c_hi & 0xd)) | (1 << (c_hi | 0x2));
                } else {
                    nibbleSets[i * 2 + 1] = 1 << c_hi;
                }
            } else {
                // Positions beyond the end of the literal match any byte.
                nibbleSets[i * 2] = nibbleSets[i * 2 + 1] = 0xffff;
            }
        }
        litIds.push_back(lit_id);
        sort_and_unique(litIds);
    }

    // Returns a score proportional to the likelihood of this TeddySet firing
    // a first-stage accept on random data if it were given a bucket of its
    // own: the product of the popcounts of its nibble sets. (The actual
    // per-position probability is this value divided by 16^(2 * len).)
    u64a probability() const {
        u64a val = 1;
        for (size_t i = 0; i < nibbleSets.size(); i++) {
            val *= popcount32((u32)nibbleSets[i]);
        }
        return val;
    }

    // Returns a score modelling the expected cost of this set: the chance of
    // it hitting, times a small fixed cost plus the cost of traversing the
    // followup confirm work (assumed to be linear in the number of literals).
    u64a heuristic() const {
        return probability() * (2 + litCount());
    }
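    // Worked example (illustrative): with 4 masks, a single caseful literal
    // of length >= 4 has popcount 1 in every nibble set, so probability() is
    // 1 and heuristic() is 1 * (2 + 1) = 3. A 4-char caseless alphabetic
    // literal has popcount 2 in each high-nibble set, giving probability()
    // 16 and heuristic() 48. pack() below uses these scores to decide which
    // sets are cheapest to merge into a shared bucket.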

    bool isRunProne() const {
        u16 lo_and = 0xffff;
        u16 hi_and = 0xffff;
        for (u32 i = 0; i < len; i++) {
            lo_and &= nibbleSets[i * 2];
            hi_and &= nibbleSets[i * 2 + 1];
        }
        // we're not flood-prone if there's no way to get
        // through with a flood
        if (!lo_and || !hi_and) {
            return false;
        }
        return true;
    }

    friend TeddySet merge(const TeddySet &a, const TeddySet &b) {
        assert(a.nibbleSets.size() == b.nibbleSets.size());

        TeddySet m(a);

        for (size_t i = 0; i < m.nibbleSets.size(); i++) {
            m.nibbleSets[i] |= b.nibbleSets[i];
        }

        m.litIds.insert(m.litIds.end(), b.litIds.begin(), b.litIds.end());
        sort_and_unique(m.litIds);

        return m;
    }
};

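/**
 * \brief Greedily assign literals to TeddySets so that they fit into the
 * engine's buckets.
 *
 * Each literal starts in its own TeddySet. On every iteration we either take
 * a merge that is an outright win (the merged heuristic is lower than the sum
 * of the parts) or the pair whose merge increases the heuristic by the least,
 * refusing merges that would turn two non-flood-prone sets into a flood-prone
 * one. Once the number of sets fits within the bucket count, only sets with
 * identical nibble masks are merged. Returns false if the literals still
 * cannot be packed into the available buckets.
 */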
static
bool pack(const vector<hwlmLiteral> &lits,
          const TeddyEngineDescription &eng,
          map<BucketIndex, std::vector<LiteralIndex>> &bucketToLits) {
    set<TeddySet> sts;

    for (u32 i = 0; i < lits.size(); i++) {
        TeddySet ts(eng.numMasks);
        ts.addLiteral(i, lits[i]);
        sts.insert(ts);
    }

    while (1) {
#ifdef TEDDY_DEBUG
        printf("Size %zu\n", sts.size());
        for (const TeddySet &ts : sts) {
            printf("\n");
            ts.dump();
        }
        printf("\n===============================================\n");
#endif

        auto m1 = sts.end(), m2 = sts.end();
        u64a best = 0xffffffffffffffffULL;

        for (auto i1 = sts.begin(), e1 = sts.end(); i1 != e1; ++i1) {
            const TeddySet &s1 = *i1;
            for (auto i2 = next(i1), e2 = sts.end(); i2 != e2; ++i2) {
                const TeddySet &s2 = *i2;

                // be more conservative if we don't absolutely need to
                // keep packing
                if ((sts.size() <= eng.getNumBuckets()) &&
                    !s1.identicalTail(s2)) {
                    continue;
                }

                TeddySet tmpSet = merge(s1, s2);
                u64a newScore = tmpSet.heuristic();
                u64a oldScore = s1.heuristic() + s2.heuristic();
                if (newScore < oldScore) {
                    m1 = i1;
                    m2 = i2;
                    break;
                } else {
                    u64a score = newScore - oldScore;
                    bool oldRunProne = s1.isRunProne() && s2.isRunProne();
                    bool newRunProne = tmpSet.isRunProne();
                    if (newRunProne && !oldRunProne) {
                        continue;
                    }
                    if (score < best) {
                        best = score;
                        m1 = i1;
                        m2 = i2;
                    }
                }
            }
        }
        // if we didn't find a merge candidate, bail out
        if ((m1 == sts.end()) || (m2 == sts.end())) {
            break;
        }

        // do the merge
        TeddySet nts = merge(*m1, *m2);
#ifdef TEDDY_DEBUG
        printf("Merging\n");
        printf("m1 = \n");
        m1->dump();
        printf("m2 = \n");
        m2->dump();
        printf("nts = \n");
        nts.dump();
        printf("\n===============================================\n");
#endif
        sts.erase(m1);
        sts.erase(m2);
        sts.insert(nts);
    }

    if (sts.size() > eng.getNumBuckets()) {
        return false;
    }

    u32 bucket_id = 0;
    for (const TeddySet &ts : sts) {
        const auto &ts_lits = ts.getLits();
        auto &bucket_lits = bucketToLits[bucket_id];
        bucket_lits.insert(end(bucket_lits), begin(ts_lits), end(ts_lits));
        bucket_id++;
    }
    return true;
}

// this entry has an all-zero mask and is used to skip reinforcement
#define NO_REINFORCEMENT N_CHARS

// passed to fillReinforcedMsk() to mean "every character entry in the table"
#define ALL_CHAR_SET N_CHARS

// each character's reinforcement mask has REINFORCED_MSK_LEN bytes
#define REINFORCED_MSK_LEN 8

// reinforcement table size for each group of 8 buckets
#define RTABLE_SIZE ((N_CHARS + 1) * REINFORCED_MSK_LEN)

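// Layout of a reinforcement table (one per group of 8 buckets), in outline:
// N_CHARS + 1 entries of REINFORCED_MSK_LEN bytes, indexed by input byte,
// with the extra NO_REINFORCEMENT entry left all-zero so it never filters
// anything. For a character c and offset j (1 .. REINFORCED_MSK_LEN - 1),
// byte j - 1 of entry c has a bucket's bit cleared when c may appear j
// positions before the end of some literal in that bucket, which lets the
// Teddy runtime reject candidates whose preceding bytes cannot belong to the
// bucket.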
static
void initReinforcedTable(u8 *rmsk) {
    u64a *mask = (u64a *)rmsk;
    fill_n(mask, N_CHARS, 0x00ffffffffffffffULL);
}

static
void fillReinforcedMskZero(u8 *rmsk) {
    u8 *mc = rmsk + NO_REINFORCEMENT * REINFORCED_MSK_LEN;
    fill_n(mc, REINFORCED_MSK_LEN, 0x00);
}

static
void fillReinforcedMsk(u8 *rmsk, u16 c, u32 j, u8 bmsk) {
    assert(j > 0);
    if (c == ALL_CHAR_SET) {
        for (size_t i = 0; i < N_CHARS; i++) {
            u8 *mc = rmsk + i * REINFORCED_MSK_LEN;
            mc[j - 1] &= ~bmsk;
        }
    } else {
        u8 *mc = rmsk + c * REINFORCED_MSK_LEN;
        mc[j - 1] &= ~bmsk;
    }
}

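// The nibble mask region written below consists of numMasks * 2 rows of
// (maskWidth * 16) bytes: for mask position j (counting back from the end of
// the literal), row 2j holds the low-nibble shuffle tables and row 2j + 1 the
// high-nibble tables, with one 16-byte table per group of 8 buckets. A
// cleared bucket bit at table entry n means "nibble value n is compatible
// with this bucket at this position".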
static
void fillNibbleMasks(const map<BucketIndex,
                               vector<LiteralIndex>> &bucketToLits,
                     const vector<hwlmLiteral> &lits,
                     u32 numMasks, u32 maskWidth, size_t maskLen,
                     u8 *baseMsk) {
    memset(baseMsk, 0xff, maskLen);

    for (const auto &b2l : bucketToLits) {
        const u32 &bucket_id = b2l.first;
        const vector<LiteralIndex> &ids = b2l.second;
        const u8 bmsk = 1U << (bucket_id % 8);

        for (const LiteralIndex &lit_id : ids) {
            const hwlmLiteral &l = lits[lit_id];
            DEBUG_PRINTF("putting lit %u into bucket %u\n", lit_id, bucket_id);
            const u32 sz = verify_u32(l.s.size());

            // fill in masks
            for (u32 j = 0; j < numMasks; j++) {
                const u32 msk_id_lo = j * 2 * maskWidth + (bucket_id / 8);
                const u32 msk_id_hi = (j * 2 + 1) * maskWidth + (bucket_id / 8);
                const u32 lo_base = msk_id_lo * 16;
                const u32 hi_base = msk_id_hi * 16;

                // if the literal is shorter than this mask position, every
                // byte value must be allowed to match here: clear the bucket
                // bit in all 16 entries of both tables
                if (j >= sz) {
                    for (u32 n = 0; n < 16; n++) {
                        baseMsk[lo_base + n] &= ~bmsk;
                        baseMsk[hi_base + n] &= ~bmsk;
                    }
                } else {
                    // we do have a char at this position
                    u8 c = l.s[sz - 1 - j];
                    const u32 hiShift = 4;
                    u32 n_hi = (c >> hiShift) & 0xf;
                    u32 n_lo = c & 0xf;

                    if (j < l.msk.size() && l.msk[l.msk.size() - 1 - j]) {
                        // the literal carries a mask/cmp pair at this
                        // position: admit every nibble value consistent
                        // with it
                        u8 m = l.msk[l.msk.size() - 1 - j];
                        u8 m_hi = (m >> hiShift) & 0xf;
                        u8 m_lo = m & 0xf;
                        u8 cmp = l.cmp[l.msk.size() - 1 - j];
                        u8 cmp_lo = cmp & 0xf;
                        u8 cmp_hi = (cmp >> hiShift) & 0xf;

                        for (u8 cm = 0; cm < 0x10; cm++) {
                            if ((cm & m_lo) == (cmp_lo & m_lo)) {
                                baseMsk[lo_base + cm] &= ~bmsk;
                            }
                            if ((cm & m_hi) == (cmp_hi & m_hi)) {
                                baseMsk[hi_base + cm] &= ~bmsk;
                            }
                        }
                    } else {
                        if (l.nocase && ourisalpha(c)) {
                            u32 cmHalfClear = (0xdf >> hiShift) & 0xf;
                            u32 cmHalfSet = (0x20 >> hiShift) & 0xf;
                            baseMsk[hi_base + (n_hi & cmHalfClear)] &= ~bmsk;
                            baseMsk[hi_base + (n_hi | cmHalfSet)] &= ~bmsk;
                        } else {
                            baseMsk[hi_base + n_hi] &= ~bmsk;
                        }
                        baseMsk[lo_base + n_lo] &= ~bmsk;
                    }
                }
            }
        }
    }
}

static
void fillReinforcedTable(const map<BucketIndex,
                                   vector<LiteralIndex>> &bucketToLits,
                         const vector<hwlmLiteral> &lits,
                         u8 *rtable_base, const u32 num_tables) {
    vector<u8 *> tables;
    for (u32 i = 0; i < num_tables; i++) {
        tables.push_back(rtable_base + i * RTABLE_SIZE);
    }

    for (auto t : tables) {
        initReinforcedTable(t);
    }

    for (const auto &b2l : bucketToLits) {
        const u32 &bucket_id = b2l.first;
        const vector<LiteralIndex> &ids = b2l.second;
        u8 *rmsk = tables[bucket_id / 8];
        const u8 bmsk = 1U << (bucket_id % 8);

        for (const LiteralIndex &lit_id : ids) {
            const hwlmLiteral &l = lits[lit_id];
            DEBUG_PRINTF("putting lit %u into bucket %u\n", lit_id, bucket_id);
            const u32 sz = verify_u32(l.s.size());

            // fill in reinforced masks
            for (u32 j = 1; j < REINFORCED_MSK_LEN; j++) {
                if (sz - 1 < j) {
                    fillReinforcedMsk(rmsk, ALL_CHAR_SET, j, bmsk);
                } else {
                    u8 c = l.s[sz - 1 - j];
                    if (l.nocase && ourisalpha(c)) {
                        u8 c_up = c & 0xdf;
                        fillReinforcedMsk(rmsk, c_up, j, bmsk);
                        u8 c_lo = c | 0x20;
                        fillReinforcedMsk(rmsk, c_lo, j, bmsk);
                    } else {
                        fillReinforcedMsk(rmsk, c, j, bmsk);
                    }
                }
            }
        }
    }

    for (auto t : tables) {
        fillReinforcedMskZero(t);
    }
}

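// Bytecode layout produced by build(), with each major structure rounded up
// to a cacheline boundary:
//
//   Teddy header | nibble masks | reinforcement tables | confirm | flood
//
// confOffset and floodOffset in the header record where the confirm and
// flood control structures begin, relative to the start of the Teddy struct.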
bytecode_ptr<FDR> TeddyCompiler::build() {
    u32 maskWidth = eng.getNumBuckets() / 8;

    size_t headerSize = sizeof(Teddy);
    size_t maskLen = eng.numMasks * 16 * 2 * maskWidth;
    size_t reinforcedMaskLen = RTABLE_SIZE * maskWidth;

    auto floodTable = setupFDRFloodControl(lits, eng, grey);
    auto confirmTable = setupFullConfs(lits, eng, bucketToLits, make_small);

    // Note: we place each major structure here on a cacheline boundary.
    size_t size = ROUNDUP_CL(headerSize) + ROUNDUP_CL(maskLen) +
                  ROUNDUP_CL(reinforcedMaskLen) +
                  ROUNDUP_CL(confirmTable.size()) + floodTable.size();

    auto fdr = make_zeroed_bytecode_ptr<FDR>(size, 64);
    assert(fdr); // otherwise would have thrown std::bad_alloc
    Teddy *teddy = (Teddy *)fdr.get(); // ugly
    u8 *teddy_base = (u8 *)teddy;

    // Write header.
    teddy->size = size;
    teddy->engineID = eng.getID();
    teddy->maxStringLen = verify_u32(maxLen(lits));
    teddy->numStrings = verify_u32(lits.size());

    // Write confirm structures.
    u8 *ptr = teddy_base + ROUNDUP_CL(headerSize) + ROUNDUP_CL(maskLen) +
              ROUNDUP_CL(reinforcedMaskLen);
    assert(ISALIGNED_CL(ptr));
    teddy->confOffset = verify_u32(ptr - teddy_base);
    memcpy(ptr, confirmTable.get(), confirmTable.size());
    ptr += ROUNDUP_CL(confirmTable.size());

    // Write flood control structures.
    assert(ISALIGNED_CL(ptr));
    teddy->floodOffset = verify_u32(ptr - teddy_base);
    memcpy(ptr, floodTable.get(), floodTable.size());
    ptr += floodTable.size();

    // Write teddy masks.
    u8 *baseMsk = teddy_base + ROUNDUP_CL(headerSize);
    fillNibbleMasks(bucketToLits, lits, eng.numMasks, maskWidth, maskLen,
                    baseMsk);

    // Write reinforcement masks.
    u8 *reinforcedMsk = baseMsk + ROUNDUP_CL(maskLen);
    fillReinforcedTable(bucketToLits, lits, reinforcedMsk, maskWidth);

    return fdr;
}

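/**
 * \brief Check that the literals are suitable for this Teddy engine and
 * distribute them into buckets via pack(); returns false if they do not fit.
 */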
static
bool assignStringsToBuckets(
                        const vector<hwlmLiteral> &lits,
                        TeddyEngineDescription &eng,
                        map<BucketIndex, vector<LiteralIndex>> &bucketToLits) {
    assert(eng.numMasks <= MAX_NUM_MASKS);
    if (lits.size() > eng.getNumBuckets() * TEDDY_BUCKET_LOAD) {
        DEBUG_PRINTF("too many literals: %zu\n", lits.size());
        return false;
    }

#ifdef TEDDY_DEBUG
    for (size_t i = 0; i < lits.size(); i++) {
        printf("lit %zu (len = %zu, %s) is ", i, lits[i].s.size(),
               lits[i].nocase ? "caseless" : "caseful");
        for (size_t j = 0; j < lits[i].s.size(); j++) {
            printf("%02x", ((u32)lits[i].s[j]) & 0xff);
        }
        printf("\n");
    }
#endif

    if (!pack(lits, eng, bucketToLits)) {
        DEBUG_PRINTF("more lits (%zu) than buckets (%u), can't pack.\n",
                     lits.size(), eng.getNumBuckets());
        return false;
    }
    return true;
}

} // namespace

bytecode_ptr<FDR> teddyBuildTable(const HWLMProto &proto, const Grey &grey) {
    TeddyCompiler tc(proto.lits, proto.bucketToLits, *(proto.teddyEng),
                     proto.make_small, grey);
    return tc.build();
}

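/**
 * \brief Build an HWLMProto for a Teddy engine, choosing an engine
 * description from the hint (or from the target if no hint is given) and
 * assigning the literals to buckets. Returns nullptr if no suitable Teddy
 * engine exists or the literals cannot be packed.
 */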
unique_ptr<HWLMProto> teddyBuildProtoHinted(
                                u8 engType, const vector<hwlmLiteral> &lits,
                                bool make_small, u32 hint,
                                const target_t &target) {
    unique_ptr<TeddyEngineDescription> des;
    if (hint == HINT_INVALID) {
        des = chooseTeddyEngine(target, lits);
    } else {
        des = getTeddyDescription(hint);
    }
    if (!des) {
        return nullptr;
    }

    map<BucketIndex, std::vector<LiteralIndex>> bucketToLits;
    if (!assignStringsToBuckets(lits, *des, bucketToLits)) {
        return nullptr;
    }

    return ue2::make_unique<HWLMProto>(engType, move(des), lits,
                                       bucketToLits, make_small);
}

} // namespace ue2