1/*
2 * Copyright (c) 2015-2019, Intel Corporation
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are met:
6 *
7 * * Redistributions of source code must retain the above copyright notice,
8 * this list of conditions and the following disclaimer.
9 * * Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * * Neither the name of Intel Corporation nor the names of its contributors
13 * may be used to endorse or promote products derived from this software
14 * without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 * POSSIBILITY OF SUCH DAMAGE.
27 */
28
29/** \file
30 * \brief FDR literal matcher: build API.
31 */
32
33#include "fdr_compile.h"
34
35#include "fdr_internal.h"
36#include "fdr_confirm.h"
37#include "fdr_compile_internal.h"
38#include "fdr_engine_description.h"
39#include "teddy_compile.h"
40#include "teddy_engine_description.h"
41#include "grey.h"
42#include "ue2common.h"
43#include "hwlm/hwlm_build.h"
44#include "util/compare.h"
45#include "util/container.h"
46#include "util/dump_mask.h"
47#include "util/make_unique.h"
48#include "util/math.h"
49#include "util/noncopyable.h"
50#include "util/target_info.h"
51#include "util/ue2string.h"
52#include "util/verify_types.h"
53
54#include <algorithm>
55#include <array>
56#include <cassert>
57#include <cctype>
58#include <cstdio>
59#include <cstdlib>
60#include <cstring>
61#include <limits>
62#include <map>
63#include <memory>
64#include <numeric>
65#include <set>
66#include <string>
67#include <unordered_map>
68#include <unordered_set>
69#include <vector>
70
71#include <boost/multi_array.hpp>
72
73using namespace std;
74
75namespace ue2 {
76
77namespace {
78
/** \brief Compiles a set of hwlm literals into FDR bytecode for a given
 * engine description. */
class FDRCompiler : noncopyable {
private:
    const FDREngineDescription &eng; //!< target engine description
    const Grey &grey; //!< compile-time tuning parameters
    vector<u8> tab; //!< scheme mask table, filled in by setupTab()
    vector<hwlmLiteral> lits; //!< literals to compile
    map<BucketIndex, std::vector<LiteralIndex> > bucketToLits; //!< literal indices per bucket
    bool make_small; //!< favour smaller bytecode where possible

    // Returns a pointer to the mask for the given table entry.
    u8 *tabIndexToMask(u32 indexInTable);
#ifdef DEBUG
    void dumpMasks(const u8 *defaultMask);
#endif
    // Populates the scheme mask table from the literals.
    void setupTab();
    // Lays out the final FDR bytecode structures.
    bytecode_ptr<FDR> setupFDR();
    // Writes the initial state bits into the FDR header.
    void createInitialState(FDR *fdr);

public:
    FDRCompiler(vector<hwlmLiteral> lits_in,
                map<BucketIndex, std::vector<LiteralIndex>> bucketToLits_in,
                const FDREngineDescription &eng_in,
                bool make_small_in, const Grey &grey_in)
        : eng(eng_in), grey(grey_in), tab(eng_in.getTabSizeBytes()),
          lits(move(lits_in)), bucketToLits(move(bucketToLits_in)),
          make_small(make_small_in) {}

    bytecode_ptr<FDR> build();
};
107
108u8 *FDRCompiler::tabIndexToMask(u32 indexInTable) {
109 assert(indexInTable < tab.size());
110 return &tab[0] + (indexInTable * (eng.getSchemeWidth() / 8));
111}
112
/** \brief Sets bit \p bit in the little-endian bit array at \p msk. */
static
void setbit(u8 *msk, u32 bit) {
    const u32 byte = bit / 8;
    const u32 shift = bit % 8;
    msk[byte] |= (u8)(1U << shift);
}
117
/** \brief Clears bit \p bit in the little-endian bit array at \p msk. */
static
void clearbit(u8 *msk, u32 bit) {
    msk[bit >> 3] &= (u8)~(1U << (bit & 7));
}
122
/** \brief Byte-wise AND of \p a and \p b into \p dest (dest may alias an
 * input, as when folding a default mask into a table entry in place). */
static
void andMask(u8 *dest, const u8 *a, const u8 *b, u32 num_bytes) {
    const u8 *end = a + num_bytes;
    while (a != end) {
        *dest++ = *a++ & *b++;
    }
}
129
/** \brief Sets the initial state bits in the FDR header.
 *
 * For each bucket, slots for positions [0, min_len - 2] are set to 1 (where
 * min_len is the shortest literal in that bucket) and the rest are 0.
 */
void FDRCompiler::createInitialState(FDR *fdr) {
    u8 *start = (u8 *)&fdr->start;

    /* initial state should to be 1 in each slot in the bucket up to bucket
     * minlen - 1, and 0 thereafter */
    for (BucketIndex b = 0; b < eng.getNumBuckets(); b++) {
        // Find the minimum length for the literals in this bucket.
        const vector<LiteralIndex> &bucket_lits = bucketToLits[b];
        u32 min_len = ~0U;
        for (const LiteralIndex &lit_idx : bucket_lits) {
            min_len = min(min_len, verify_u32(lits[lit_idx].s.length()));
        }

        DEBUG_PRINTF("bucket %u has min_len=%u\n", b, min_len);
        assert(min_len);

        for (PositionInBucket i = 0; i < eng.getBucketWidth(b); i++) {
            // Only positions strictly below min_len - 1 get a 1 bit.
            if (i < min_len - 1) {
                setbit(start, eng.getSchemeBit(b, i));
            }
        }
    }
}
153
154/**
155 * \brief Lay out FDR structures in bytecode.
156 *
157 * Note that each major structure (header, table, confirm, flood control) is
158 * cacheline-aligned.
159 */
160bytecode_ptr<FDR> FDRCompiler::setupFDR() {
161 auto floodTable = setupFDRFloodControl(lits, eng, grey);
162 auto confirmTable = setupFullConfs(lits, eng, bucketToLits, make_small);
163
164 size_t headerSize = sizeof(FDR);
165 size_t tabSize = eng.getTabSizeBytes();
166
167 // Note: we place each major structure here on a cacheline boundary.
168 size_t size = ROUNDUP_CL(headerSize) + ROUNDUP_CL(tabSize) +
169 ROUNDUP_CL(confirmTable.size()) + floodTable.size();
170
171 DEBUG_PRINTF("sizes base=%zu tabSize=%zu confirm=%zu floodControl=%zu "
172 "total=%zu\n",
173 headerSize, tabSize, confirmTable.size(), floodTable.size(),
174 size);
175
176 auto fdr = make_zeroed_bytecode_ptr<FDR>(size, 64);
177 assert(fdr); // otherwise would have thrown std::bad_alloc
178
179 u8 *fdr_base = (u8 *)fdr.get();
180
181 // Write header.
182 fdr->size = size;
183 fdr->engineID = eng.getID();
184 fdr->maxStringLen = verify_u32(maxLen(lits));
185 fdr->numStrings = verify_u32(lits.size());
186 assert(eng.bits > 8 && eng.bits < 16); // we allow domains 9 to 15 only
187 fdr->domain = eng.bits;
188 fdr->domainMask = (1 << eng.bits) - 1;
189 fdr->tabSize = tabSize;
190 fdr->stride = eng.stride;
191 createInitialState(fdr.get());
192
193 // Write table.
194 u8 *ptr = fdr_base + ROUNDUP_CL(sizeof(FDR));
195 assert(ISALIGNED_CL(ptr));
196 copy(tab.begin(), tab.end(), ptr);
197 ptr += ROUNDUP_CL(tabSize);
198
199 // Write confirm structures.
200 assert(ISALIGNED_CL(ptr));
201 fdr->confOffset = verify_u32(ptr - fdr_base);
202 memcpy(ptr, confirmTable.get(), confirmTable.size());
203 ptr += ROUNDUP_CL(confirmTable.size());
204
205 // Write flood control structures.
206 assert(ISALIGNED_CL(ptr));
207 fdr->floodOffset = verify_u32(ptr - fdr_base);
208 memcpy(ptr, floodTable.get(), floodTable.size());
209 ptr += floodTable.size(); // last write, no need to round up
210
211 return fdr;
212}
213
214//#define DEBUG_ASSIGNMENT
215
216/**
217 * Utility class for computing:
218 *
219 * score(count, len) = pow(count, 1.05) * pow(len, -3)
220 *
221 * Calling pow() is expensive. This is mitigated by using pre-computed LUTs for
222 * small inputs and a cache for larger ones.
223 */
224class Scorer {
225 unordered_map<u32, double> count_factor_cache;
226
227 // LUT: pow(count, 1.05) for small values of count.
228 static const array<double, 100> count_lut;
229
230 double count_factor(u32 count) {
231 if (count < count_lut.size()) {
232 return count_lut[count];
233 }
234
235 auto it = count_factor_cache.find(count);
236 if (it != count_factor_cache.end()) {
237 return it->second;
238 }
239 double r = our_pow(count, 1.05);
240 count_factor_cache.emplace(count, r);
241 return r;
242 }
243
244 // LUT: pow(len, -3) for len in range [0,8].
245 static const array<double, 9> len_lut;
246
247 double len_factor(u32 len) {
248 assert(len <= len_lut.size());
249 return len_lut[len];
250 }
251
252public:
253 double operator()(u32 len, u32 count) {
254 if (len == 0) {
255 return numeric_limits<double>::max();
256 }
257 return count_factor(count) * len_factor(len);
258 }
259};
260
// Static LUT definition: pow(count, 1.05) for count in [0, 100).
const array<double, 100> Scorer::count_lut{{
    pow(0, 1.05), pow(1, 1.05), pow(2, 1.05), pow(3, 1.05), pow(4, 1.05),
    pow(5, 1.05), pow(6, 1.05), pow(7, 1.05), pow(8, 1.05), pow(9, 1.05),
    pow(10, 1.05), pow(11, 1.05), pow(12, 1.05), pow(13, 1.05), pow(14, 1.05),
    pow(15, 1.05), pow(16, 1.05), pow(17, 1.05), pow(18, 1.05), pow(19, 1.05),
    pow(20, 1.05), pow(21, 1.05), pow(22, 1.05), pow(23, 1.05), pow(24, 1.05),
    pow(25, 1.05), pow(26, 1.05), pow(27, 1.05), pow(28, 1.05), pow(29, 1.05),
    pow(30, 1.05), pow(31, 1.05), pow(32, 1.05), pow(33, 1.05), pow(34, 1.05),
    pow(35, 1.05), pow(36, 1.05), pow(37, 1.05), pow(38, 1.05), pow(39, 1.05),
    pow(40, 1.05), pow(41, 1.05), pow(42, 1.05), pow(43, 1.05), pow(44, 1.05),
    pow(45, 1.05), pow(46, 1.05), pow(47, 1.05), pow(48, 1.05), pow(49, 1.05),
    pow(50, 1.05), pow(51, 1.05), pow(52, 1.05), pow(53, 1.05), pow(54, 1.05),
    pow(55, 1.05), pow(56, 1.05), pow(57, 1.05), pow(58, 1.05), pow(59, 1.05),
    pow(60, 1.05), pow(61, 1.05), pow(62, 1.05), pow(63, 1.05), pow(64, 1.05),
    pow(65, 1.05), pow(66, 1.05), pow(67, 1.05), pow(68, 1.05), pow(69, 1.05),
    pow(70, 1.05), pow(71, 1.05), pow(72, 1.05), pow(73, 1.05), pow(74, 1.05),
    pow(75, 1.05), pow(76, 1.05), pow(77, 1.05), pow(78, 1.05), pow(79, 1.05),
    pow(80, 1.05), pow(81, 1.05), pow(82, 1.05), pow(83, 1.05), pow(84, 1.05),
    pow(85, 1.05), pow(86, 1.05), pow(87, 1.05), pow(88, 1.05), pow(89, 1.05),
    pow(90, 1.05), pow(91, 1.05), pow(92, 1.05), pow(93, 1.05), pow(94, 1.05),
    pow(95, 1.05), pow(96, 1.05), pow(97, 1.05), pow(98, 1.05), pow(99, 1.05),
}};

// Static LUT definition: pow(len, -3) for len in [0, 8].
const array<double, 9> Scorer::len_lut{{
    pow(0, -3.0), pow(1, -3.0), pow(2, -3.0), pow(3, -3.0), pow(4, -3.0),
    pow(5, -3.0), pow(6, -3.0), pow(7, -3.0), pow(8, -3.0)}};
287
288/**
289 * Returns true if the two given literals should be placed in the same chunk as
290 * they are identical except for a difference in caselessness.
291 */
292static
293bool isEquivLit(const hwlmLiteral &a, const hwlmLiteral &b,
294 const hwlmLiteral *last_nocase_lit) {
295 const size_t a_len = a.s.size();
296 const size_t b_len = b.s.size();
297
298 if (a_len != b_len) {
299 return false;
300 }
301
302 bool nocase = last_nocase_lit && a_len == last_nocase_lit->s.size() &&
303 !cmp(a.s.c_str(), last_nocase_lit->s.c_str(), a_len, true);
304 return !cmp(a.s.c_str(), b.s.c_str(), a.s.size(), nocase);
305}
306
/** \brief A contiguous run of literals in the length-sorted literal list;
 * the unit of work for bucket assignment. */
struct Chunk {
    Chunk(u32 first_id_in, u32 count_in, u32 length_in)
        : first_id(first_id_in), count(count_in), length(length_in) {}
    u32 first_id; //!< first id in this chunk
    u32 count; //!< how many are in this chunk
    u32 length; //!< how long things in the chunk are
};
314
/**
 * \brief Splits the (length-sorted) literal list into a sequence of chunks.
 *
 * A new chunk is opened when the literal length changes (only while the
 * current length is below MAX_CONSIDERED_LENGTH) or when the current chunk
 * exceeds maxPerChunk entries. A final empty chunk is appended as a sentinel.
 */
static
vector<Chunk> assignChunks(const vector<hwlmLiteral> &lits,
                           const map<u32, u32> &lenCounts) {
    const u32 CHUNK_MAX = 512;
    const u32 MAX_CONSIDERED_LENGTH = 16;

    // TODO: detailed early stage literal analysis for v. small cases (actually
    // look at lits) yes - after we factor this out and merge in the Teddy
    // style of building we can look at this, although the teddy merge
    // modelling is quite different. It's still probably adaptable to some
    // extent for this class of problem.

    vector<Chunk> chunks;
    chunks.reserve(CHUNK_MAX);

    // Per-chunk budget chosen so the total stays under CHUNK_MAX chunks.
    const u32 maxPerChunk = lits.size() /
        (CHUNK_MAX - MIN(MAX_CONSIDERED_LENGTH, lenCounts.size())) + 1;

    u32 currentSize = 0;
    u32 chunkStartID = 0;
    const hwlmLiteral *last_nocase_lit = nullptr;

    for (u32 i = 0; i < lits.size() && chunks.size() < CHUNK_MAX - 1; i++) {
        const auto &lit = lits[i];

        DEBUG_PRINTF("i=%u, lit=%s%s\n", i, escapeString(lit.s).c_str(),
                     lit.nocase ? " (nocase)" : "");

        // If this literal is identical to the last one (aside from differences
        // in caselessness), keep going even if we will "overfill" a chunk; we
        // don't want to split identical literals into different buckets.
        if (i != 0 && isEquivLit(lit, lits[i - 1], last_nocase_lit)) {
            DEBUG_PRINTF("identical lit\n");
            goto next_literal;
        }

        if ((currentSize < MAX_CONSIDERED_LENGTH &&
             (lit.s.size() != currentSize)) ||
            (currentSize != 1 && ((i - chunkStartID) >= maxPerChunk))) {
            currentSize = lit.s.size();
            // Close off the previous chunk before opening a new one.
            if (!chunks.empty()) {
                chunks.back().count = i - chunkStartID;
            }
            chunkStartID = i;
            chunks.emplace_back(i, 0, currentSize);
        }
next_literal:
        if (lit.nocase) {
            last_nocase_lit = &lit;
        }
    }

    assert(!chunks.empty());
    chunks.back().count = lits.size() - chunkStartID;
    // close off chunks with an empty row
    chunks.emplace_back(lits.size(), 0, 0);

#ifdef DEBUG_ASSIGNMENT
    for (size_t j = 0; j < chunks.size(); j++) {
        const auto &chunk = chunks[j];
        printf("chunk %zu first_id=%u count=%u length=%u\n", j, chunk.first_id,
               chunk.count, chunk.length);
    }
#endif

    DEBUG_PRINTF("built %zu chunks (%zu lits)\n", chunks.size(), lits.size());
    assert(chunks.size() <= CHUNK_MAX);
    return chunks;
}
384
/**
 * \brief Assigns literals to the engine's buckets.
 *
 * Literals are sorted by length (reversed-lexicographic within a length),
 * carved into chunks, then assigned by dynamic programming over
 * (chunk, bucket) pairs so as to minimise the total Scorer cost.
 */
static
map<BucketIndex, vector<LiteralIndex>> assignStringsToBuckets(
                                vector<hwlmLiteral> &lits,
                                const FDREngineDescription &eng) {
    const double MAX_SCORE = numeric_limits<double>::max();

    assert(!lits.empty()); // Shouldn't be called with no literals.

    // Count the number of literals for each length.
    map<u32, u32> lenCounts;
    for (const auto &lit : lits) {
        lenCounts[lit.s.size()]++;
    }

#ifdef DEBUG_ASSIGNMENT
    for (const auto &m : lenCounts) {
        printf("l<%u>:%u ", m.first, m.second);
    }
    printf("\n");
#endif

    // Sort literals by literal length. If tied on length, use lexicographic
    // ordering (of the reversed literals).
    stable_sort(lits.begin(), lits.end(),
                [](const hwlmLiteral &a, const hwlmLiteral &b) {
                    if (a.s.size() != b.s.size()) {
                        return a.s.size() < b.s.size();
                    }
                    auto p = mismatch(a.s.rbegin(), a.s.rend(), b.s.rbegin());
                    if (p.first != a.s.rend()) {
                        return *p.first < *p.second;
                    }
                    // Sort caseless variants first.
                    return a.nocase > b.nocase;
                });

    vector<Chunk> chunks = assignChunks(lits, lenCounts);

    const u32 numChunks = chunks.size();
    const u32 numBuckets = eng.getNumBuckets();

    // 2D array of (score, chunk index) pairs, indexed by
    // [chunk_index][bucket_index]. t[j][i] holds the best score achievable
    // covering chunks [j, end) with i + 1 buckets; .second is the chunk at
    // which the next bucket begins.
    boost::multi_array<pair<double, u32>, 2> t(
        boost::extents[numChunks][numBuckets]);

    Scorer scorer;

    // Base case: all chunks from j onwards go into a single bucket.
    for (u32 j = 0; j < numChunks; j++) {
        u32 cnt = 0;
        for (u32 k = j; k < numChunks; ++k) {
            cnt += chunks[k].count;
        }
        t[j][0] = {scorer(chunks[j].length, cnt), 0};
    }

    // Inductive step: try every split point k for the first bucket, using
    // the (i - 1)-bucket solution for the remainder.
    for (u32 i = 1; i < numBuckets; i++) {
        for (u32 j = 0; j < numChunks - 1; j++) { // don't do last, empty row
            pair<double, u32> best = {MAX_SCORE, 0};
            u32 cnt = chunks[j].count;
            for (u32 k = j + 1; k < numChunks - 1; k++) {
                auto score = scorer(chunks[j].length, cnt);
                if (score > best.first) {
                    break; // now worse locally than our best score, give up
                }
                score += t[k][i-1].first;
                if (score < best.first) {
                    best = {score, k};
                }
                cnt += chunks[k].count;
            }
            t[j][i] = best;
        }
        t[numChunks - 1][i] = {0,0}; // fill in empty final row for next iter
    }

#ifdef DEBUG_ASSIGNMENT
    for (u32 j = 0; j < numChunks; j++) {
        printf("%03u: ", j);
        for (u32 i = 0; i < numBuckets; i++) {
            const auto &v = t[j][i];
            printf("<%0.3f,%3d> ", v.first, v.second);
        }
        printf("\n");
    }
#endif

    // our best score is in t[0][N_BUCKETS-1] and we can follow the links
    // to find where our buckets should start and what goes into them
    vector<vector<LiteralIndex>> buckets;
    for (u32 i = 0, n = numBuckets; n && (i != numChunks - 1); n--) {
        u32 j = t[i][n - 1].second;
        if (j == 0) {
            // No split recorded: the remaining chunks form one bucket.
            j = numChunks - 1;
        }

        // put chunks between i - j into bucket (numBuckets - n).
        u32 first_id = chunks[i].first_id;
        u32 last_id = chunks[j].first_id;
        assert(first_id < last_id);
        UNUSED const auto &first_lit = lits[first_id];
        UNUSED const auto &last_lit = lits[last_id - 1];
        DEBUG_PRINTF("placing [%u-%u) in one bucket (%u lits, len %zu-%zu, "
                     "score %0.4f)\n",
                     first_id, last_id, last_id - first_id,
                     first_lit.s.length(), last_lit.s.length(),
                     scorer(first_lit.s.length(), last_id - first_id));

        vector<LiteralIndex> litIds;
        u32 cnt = last_id - first_id;
        // long literals first for included literals checking
        for (u32 k = 0; k < cnt; k++) {
            litIds.push_back(last_id - k - 1);
        }

        i = j;
        buckets.push_back(litIds);
    }

    // reverse bucket id, longer literals come first
    map<BucketIndex, vector<LiteralIndex>> bucketToLits;
    size_t bucketCnt = buckets.size();
    for (size_t i = 0; i < bucketCnt; i++) {
        bucketToLits.emplace(bucketCnt - i - 1, move(buckets[i]));
    }

    return bucketToLits;
}
513
#ifdef DEBUG
/** \brief Debug dump: prints every table mask that differs from the default
 * mask. */
void FDRCompiler::dumpMasks(const u8 *defaultMask) {
    const size_t width = eng.getSchemeWidth();
    printf("default mask: %s\n", dumpMask(defaultMask, width).c_str());
    for (u32 i = 0; i < eng.getNumTableEntries(); i++) {
        u8 *m = tabIndexToMask(i);
        // Only entries differing from the default are interesting.
        if (memcmp(m, defaultMask, width / 8)) {
            printf("tab %04x: %s\n", i, dumpMask(m, width).c_str());
        }
    }
}
#endif
526
/**
 * \brief Collects, for suffix position \p pos, the (don't-care, mask) info
 * derived from each literal in \p vl into \p m2 (keyed by don't-care bits).
 *
 * Returns true if some literal yields an entry whose domain bits are all
 * don't-cares at this position; the caller then clears the scheme bit in the
 * default mask instead of enumerating table entries.
 */
static
bool getMultiEntriesAtPosition(const FDREngineDescription &eng,
                               const vector<LiteralIndex> &vl,
                               const vector<hwlmLiteral> &lits,
                               SuffixPositionInString pos,
                               map<u32, unordered_set<u32>> &m2) {
    assert(eng.bits < 32);

    // Number of bytes needed to cover the domain of eng.bits bits.
    u32 distance = 0;
    if (eng.bits <= 8) {
        distance = 1;
    } else if (eng.bits <= 16) {
        distance = 2;
    } else {
        distance = 4;
    }

    for (auto i = vl.begin(), e = vl.end(); i != e; ++i) {
        if (e - i > 5) {
            __builtin_prefetch(&lits[*(i + 5)]);
        }
        const hwlmLiteral &lit = lits[*i];
        const size_t sz = lit.s.size();
        u32 mask = 0;
        u32 dontCares = 0;
        for (u32 cnt = 0; cnt < distance; cnt++) {
            int newPos = pos - cnt;
            u8 dontCareByte = 0x0;
            u8 maskByte = 0x0;
            if (newPos < 0 || ((u32)newPos >= sz)) {
                // This byte lies outside the literal: entirely don't-care.
                dontCareByte = 0xff;
            } else {
                // Byte at distance newPos from the end of the literal.
                u8 c = lit.s[sz - newPos - 1];
                maskByte = c;
                u32 remainder = eng.bits - cnt * 8;
                assert(remainder != 0);
                if (remainder < 8) {
                    // Top byte only partially covered by the domain bits.
                    u8 cmask = (1U << remainder) - 1;
                    maskByte &= cmask;
                    dontCareByte |= ~cmask;
                }
                if (lit.nocase && ourisalpha(c)) {
                    // Treat the ASCII case bit (0x20) as a don't-care.
                    maskByte &= 0xdf;
                    dontCareByte |= 0x20;
                }
            }
            u32 loc = cnt * 8;
            mask |= maskByte << loc;
            dontCares |= dontCareByte << loc;
        }

        // truncate m and dc down to nBits
        mask &= (1U << eng.bits) - 1;
        dontCares &= (1U << eng.bits) - 1;
        if (dontCares == ((1U << eng.bits) - 1)) {
            return true;
        }
        m2[dontCares].insert(mask);
    }
    return false;
}
588
/**
 * \brief Builds the scheme mask table from the literals.
 *
 * Every table entry starts all-ones; for each (bucket, position) pair the
 * corresponding scheme bit is cleared in every table entry whose domain
 * value could match some literal suffix at that position.
 */
void FDRCompiler::setupTab() {
    const size_t mask_size = eng.getSchemeWidth() / 8;
    assert(mask_size);

    vector<u8> defaultMask(mask_size, 0xff);
    for (u32 i = 0; i < eng.getNumTableEntries(); i++) {
        memcpy(tabIndexToMask(i), &defaultMask[0], mask_size);
    }

    for (BucketIndex b = 0; b < eng.getNumBuckets(); b++) {
        const vector<LiteralIndex> &vl = bucketToLits[b];
        SuffixPositionInString pLimit = eng.getBucketWidth(b);
        for (SuffixPositionInString pos = 0; pos < pLimit; pos++) {
            u32 bit = eng.getSchemeBit(b, pos);
            map<u32, unordered_set<u32>> m2;
            bool done = getMultiEntriesAtPosition(eng, vl, lits, pos, m2);
            if (done) {
                // Some literal is all-don't-cares at this position: clear the
                // bit once in the default mask rather than in every entry.
                clearbit(&defaultMask[0], bit);
                continue;
            }
            for (const auto &elem : m2) {
                u32 dc = elem.first;
                const unordered_set<u32> &mskSet = elem.second;
                // Enumerate every subset b2 of the don't-care bits dc: the
                // (v + lowest dc bit) | ~dc step ripple-carries through the
                // dc bits, and the loop terminates when v wraps back to ~dc.
                u32 v = ~dc;
                do {
                    u32 b2 = v & dc;
                    for (const u32 &mskVal : mskSet) {
                        u32 val = (mskVal & ~dc) | b2;
                        clearbit(tabIndexToMask(val), bit);
                    }
                    v = (v + (dc & -dc)) | ~dc;
                } while (v != ~dc);
            }
        }
    }

    // Fold the default mask into every table entry.
    for (u32 i = 0; i < eng.getNumTableEntries(); i++) {
        u8 *m = tabIndexToMask(i);
        andMask(m, m, &defaultMask[0], mask_size);
    }
#ifdef DEBUG
    dumpMasks(&defaultMask[0]);
#endif
}
633
/** \brief Builds the complete FDR engine: masks first, then bytecode layout. */
bytecode_ptr<FDR> FDRCompiler::build() {
    setupTab();
    return setupFDR();
}
638
639static
640bool isSuffix(const hwlmLiteral &lit1, const hwlmLiteral &lit2) {
641 const auto &s1 = lit1.s;
642 const auto &s2 = lit2.s;
643 size_t len1 = s1.length();
644 size_t len2 = s2.length();
645 assert(len1 >= len2);
646
647 if (lit1.nocase || lit2.nocase) {
648 return equal(s2.begin(), s2.end(), s1.begin() + len1 - len2,
649 [](char a, char b) { return mytoupper(a) == mytoupper(b); });
650 } else {
651 return equal(s2.begin(), s2.end(), s1.begin() + len1 - len2);
652 }
653}
654
655/*
656 * if lit2 is a suffix of lit1 but the case sensitivity, groups or mask info
657 * of lit2 is a subset of lit1, then lit1 can't squash lit2 and lit2 can
658 * possibly match when lit1 matches. In this case, we can't do bucket
659 * squashing. e.g. AAA(no case) in bucket 0, AA(no case) and aa in bucket 1,
660 * we can't squash bucket 1 if we have input like "aaa" as aa can also match.
661 */
662static
663bool includedCheck(const hwlmLiteral &lit1, const hwlmLiteral &lit2) {
664 /* lit1 is caseless and lit2 is case sensitive */
665 if ((lit1.nocase && !lit2.nocase)) {
666 return true;
667 }
668
669 /* lit2's group is a subset of lit1 */
670 if (lit1.groups != lit2.groups &&
671 (lit2.groups == (lit1.groups & lit2.groups))) {
672 return true;
673 }
674
675 /* TODO: narrow down cases for mask check */
676 if (lit1.cmp != lit2.cmp || lit1.msk != lit2.msk) {
677 return true;
678 }
679
680 return false;
681}
682
683/*
684 * if lit2 is an included literal of both lit0 and lit1, then lit0 and lit1
685 * shouldn't match at the same offset, otherwise we give up squashing for lit1.
686 * e.g. lit0:AAA(no case), lit1:aa, lit2:A(no case). We can have duplicate
687 * matches for input "aaa" if lit0 and lit1 both squash lit2.
688 */
689static
690bool checkParentLit(
691 const vector<hwlmLiteral> &lits, u32 pos1,
692 const unordered_set<u32> &parent_map,
693 const unordered_map<u32, unordered_set<u32>> &exception_map) {
694 assert(pos1 < lits.size());
695 const auto &lit1 = lits[pos1];
696 for (const auto pos2 : parent_map) {
697 if (contains(exception_map, pos2)) {
698 const auto &exception_pos = exception_map.at(pos2);
699 if (contains(exception_pos, pos1)) {
700 return false;
701 }
702 }
703
704 /* if lit1 isn't an exception of lit2, then we have to do further
705 * exclusive check.
706 * TODO: More mask checks. Note if two literals are group exclusive,
707 * it is possible that they match at the same offset. */
708 assert(pos2 < lits.size());
709 const auto &lit2 = lits[pos2];
710 if (isSuffix(lit2, lit1)) {
711 return false;
712 }
713 }
714
715 return true;
716}
717
/**
 * \brief Scans \p group (bucket-ordered (bucket, literal) pairs) from index
 * \p start for literals that are suffixes of lits[id1], recording
 * included-literal and squash-mask info on lits[id1] where safe, and giving
 * up where duplicate or exceptional matches are possible.
 */
static
void buildSquashMask(vector<hwlmLiteral> &lits, u32 id1, u32 bucket1,
                     size_t start, const vector<pair<u32, u32>> &group,
                     unordered_map<u32, unordered_set<u32>> &parent_map,
                     unordered_map<u32, unordered_set<u32>> &exception_map) {
    auto &lit1 = lits[id1];
    DEBUG_PRINTF("b:%u len:%zu\n", bucket1, lit1.s.length());

    size_t cnt = group.size();
    bool included = false;
    bool exception = false;
    u32 child_id = ~0U;
    for (size_t i = start; i < cnt; i++) {
        u32 bucket2 = group[i].first;
        assert(bucket2 >= bucket1);

        u32 id2 = group[i].second;
        auto &lit2 = lits[id2];
        // check if lit2 is a suffix of lit1
        if (isSuffix(lit1, lit2)) {
            /* if we have an included literal in the same bucket,
             * quit and let the included literal do any squashing */
            if (bucket1 == bucket2) {
                DEBUG_PRINTF("same bucket\n");
                return;
            }
            /* if lit2 is a suffix but doesn't pass included checks for
             * extra info, we give up squashing */
            if (includedCheck(lit1, lit2)) {
                DEBUG_PRINTF("find exceptional suffix %u\n", lit2.id);
                exception_map[id1].insert(id2);
                exception = true;
            } else if (checkParentLit(lits, id1, parent_map[id2],
                       exception_map)) {
                if (lit1.included_id == INVALID_LIT_ID) {
                    DEBUG_PRINTF("find suffix lit1 %u lit2 %u\n",
                                 lit1.id, lit2.id);
                    lit1.included_id = lit2.id;
                } else {
                    /* if we have multiple included literals in one bucket,
                     * give up squashing. */
                    DEBUG_PRINTF("multiple included literals\n");
                    lit1.included_id = INVALID_LIT_ID;
                    return;
                }
                child_id = id2;
                included = true;
            }
        }

        // Decide lit1's fate at each bucket boundary within the group.
        size_t next = i + 1;
        u32 nextBucket = next < cnt ? group[next].first : ~0U;
        if (bucket2 != nextBucket) {
            if (included) {
                if (exception) {
                    /* give up if we have exception literals
                     * in the same bucket as the included literal. */
                    lit1.included_id = INVALID_LIT_ID;
                } else {
                    parent_map[child_id].insert(id1);

                    lit1.squash |= 1U << bucket2;
                    DEBUG_PRINTF("build squash mask %2x for %u\n",
                                 lit1.squash, lit1.id);
                }
                return;
            }
            exception = false;
        }
    }
}
789
790static constexpr u32 INCLUDED_LIMIT = 1000;
791
792static
793void findIncludedLits(vector<hwlmLiteral> &lits,
794 const vector<vector<pair<u32, u32>>> &lastCharMap) {
795 /* Map for finding the positions of literal which includes a literal
796 * in FDR hwlm literal vector. */
797 unordered_map<u32, unordered_set<u32>> parent_map;
798
799 /* Map for finding the positions of exception literals which could
800 * sometimes match if a literal matches in FDR hwlm literal vector. */
801 unordered_map<u32, unordered_set<u32>> exception_map;
802 for (const auto &group : lastCharMap) {
803 size_t cnt = group.size();
804 if (cnt > INCLUDED_LIMIT) {
805 continue;
806 }
807 for (size_t i = 0; i < cnt; i++) {
808 u32 bucket1 = group[i].first;
809 u32 id1 = group[i].second;
810 if (lits[id1].pure) {
811 continue;
812 }
813 buildSquashMask(lits, id1, bucket1, i + 1, group, parent_map,
814 exception_map);
815 }
816 }
817}
818
819static
820void addIncludedInfo(
821 vector<hwlmLiteral> &lits, u32 nBuckets,
822 map<BucketIndex, vector<LiteralIndex>> &bucketToLits) {
823 vector<vector<pair<u32, u32>>> lastCharMap(256);
824
825 for (BucketIndex b = 0; b < nBuckets; b++) {
826 if (!bucketToLits[b].empty()) {
827 for (const LiteralIndex &lit_idx : bucketToLits[b]) {
828 const auto &lit = lits[lit_idx];
829 u8 c = mytoupper(lit.s.back());
830 lastCharMap[c].emplace_back(b, lit_idx);
831 }
832 }
833 }
834
835 findIncludedLits(lits, lastCharMap);
836}
837
838} // namespace
839
/**
 * \brief Builds an HWLM prototype for the given literals, trying Teddy first
 * (when allowed by the Grey box) and falling back to FDR.
 *
 * \param hint Engine hint used by unit tests; HINT_INVALID selects the
 * engine automatically.
 * \return nullptr if no suitable engine description is available.
 */
static
unique_ptr<HWLMProto> fdrBuildProtoInternal(u8 engType,
                                            vector<hwlmLiteral> &lits,
                                            bool make_small,
                                            const target_t &target,
                                            const Grey &grey, u32 hint) {
    DEBUG_PRINTF("cpu has %s\n", target.has_avx2() ? "avx2" : "no-avx2");

    if (grey.fdrAllowTeddy) {
        auto proto = teddyBuildProtoHinted(engType, lits, make_small, hint,
                                           target);
        if (proto) {
            DEBUG_PRINTF("build with teddy succeeded\n");
            return proto;
        } else {
            DEBUG_PRINTF("build with teddy failed, will try with FDR\n");
        }
    }

    auto des = (hint == HINT_INVALID) ? chooseEngine(target, lits, make_small)
                                      : getFdrDescription(hint);
    if (!des) {
        return nullptr;
    }

    // temporary hack for unit testing
    if (hint != HINT_INVALID) {
        des->bits = 9;
        des->stride = 1;
    }

    auto bucketToLits = assignStringsToBuckets(lits, *des);
    addIncludedInfo(lits, des->getNumBuckets(), bucketToLits);
    auto proto =
        ue2::make_unique<HWLMProto>(engType, move(des), lits, bucketToLits,
                                    make_small);
    return proto;
}
878
/** \brief Public entry point: builds an FDR/Teddy prototype with automatic
 * engine selection. */
unique_ptr<HWLMProto> fdrBuildProto(u8 engType, vector<hwlmLiteral> lits,
                                    bool make_small, const target_t &target,
                                    const Grey &grey) {
    return fdrBuildProtoInternal(engType, lits, make_small, target, grey,
                                 HINT_INVALID);
}
885
/** \brief Builds the engine bytecode from a prototype, dispatching to the
 * Teddy compiler when the proto describes a Teddy engine. */
static
bytecode_ptr<FDR> fdrBuildTableInternal(const HWLMProto &proto,
                                        const Grey &grey) {

    if (proto.teddyEng) {
        return teddyBuildTable(proto, grey);
    }

    FDRCompiler fc(proto.lits, proto.bucketToLits, *(proto.fdrEng),
                   proto.make_small, grey);
    return fc.build();
}
898
/** \brief Public entry point: builds the engine bytecode from a prototype. */
bytecode_ptr<FDR> fdrBuildTable(const HWLMProto &proto, const Grey &grey) {
    return fdrBuildTableInternal(proto, grey);
}
902
903#if !defined(RELEASE_BUILD)
904
/** \brief Test-only entry point: builds a prototype using a specific engine
 * hint (only available in non-release builds). */
unique_ptr<HWLMProto> fdrBuildProtoHinted(u8 engType,
                                          vector<hwlmLiteral> lits,
                                          bool make_small, u32 hint,
                                          const target_t &target,
                                          const Grey &grey) {
    return fdrBuildProtoInternal(engType, lits, make_small, target, grey,
                                 hint);
}
913
914#endif
915
/** \brief Returns the size in bytes of the given FDR engine, as recorded in
 * its header. */
size_t fdrSize(const FDR *fdr) {
    assert(fdr);
    return fdr->size;
}
920
921} // namespace ue2
922