1/*
2 * Copyright (c) 2015-2017, Intel Corporation
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are met:
6 *
7 * * Redistributions of source code must retain the above copyright notice,
8 * this list of conditions and the following disclaimer.
9 * * Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * * Neither the name of Intel Corporation nor the names of its contributors
13 * may be used to endorse or promote products derived from this software
14 * without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 * POSSIBILITY OF SUCH DAMAGE.
27 */
28
29/**
30 * \file
31 * \brief FDR literal matcher: Teddy build code.
32 */
33
34#include "teddy_compile.h"
35
36#include "fdr.h"
37#include "fdr_internal.h"
38#include "fdr_compile_internal.h"
39#include "fdr_confirm.h"
40#include "fdr_engine_description.h"
41#include "teddy_internal.h"
42#include "teddy_engine_description.h"
43#include "grey.h"
44#include "ue2common.h"
45#include "hwlm/hwlm_build.h"
46#include "util/alloc.h"
47#include "util/compare.h"
48#include "util/container.h"
49#include "util/make_unique.h"
50#include "util/noncopyable.h"
51#include "util/popcount.h"
52#include "util/small_vector.h"
53#include "util/target_info.h"
54#include "util/verify_types.h"
55
56#include <algorithm>
57#include <cassert>
58#include <cctype>
59#include <cstdio>
60#include <cstdlib>
61#include <cstring>
62#include <map>
63#include <memory>
64#include <set>
65#include <string>
66#include <vector>
67
68using namespace std;
69
70namespace ue2 {
71
72namespace {
73
74//#define TEDDY_DEBUG
75
76/** \brief Max number of Teddy masks we use. */
77static constexpr size_t MAX_NUM_MASKS = 4;
78
79class TeddyCompiler : noncopyable {
80 const TeddyEngineDescription &eng;
81 const Grey &grey;
82 const vector<hwlmLiteral> &lits;
83 map<BucketIndex, std::vector<LiteralIndex>> bucketToLits;
84 bool make_small;
85
86public:
87 TeddyCompiler(const vector<hwlmLiteral> &lits_in,
88 map<BucketIndex, std::vector<LiteralIndex>> bucketToLits_in,
89 const TeddyEngineDescription &eng_in, bool make_small_in,
90 const Grey &grey_in)
91 : eng(eng_in), grey(grey_in), lits(lits_in),
92 bucketToLits(move(bucketToLits_in)), make_small(make_small_in) {}
93
94 bytecode_ptr<FDR> build();
95};
96
97class TeddySet {
98 /**
99 * \brief Estimate of the max number of literals in a set, used to
100 * minimise allocations.
101 */
102 static constexpr size_t LITS_PER_SET = 20;
103
104 /** \brief Number of masks. */
105 u32 len;
106
107 /**
108 * \brief A series of bitfields over 16 predicates that represent the
109 * shufti nibble set.
110 *
111 * So for num_masks = 4 we will represent our strings by 8 u16s in the
112 * vector that indicate what a shufti bucket would have to look like.
113 */
114 small_vector<u16, MAX_NUM_MASKS * 2> nibbleSets;
115
116 /**
117 * \brief Sorted, unique set of literals. We maintain our own set in a
118 * sorted vector to minimise allocations.
119 */
120 small_vector<u32, LITS_PER_SET> litIds;
121
122public:
123 explicit TeddySet(u32 len_in) : len(len_in), nibbleSets(len_in * 2, 0) {}
124 size_t litCount() const { return litIds.size(); }
125 const small_vector<u32, LITS_PER_SET> &getLits() const { return litIds; }
126
127 bool operator<(const TeddySet &s) const {
128 return litIds < s.litIds;
129 }
130
131#ifdef TEDDY_DEBUG
132 void dump() const {
133 printf("TS: ");
134 for (u32 i = 0; i < nibbleSets.size(); i++) {
135 printf("%04x ", (u32)nibbleSets[i]);
136 }
137 printf("\nnlits: %zu\nLit ids: ", litCount());
138 printf("Prob: %llu\n", probability());
139 for (const auto &id : litIds) {
140 printf("%u ", id);
141 }
142 printf("\n");
143 printf("Flood prone : %s\n", isRunProne() ? "yes" : "no");
144 }
145#endif
146
147 bool identicalTail(const TeddySet &ts) const {
148 return nibbleSets == ts.nibbleSets;
149 }
150
151 void addLiteral(u32 lit_id, const hwlmLiteral &lit) {
152 const string &s = lit.s;
153 for (u32 i = 0; i < len; i++) {
154 if (i < s.size()) {
155 u8 c = s[s.size() - i - 1];
156 u8 c_hi = (c >> 4) & 0xf;
157 u8 c_lo = c & 0xf;
158 nibbleSets[i * 2] = 1 << c_lo;
159 if (lit.nocase && ourisalpha(c)) {
160 nibbleSets[i * 2 + 1] =
161 (1 << (c_hi & 0xd)) | (1 << (c_hi | 0x2));
162 } else {
163 nibbleSets[i * 2 + 1] = 1 << c_hi;
164 }
165 } else {
166 nibbleSets[i * 2] = nibbleSets[i * 2 + 1] = 0xffff;
167 }
168 }
169 litIds.push_back(lit_id);
170 sort_and_unique(litIds);
171 }
172
173 // return a value p from 0 .. MAXINT64 that gives p/MAXINT64
174 // likelihood of this TeddySet firing a first-stage accept
175 // if it was given a bucket of its own and random data were
176 // to be passed in
177 u64a probability() const {
178 u64a val = 1;
179 for (size_t i = 0; i < nibbleSets.size(); i++) {
180 val *= popcount32((u32)nibbleSets[i]);
181 }
182 return val;
183 }
184
185 // return a score based around the chance of this hitting times
186 // a small fixed cost + the cost of traversing some sort of followup
187 // (assumption is that the followup is linear)
188 u64a heuristic() const {
189 return probability() * (2 + litCount());
190 }
191
192 bool isRunProne() const {
193 u16 lo_and = 0xffff;
194 u16 hi_and = 0xffff;
195 for (u32 i = 0; i < len; i++) {
196 lo_and &= nibbleSets[i * 2];
197 hi_and &= nibbleSets[i * 2 + 1];
198 }
199 // we're not flood-prone if there's no way to get
200 // through with a flood
201 if (!lo_and || !hi_and) {
202 return false;
203 }
204 return true;
205 }
206
207 friend TeddySet merge(const TeddySet &a, const TeddySet &b) {
208 assert(a.nibbleSets.size() == b.nibbleSets.size());
209
210 TeddySet m(a);
211
212 for (size_t i = 0; i < m.nibbleSets.size(); i++) {
213 m.nibbleSets[i] |= b.nibbleSets[i];
214 }
215
216 m.litIds.insert(m.litIds.end(), b.litIds.begin(), b.litIds.end());
217 sort_and_unique(m.litIds);
218
219 return m;
220 }
221};
222
223static
224bool pack(const vector<hwlmLiteral> &lits,
225 const TeddyEngineDescription &eng,
226 map<BucketIndex, std::vector<LiteralIndex>> &bucketToLits) {
227 set<TeddySet> sts;
228
229 for (u32 i = 0; i < lits.size(); i++) {
230 TeddySet ts(eng.numMasks);
231 ts.addLiteral(i, lits[i]);
232 sts.insert(ts);
233 }
234
235 while (1) {
236#ifdef TEDDY_DEBUG
237 printf("Size %zu\n", sts.size());
238 for (const TeddySet &ts : sts) {
239 printf("\n");
240 ts.dump();
241 }
242 printf("\n===============================================\n");
243#endif
244
245 auto m1 = sts.end(), m2 = sts.end();
246 u64a best = 0xffffffffffffffffULL;
247
248 for (auto i1 = sts.begin(), e1 = sts.end(); i1 != e1; ++i1) {
249 const TeddySet &s1 = *i1;
250 for (auto i2 = next(i1), e2 = sts.end(); i2 != e2; ++i2) {
251 const TeddySet &s2 = *i2;
252
253 // be more conservative if we don't absolutely need to
254 // keep packing
255 if ((sts.size() <= eng.getNumBuckets()) &&
256 !s1.identicalTail(s2)) {
257 continue;
258 }
259
260 TeddySet tmpSet = merge(s1, s2);
261 u64a newScore = tmpSet.heuristic();
262 u64a oldScore = s1.heuristic() + s2.heuristic();
263 if (newScore < oldScore) {
264 m1 = i1;
265 m2 = i2;
266 break;
267 } else {
268 u64a score = newScore - oldScore;
269 bool oldRunProne = s1.isRunProne() && s2.isRunProne();
270 bool newRunProne = tmpSet.isRunProne();
271 if (newRunProne && !oldRunProne) {
272 continue;
273 }
274 if (score < best) {
275 best = score;
276 m1 = i1;
277 m2 = i2;
278 }
279 }
280 }
281 }
282 // if we didn't find a merge candidate, bail out
283 if ((m1 == sts.end()) || (m2 == sts.end())) {
284 break;
285 }
286
287 // do the merge
288 TeddySet nts = merge(*m1, *m2);
289#ifdef TEDDY_DEBUG
290 printf("Merging\n");
291 printf("m1 = \n");
292 m1->dump();
293 printf("m2 = \n");
294 m2->dump();
295 printf("nts = \n");
296 nts.dump();
297 printf("\n===============================================\n");
298#endif
299 sts.erase(m1);
300 sts.erase(m2);
301 sts.insert(nts);
302 }
303
304 if (sts.size() > eng.getNumBuckets()) {
305 return false;
306 }
307
308 u32 bucket_id = 0;
309 for (const TeddySet &ts : sts) {
310 const auto &ts_lits = ts.getLits();
311 auto &bucket_lits = bucketToLits[bucket_id];
312 bucket_lits.insert(end(bucket_lits), begin(ts_lits), end(ts_lits));
313 bucket_id++;
314 }
315 return true;
316}
317
318// this entry has all-zero mask to skip reinforcement
319#define NO_REINFORCEMENT N_CHARS
320
321// this means every entry in reinforcement table
322#define ALL_CHAR_SET N_CHARS
323
324// each item's reinforcement mask has REINFORCED_MSK_LEN bytes
325#define REINFORCED_MSK_LEN 8
326
327// reinforcement table size for each 8 buckets set
328#define RTABLE_SIZE ((N_CHARS + 1) * REINFORCED_MSK_LEN)
329
330static
331void initReinforcedTable(u8 *rmsk) {
332 u64a *mask = (u64a *)rmsk;
333 fill_n(mask, N_CHARS, 0x00ffffffffffffffULL);
334}
335
336static
337void fillReinforcedMskZero(u8 *rmsk) {
338 u8 *mc = rmsk + NO_REINFORCEMENT * REINFORCED_MSK_LEN;
339 fill_n(mc, REINFORCED_MSK_LEN, 0x00);
340}
341
342static
343void fillReinforcedMsk(u8 *rmsk, u16 c, u32 j, u8 bmsk) {
344 assert(j > 0);
345 if (c == ALL_CHAR_SET) {
346 for (size_t i = 0; i < N_CHARS; i++) {
347 u8 *mc = rmsk + i * REINFORCED_MSK_LEN;
348 mc[j - 1] &= ~bmsk;
349 }
350 } else {
351 u8 *mc = rmsk + c * REINFORCED_MSK_LEN;
352 mc[j - 1] &= ~bmsk;
353 }
354}
355
356static
357void fillNibbleMasks(const map<BucketIndex,
358 vector<LiteralIndex>> &bucketToLits,
359 const vector<hwlmLiteral> &lits,
360 u32 numMasks, u32 maskWidth, size_t maskLen,
361 u8 *baseMsk) {
362 memset(baseMsk, 0xff, maskLen);
363
364 for (const auto &b2l : bucketToLits) {
365 const u32 &bucket_id = b2l.first;
366 const vector<LiteralIndex> &ids = b2l.second;
367 const u8 bmsk = 1U << (bucket_id % 8);
368
369 for (const LiteralIndex &lit_id : ids) {
370 const hwlmLiteral &l = lits[lit_id];
371 DEBUG_PRINTF("putting lit %u into bucket %u\n", lit_id, bucket_id);
372 const u32 sz = verify_u32(l.s.size());
373
374 // fill in masks
375 for (u32 j = 0; j < numMasks; j++) {
376 const u32 msk_id_lo = j * 2 * maskWidth + (bucket_id / 8);
377 const u32 msk_id_hi = (j * 2 + 1) * maskWidth + (bucket_id / 8);
378 const u32 lo_base = msk_id_lo * 16;
379 const u32 hi_base = msk_id_hi * 16;
380
381 // if we don't have a char at this position, fill in i
382 // locations in these masks with '1'
383 if (j >= sz) {
384 for (u32 n = 0; n < 16; n++) {
385 baseMsk[lo_base + n] &= ~bmsk;
386 baseMsk[hi_base + n] &= ~bmsk;
387 }
388 } else {
389 u8 c = l.s[sz - 1 - j];
390 // if we do have a char at this position
391 const u32 hiShift = 4;
392 u32 n_hi = (c >> hiShift) & 0xf;
393 u32 n_lo = c & 0xf;
394
395 if (j < l.msk.size() && l.msk[l.msk.size() - 1 - j]) {
396 u8 m = l.msk[l.msk.size() - 1 - j];
397 u8 m_hi = (m >> hiShift) & 0xf;
398 u8 m_lo = m & 0xf;
399 u8 cmp = l.cmp[l.msk.size() - 1 - j];
400 u8 cmp_lo = cmp & 0xf;
401 u8 cmp_hi = (cmp >> hiShift) & 0xf;
402
403 for (u8 cm = 0; cm < 0x10; cm++) {
404 if ((cm & m_lo) == (cmp_lo & m_lo)) {
405 baseMsk[lo_base + cm] &= ~bmsk;
406 }
407 if ((cm & m_hi) == (cmp_hi & m_hi)) {
408 baseMsk[hi_base + cm] &= ~bmsk;
409 }
410 }
411 } else {
412 if (l.nocase && ourisalpha(c)) {
413 u32 cmHalfClear = (0xdf >> hiShift) & 0xf;
414 u32 cmHalfSet = (0x20 >> hiShift) & 0xf;
415 baseMsk[hi_base + (n_hi & cmHalfClear)] &= ~bmsk;
416 baseMsk[hi_base + (n_hi | cmHalfSet)] &= ~bmsk;
417 } else {
418 baseMsk[hi_base + n_hi] &= ~bmsk;
419 }
420 baseMsk[lo_base + n_lo] &= ~bmsk;
421 }
422 }
423 }
424 }
425 }
426}
427
428static
429void fillReinforcedTable(const map<BucketIndex,
430 vector<LiteralIndex>> &bucketToLits,
431 const vector<hwlmLiteral> &lits,
432 u8 *rtable_base, const u32 num_tables) {
433 vector<u8 *> tables;
434 for (u32 i = 0; i < num_tables; i++) {
435 tables.push_back(rtable_base + i * RTABLE_SIZE);
436 }
437
438 for (auto t : tables) {
439 initReinforcedTable(t);
440 }
441
442 for (const auto &b2l : bucketToLits) {
443 const u32 &bucket_id = b2l.first;
444 const vector<LiteralIndex> &ids = b2l.second;
445 u8 *rmsk = tables[bucket_id / 8];
446 const u8 bmsk = 1U << (bucket_id % 8);
447
448 for (const LiteralIndex &lit_id : ids) {
449 const hwlmLiteral &l = lits[lit_id];
450 DEBUG_PRINTF("putting lit %u into bucket %u\n", lit_id, bucket_id);
451 const u32 sz = verify_u32(l.s.size());
452
453 // fill in reinforced masks
454 for (u32 j = 1; j < REINFORCED_MSK_LEN; j++) {
455 if (sz - 1 < j) {
456 fillReinforcedMsk(rmsk, ALL_CHAR_SET, j, bmsk);
457 } else {
458 u8 c = l.s[sz - 1 - j];
459 if (l.nocase && ourisalpha(c)) {
460 u8 c_up = c & 0xdf;
461 fillReinforcedMsk(rmsk, c_up, j, bmsk);
462 u8 c_lo = c | 0x20;
463 fillReinforcedMsk(rmsk, c_lo, j, bmsk);
464 } else {
465 fillReinforcedMsk(rmsk, c, j, bmsk);
466 }
467 }
468 }
469 }
470 }
471
472 for (auto t : tables) {
473 fillReinforcedMskZero(t);
474 }
475}
476
477bytecode_ptr<FDR> TeddyCompiler::build() {
478 u32 maskWidth = eng.getNumBuckets() / 8;
479
480 size_t headerSize = sizeof(Teddy);
481 size_t maskLen = eng.numMasks * 16 * 2 * maskWidth;
482 size_t reinforcedMaskLen = RTABLE_SIZE * maskWidth;
483
484 auto floodTable = setupFDRFloodControl(lits, eng, grey);
485 auto confirmTable = setupFullConfs(lits, eng, bucketToLits, make_small);
486
487 // Note: we place each major structure here on a cacheline boundary.
488 size_t size = ROUNDUP_CL(headerSize) + ROUNDUP_CL(maskLen) +
489 ROUNDUP_CL(reinforcedMaskLen) +
490 ROUNDUP_CL(confirmTable.size()) + floodTable.size();
491
492 auto fdr = make_zeroed_bytecode_ptr<FDR>(size, 64);
493 assert(fdr); // otherwise would have thrown std::bad_alloc
494 Teddy *teddy = (Teddy *)fdr.get(); // ugly
495 u8 *teddy_base = (u8 *)teddy;
496
497 // Write header.
498 teddy->size = size;
499 teddy->engineID = eng.getID();
500 teddy->maxStringLen = verify_u32(maxLen(lits));
501 teddy->numStrings = verify_u32(lits.size());
502
503 // Write confirm structures.
504 u8 *ptr = teddy_base + ROUNDUP_CL(headerSize) + ROUNDUP_CL(maskLen) +
505 ROUNDUP_CL(reinforcedMaskLen);
506 assert(ISALIGNED_CL(ptr));
507 teddy->confOffset = verify_u32(ptr - teddy_base);
508 memcpy(ptr, confirmTable.get(), confirmTable.size());
509 ptr += ROUNDUP_CL(confirmTable.size());
510
511 // Write flood control structures.
512 assert(ISALIGNED_CL(ptr));
513 teddy->floodOffset = verify_u32(ptr - teddy_base);
514 memcpy(ptr, floodTable.get(), floodTable.size());
515 ptr += floodTable.size();
516
517 // Write teddy masks.
518 u8 *baseMsk = teddy_base + ROUNDUP_CL(headerSize);
519 fillNibbleMasks(bucketToLits, lits, eng.numMasks, maskWidth, maskLen,
520 baseMsk);
521
522 // Write reinforcement masks.
523 u8 *reinforcedMsk = baseMsk + ROUNDUP_CL(maskLen);
524 fillReinforcedTable(bucketToLits, lits, reinforcedMsk, maskWidth);
525
526 return fdr;
527}
528
529
530static
531bool assignStringsToBuckets(
532 const vector<hwlmLiteral> &lits,
533 TeddyEngineDescription &eng,
534 map<BucketIndex, vector<LiteralIndex>> &bucketToLits) {
535 assert(eng.numMasks <= MAX_NUM_MASKS);
536 if (lits.size() > eng.getNumBuckets() * TEDDY_BUCKET_LOAD) {
537 DEBUG_PRINTF("too many literals: %zu\n", lits.size());
538 return false;
539 }
540
541#ifdef TEDDY_DEBUG
542 for (size_t i = 0; i < lits.size(); i++) {
543 printf("lit %zu (len = %zu, %s) is ", i, lits[i].s.size(),
544 lits[i].nocase ? "caseless" : "caseful");
545 for (size_t j = 0; j < lits[i].s.size(); j++) {
546 printf("%02x", ((u32)lits[i].s[j])&0xff);
547 }
548 printf("\n");
549 }
550#endif
551
552 if (!pack(lits, eng, bucketToLits)) {
553 DEBUG_PRINTF("more lits (%zu) than buckets (%u), can't pack.\n",
554 lits.size(), eng.getNumBuckets());
555 return false;
556 }
557 return true;
558}
559
560} // namespace
561
562bytecode_ptr<FDR> teddyBuildTable(const HWLMProto &proto, const Grey &grey) {
563 TeddyCompiler tc(proto.lits, proto.bucketToLits, *(proto.teddyEng),
564 proto.make_small, grey);
565 return tc.build();
566}
567
568
569unique_ptr<HWLMProto> teddyBuildProtoHinted(
570 u8 engType, const vector<hwlmLiteral> &lits,
571 bool make_small, u32 hint, const target_t &target) {
572 unique_ptr<TeddyEngineDescription> des;
573 if (hint == HINT_INVALID) {
574 des = chooseTeddyEngine(target, lits);
575 } else {
576 des = getTeddyDescription(hint);
577 }
578 if (!des) {
579 return nullptr;
580 }
581
582 map<BucketIndex, std::vector<LiteralIndex>> bucketToLits;
583 if (!assignStringsToBuckets(lits, *des, bucketToLits)) {
584 return nullptr;
585 }
586
587 return ue2::make_unique<HWLMProto>(engType, move(des), lits,
588 bucketToLits, make_small);
589}
590
591} // namespace ue2
592