/*
 * Copyright (c) 2015-2019, Intel Corporation
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 *  * Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *  * Neither the name of Intel Corporation nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include "fdr_internal.h"
#include "fdr_compile_internal.h"
#include "fdr_confirm.h"
#include "engine_description.h"
#include "teddy_engine_description.h"
#include "ue2common.h"
#include "util/alloc.h"
#include "util/bitutils.h"
#include "util/compare.h"
#include "util/container.h"
#include "util/verify_types.h"

#include <algorithm>
#include <cstring>
#include <iterator>
#include <map>
#include <memory>
#include <set>
#include <utility>
#include <vector>

using namespace std;

namespace ue2 {

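// Map from bucket index to the FDRConfirm block built for that bucket.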
using BC2CONF = map<BucketIndex, bytecode_ptr<FDRConfirm>>;

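/**
 * Pack the trailing bytes of \a v into a u64a. On a little-endian host, the
 * last byte of \a v lands in the most significant byte of the result,
 * matching the layout built by fillLitInfo() below. For example, an input of
 * {0x01, 0x02} yields 0x0201000000000000.
 */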
static
u64a make_u64a_mask(const vector<u8> &v) {
    assert(v.size() <= sizeof(u64a));
    if (v.size() > sizeof(u64a)) {
        throw std::exception();
    }

    u64a mask = 0;
    size_t vlen = v.size();
    size_t len = std::min(vlen, sizeof(mask));
    unsigned char *m = (unsigned char *)&mask;
    memcpy(m + sizeof(mask) - len, &v[vlen - len], len);
    return mask;
}

/**
 * Build a temporary vector of LitInfo structures (without the corresponding
 * pointers to the actual strings, which cannot be laid out yet). These stay
 * in 1:1 correspondence with the lits[] vector, as that is the only place we
 * can obtain the full strings.
 */
static
void fillLitInfo(const vector<hwlmLiteral> &lits, vector<LitInfo> &tmpLitInfo,
                 CONF_TYPE &andmsk) {
    const CONF_TYPE all_ones = ~(u64a)0;
    andmsk = all_ones; // fill in with 'and' of all literal masks

    for (LiteralIndex i = 0; i < lits.size(); i++) {
        const hwlmLiteral &lit = lits[i];
        LitInfo &info = tmpLitInfo[i];
        memset(&info, 0, sizeof(info));
        info.id = lit.id;
        u8 flags = 0;
        if (lit.noruns) {
            flags |= FDR_LIT_FLAG_NOREPEAT;
        }
        info.flags = flags;
        info.size = verify_u8(max(lit.msk.size(), lit.s.size()));
        info.groups = lit.groups;
        info.pure = lit.pure;

        // These are built up assuming a little-endian machine.
        CONF_TYPE msk = all_ones;
        CONF_TYPE val = 0;
        for (u32 j = 0; j < sizeof(CONF_TYPE); j++) {
            u32 shiftLoc = (sizeof(CONF_TYPE) - j - 1) * 8;
            if (j >= lit.s.size()) {
                msk &= ~((CONF_TYPE)0xff << shiftLoc);
            } else {
                u8 c = lit.s[lit.s.size() - j - 1];
                if (lit.nocase && ourisalpha(c)) {
                    msk &= ~((CONF_TYPE)CASE_BIT << shiftLoc);
                    val |= (CONF_TYPE)(c & CASE_CLEAR) << shiftLoc;
                } else {
                    val |= (CONF_TYPE)c << shiftLoc;
                }
            }
        }

        info.v = val;
        info.msk = msk;
        if (!lit.msk.empty()) {
            u64a l_msk = make_u64a_mask(lit.msk);
            u64a l_cmp = make_u64a_mask(lit.cmp);

            // Consistency check: where the masks intersect, the v and cmp
            // values must agree.
            UNUSED u64a intersection = l_msk & info.msk;
            assert((info.v & intersection) == (l_cmp & intersection));

            // Incorporate lit.msk and lit.cmp into msk and v.
            info.msk |= l_msk;
            info.v |= l_cmp;
        }

        andmsk &= info.msk;
    }
}

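// Define FDR_CONFIRM_DUMP to print diagnostics about the confirm hash table
// (literals grouped by hash value) while it is being built.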
//#define FDR_CONFIRM_DUMP 1

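/**
 * Build an FDRConfirm block for the given set of literals: a hash table of
 * (1U << nBits) offsets, each pointing at a chain of LitInfo records that is
 * walked at run time to confirm a candidate match.
 */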
static
bytecode_ptr<FDRConfirm> getFDRConfirm(const vector<hwlmLiteral> &lits,
                                       bool make_small) {
    // Every literal must fit within CONF_TYPE.
    assert(all_of_in(lits, [](const hwlmLiteral &lit) {
        return lit.s.size() <= sizeof(CONF_TYPE);
    }));

    vector<LitInfo> tmpLitInfo(lits.size());
    CONF_TYPE andmsk;
    fillLitInfo(lits, tmpLitInfo, andmsk);

#ifdef FDR_CONFIRM_DUMP
    printf("-------------------\n");
#endif

    // just magic numbers and crude measures for now
    u32 nBits;
    if (make_small) {
        nBits = min(10U, lg2(lits.size()) + 1);
    } else {
        nBits = lg2(lits.size()) + 4;
    }

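    // mult is the multiplier for the hash computed by CONF_HASH_CALL below;
    // each hash value indexes the (1U << nBits)-entry bitsToLitIndex table.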
    CONF_TYPE mult = (CONF_TYPE)0x0b4e0ef37bc32127ULL;

    // Walk the literal vector and group the literals into a map keyed by
    // hash value.
    map<u32, vector<LiteralIndex>> res2lits;
    hwlm_group_t gm = 0;
    for (LiteralIndex i = 0; i < lits.size(); i++) {
        LitInfo &li = tmpLitInfo[i];
        u32 hash = CONF_HASH_CALL(li.v, andmsk, mult, nBits);
        DEBUG_PRINTF("%016llx --> %u\n", li.v, hash);
        res2lits[hash].push_back(i);
        gm |= li.groups;
    }

#ifdef FDR_CONFIRM_DUMP
    // Print out the literals reversed - makes it easier to line up analyses
    // that are end-offset based.
    for (const auto &m : res2lits) {
        const u32 &hash = m.first;
        const vector<LiteralIndex> &vlidx = m.second;
        if (vlidx.size() <= 1) {
            continue;
        }
        printf("%x -> %zu literals\n", hash, vlidx.size());
        size_t min_len = lits[vlidx.front()].s.size();

        vector<set<u8>> vsl; // set of chars at each location, reversed from
                             // the end of the literal

        for (const auto &litIdx : vlidx) {
            const auto &lit = lits[litIdx];
            if (lit.s.size() > vsl.size()) {
                vsl.resize(lit.s.size());
            }
            for (size_t j = lit.s.size(); j != 0; j--) {
                vsl[lit.s.size() - j].insert(lit.s[j - 1]);
            }
            min_len = min(min_len, lit.s.size());
        }
        printf("common ");
        for (size_t j = 0; j < min_len; j++) {
            if (vsl[j].size() == 1) {
                printf("%02x", *vsl[j].begin());
            } else {
                printf("__");
            }
        }
        printf("\n");
        for (const auto &litIdx : vlidx) {
            const auto &lit = lits[litIdx];
            printf("%8x %c", lit.id, lit.nocase ? '!' : ' ');
            for (size_t j = lit.s.size(); j != 0; j--) {
                size_t dist_from_end = lit.s.size() - j;
                if (dist_from_end < min_len && vsl[dist_from_end].size() == 1) {
                    printf("__");
                } else {
                    printf("%02x", lit.s[j - 1]);
                }
            }
            printf("\n");
        }
        size_t total_compares = 0;
        for (const auto &v : vsl) {
            total_compares += v.size();
        }
        size_t total_string_size = 0;
        for (const auto &litIdx : vlidx) {
            const auto &lit = lits[litIdx];
            total_string_size += lit.s.size();
        }
        printf("Total compare load: %zu Total string size: %zu\n\n",
               total_compares, total_string_size);
    }
#endif

    const size_t bitsToLitIndexSize = (1U << nBits) * sizeof(u32);

    // This size is a worst case; the actual layout may be a little smaller,
    // and we shrink the allocation to fit at the end.
    size_t size = ROUNDUP_N(sizeof(FDRConfirm), alignof(u32)) +
                  ROUNDUP_N(bitsToLitIndexSize, alignof(LitInfo)) +
                  sizeof(LitInfo) * lits.size();
    size = ROUNDUP_N(size, alignof(FDRConfirm));

    auto fdrc = make_zeroed_bytecode_ptr<FDRConfirm>(size);
    assert(fdrc); // otherwise would have thrown std::bad_alloc

    fdrc->andmsk = andmsk;
    fdrc->mult = mult;
    fdrc->nBits = nBits;

    fdrc->groups = gm;

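    /*
     * Layout of the block we are about to fill in:
     *
     *   FDRConfirm header
     *   u32 bitsToLitIndex[1U << nBits]   // offset of the first LitInfo in
     *                                     // the chain for each hash value
     *   LitInfo records, chained contiguously via their 'next' field
     */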
    // After the FDRConfirm, we have the lit index array.
    u8 *fdrc_base = (u8 *)fdrc.get();
    u8 *ptr = fdrc_base + sizeof(*fdrc);
    ptr = ROUNDUP_PTR(ptr, alignof(u32));
    u32 *bitsToLitIndex = (u32 *)ptr;
    ptr += bitsToLitIndexSize;

    // After the lit index array, we have the LitInfo structures themselves.
    // Every literal fits within CONF_TYPE (asserted above), so no string data
    // needs to be laid out after each LitInfo.
    ptr = ROUNDUP_PTR(ptr, alignof(LitInfo));

    // Walk the map by hash value, assigning table entries and laying out the
    // LitInfo chains in memory.
    for (const auto &m : res2lits) {
        const u32 hash = m.first;
        const vector<LiteralIndex> &vlidx = m.second;
        bitsToLitIndex[hash] = verify_u32(ptr - fdrc_base);
        for (auto i = vlidx.begin(), e = vlidx.end(); i != e; ++i) {
            LiteralIndex litIdx = *i;

            // Write LitInfo header.
            LitInfo &finalLI = *(LitInfo *)ptr;
            finalLI = tmpLitInfo[litIdx];

            ptr += sizeof(LitInfo); // Next LitInfo (if any) follows directly.
            assert(lits[litIdx].s.size() <= sizeof(CONF_TYPE));
            if (next(i) == e) {
                finalLI.next = 0;
            } else {
                finalLI.next = 1;
            }
        }
        assert((size_t)(ptr - fdrc_base) <= size);
    }

    // Return actual used size, not worst-case size. Must be rounded up to
    // FDRConfirm alignment so that the caller can lay out a sequence of these.
    size_t actual_size = ROUNDUP_N((size_t)(ptr - fdrc_base),
                                   alignof(FDRConfirm));
    assert(actual_size <= size);
    fdrc.shrink(actual_size);

    return fdrc;
}

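/**
 * Build the full confirm region for an engine: a table of u32 offsets (one
 * per bucket, padded to a cache line), followed by the FDRConfirm block for
 * each bucket that contains literals. Offsets are relative to the start of
 * the returned buffer; buckets with no literals keep an offset of zero.
 */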
bytecode_ptr<u8>
setupFullConfs(const vector<hwlmLiteral> &lits,
               const EngineDescription &eng,
               const map<BucketIndex, vector<LiteralIndex>> &bucketToLits,
               bool make_small) {
    unique_ptr<TeddyEngineDescription> teddyDescr =
        getTeddyDescription(eng.getID());

    BC2CONF bc2Conf;
    u32 totalConfirmSize = 0;
    for (BucketIndex b = 0; b < eng.getNumBuckets(); b++) {
        if (contains(bucketToLits, b)) {
            vector<hwlmLiteral> vl;
            for (const LiteralIndex &lit_idx : bucketToLits.at(b)) {
                vl.push_back(lits[lit_idx]);
            }

            DEBUG_PRINTF("b %d sz %zu\n", b, vl.size());
            auto fc = getFDRConfirm(vl, make_small);
            totalConfirmSize += fc.size();
            bc2Conf.emplace(b, move(fc));
        }
    }

    u32 nBuckets = eng.getNumBuckets();
    u32 totalConfSwitchSize = ROUNDUP_CL(nBuckets * sizeof(u32));
    u32 totalSize = totalConfSwitchSize + totalConfirmSize;

    auto buf = make_zeroed_bytecode_ptr<u8>(totalSize, 64);
    assert(buf); // otherwise would have thrown std::bad_alloc

    u32 *confBase = (u32 *)buf.get();
    u8 *ptr = buf.get() + totalConfSwitchSize;
    assert(ISALIGNED_CL(ptr));

    for (const auto &m : bc2Conf) {
        const BucketIndex &idx = m.first;
        const bytecode_ptr<FDRConfirm> &p = m.second;
        // Confirm offsets are relative to the base of this structure.
        u32 confirm_offset = verify_u32(ptr - buf.get());
        memcpy(ptr, p.get(), p.size());
        ptr += p.size();
        confBase[idx] = confirm_offset;
    }

    return buf;
}

} // namespace ue2