1 | /* |
2 | * Copyright (c) 2015-2019, Intel Corporation |
3 | * |
4 | * Redistribution and use in source and binary forms, with or without |
5 | * modification, are permitted provided that the following conditions are met: |
6 | * |
7 | * * Redistributions of source code must retain the above copyright notice, |
8 | * this list of conditions and the following disclaimer. |
9 | * * Redistributions in binary form must reproduce the above copyright |
10 | * notice, this list of conditions and the following disclaimer in the |
11 | * documentation and/or other materials provided with the distribution. |
12 | * * Neither the name of Intel Corporation nor the names of its contributors |
13 | * may be used to endorse or promote products derived from this software |
14 | * without specific prior written permission. |
15 | * |
16 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
17 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
18 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
19 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE |
20 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
21 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
22 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
23 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
24 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
25 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
26 | * POSSIBILITY OF SUCH DAMAGE. |
27 | */ |
28 | |
29 | #include "fdr_internal.h" |
30 | #include "fdr_compile_internal.h" |
31 | #include "fdr_confirm.h" |
32 | #include "engine_description.h" |
33 | #include "teddy_engine_description.h" |
34 | #include "ue2common.h" |
35 | #include "util/alloc.h" |
36 | #include "util/bitutils.h" |
37 | #include "util/compare.h" |
38 | #include "util/container.h" |
39 | #include "util/verify_types.h" |
40 | |
41 | #include <algorithm> |
42 | #include <cstring> |
43 | #include <set> |
44 | |
45 | using namespace std; |
46 | |
47 | namespace ue2 { |
48 | |
49 | using BC2CONF = map<BucketIndex, bytecode_ptr<FDRConfirm>>; |
50 | |
51 | static |
52 | u64a make_u64a_mask(const vector<u8> &v) { |
53 | assert(v.size() <= sizeof(u64a)); |
54 | if (v.size() > sizeof(u64a)) { |
55 | throw std::exception(); |
56 | } |
57 | |
58 | u64a mask = 0; |
59 | size_t vlen = v.size(); |
60 | size_t len = std::min(vlen, sizeof(mask)); |
61 | unsigned char *m = (unsigned char *)&mask; |
62 | memcpy(m + sizeof(mask) - len, &v[vlen - len], len); |
63 | return mask; |
64 | } |
65 | |
66 | /** |
67 | * Build a temporary vector of LitInfo structures (without the corresponding |
68 | * pointers to the actual strings; these cannot be laid out yet). These |
69 | * stay in 1:1 correspondence with the lits[] vector as that's the only |
70 | * place we have to obtain our full strings. |
71 | */ |
72 | static |
73 | void fillLitInfo(const vector<hwlmLiteral> &lits, vector<LitInfo> &tmpLitInfo, |
74 | CONF_TYPE &andmsk) { |
75 | const CONF_TYPE all_ones = ~(u64a)0; |
76 | andmsk = all_ones; // fill in with 'and' of all literal masks |
77 | |
78 | for (LiteralIndex i = 0; i < lits.size(); i++) { |
79 | const hwlmLiteral &lit = lits[i]; |
80 | LitInfo &info = tmpLitInfo[i]; |
81 | memset(&info, 0, sizeof(info)); |
82 | info.id = lit.id; |
83 | u8 flags = 0; |
84 | if (lit.noruns) { |
85 | flags |= FDR_LIT_FLAG_NOREPEAT; |
86 | } |
87 | info.flags = flags; |
88 | info.size = verify_u8(max(lit.msk.size(), lit.s.size())); |
89 | info.groups = lit.groups; |
90 | info.pure = lit.pure; |
91 | |
92 | // these are built up assuming a LE machine |
93 | CONF_TYPE msk = all_ones; |
94 | CONF_TYPE val = 0; |
95 | for (u32 j = 0; j < sizeof(CONF_TYPE); j++) { |
96 | u32 shiftLoc = (sizeof(CONF_TYPE) - j - 1) * 8; |
97 | if (j >= lit.s.size()) { |
98 | msk &= ~((CONF_TYPE)0xff << shiftLoc); |
99 | } else { |
100 | u8 c = lit.s[lit.s.size() - j - 1]; |
101 | if (lit.nocase && ourisalpha(c)) { |
102 | msk &= ~((CONF_TYPE)CASE_BIT << shiftLoc); |
103 | val |= (CONF_TYPE)(c & CASE_CLEAR) << shiftLoc; |
104 | } else { |
105 | val |= (CONF_TYPE)c << shiftLoc; |
106 | } |
107 | } |
108 | } |
109 | |
110 | info.v = val; |
111 | info.msk = msk; |
112 | if (!lit.msk.empty()) { |
113 | u64a l_msk = make_u64a_mask(lit.msk); |
114 | u64a l_cmp = make_u64a_mask(lit.cmp); |
115 | |
116 | // test for consistency - if there's intersection, then v and msk |
117 | // values must line up |
118 | UNUSED u64a intersection = l_msk & info.msk; |
119 | assert((info.v & intersection) == (l_cmp & intersection)); |
120 | |
121 | // incorporate lit.msk, lit.cmp into v and msk |
122 | info.msk |= l_msk; |
123 | info.v |= l_cmp; |
124 | } |
125 | |
126 | andmsk &= info.msk; |
127 | } |
128 | } |
129 | |
130 | //#define FDR_CONFIRM_DUMP 1 |
131 | |
132 | static |
133 | bytecode_ptr<FDRConfirm> getFDRConfirm(const vector<hwlmLiteral> &lits, |
134 | bool make_small) { |
135 | // Every literal must fit within CONF_TYPE. |
136 | assert(all_of_in(lits, [](const hwlmLiteral &lit) { |
137 | return lit.s.size() <= sizeof(CONF_TYPE); |
138 | })); |
139 | |
140 | vector<LitInfo> tmpLitInfo(lits.size()); |
141 | CONF_TYPE andmsk; |
142 | fillLitInfo(lits, tmpLitInfo, andmsk); |
143 | |
144 | #ifdef FDR_CONFIRM_DUMP |
145 | printf("-------------------\n" ); |
146 | #endif |
147 | |
148 | // just magic numbers and crude measures for now |
149 | u32 nBits; |
150 | if (make_small) { |
151 | nBits = min(10U, lg2(lits.size()) + 1); |
152 | } else { |
153 | nBits = lg2(lits.size()) + 4; |
154 | } |
155 | |
156 | CONF_TYPE mult = (CONF_TYPE)0x0b4e0ef37bc32127ULL; |
157 | |
158 | // we can walk the vector and assign elements from the vectors to a |
159 | // map by hash value |
160 | map<u32, vector<LiteralIndex> > res2lits; |
161 | hwlm_group_t gm = 0; |
162 | for (LiteralIndex i = 0; i < lits.size(); i++) { |
163 | LitInfo & li = tmpLitInfo[i]; |
164 | u32 hash = CONF_HASH_CALL(li.v, andmsk, mult, nBits); |
165 | DEBUG_PRINTF("%016llx --> %u\n" , li.v, hash); |
166 | res2lits[hash].push_back(i); |
167 | gm |= li.groups; |
168 | } |
169 | |
170 | #ifdef FDR_CONFIRM_DUMP |
171 | // print out the literals reversed - makes it easier to line up analyses |
172 | // that are end-offset based |
173 | for (const auto &m : res2lits) { |
174 | const u32 &hash = m.first; |
175 | const vector<LiteralIndex> &vlidx = m.second; |
176 | if (vlidx.size() <= 1) { |
177 | continue; |
178 | } |
179 | printf("%x -> %zu literals\n" , hash, vlidx.size()); |
180 | size_t min_len = lits[vlidx.front()].s.size(); |
181 | |
182 | vector<set<u8>> vsl; // contains the set of chars at each location |
183 | // reversed from the end |
184 | |
185 | for (const auto &litIdx : vlidx) { |
186 | const auto &lit = lits[litIdx]; |
187 | if (lit.s.size() > vsl.size()) { |
188 | vsl.resize(lit.s.size()); |
189 | } |
190 | for (size_t j = lit.s.size(); j != 0; j--) { |
191 | vsl[lit.s.size() - j].insert(lit.s[j - 1]); |
192 | } |
193 | min_len = min(min_len, lit.s.size()); |
194 | } |
195 | printf("common " ); |
196 | for (size_t j = 0; j < min_len; j++) { |
197 | if (vsl[j].size() == 1) { |
198 | printf("%02x" , *vsl[j].begin()); |
199 | } else { |
200 | printf("__" ); |
201 | } |
202 | } |
203 | printf("\n" ); |
204 | for (const auto &litIdx : vlidx) { |
205 | const auto &lit = lits[litIdx]; |
206 | printf("%8x %c" , lit.id, lit.nocase ? '!' : ' '); |
207 | for (size_t j = lit.s.size(); j != 0; j--) { |
208 | size_t dist_from_end = lit.s.size() - j; |
209 | if (dist_from_end < min_len && vsl[dist_from_end].size() == 1) { |
210 | printf("__" ); |
211 | } else { |
212 | printf("%02x" , lit.s[j - 1]); |
213 | } |
214 | } |
215 | printf("\n" ); |
216 | } |
217 | size_t total_compares = 0; |
218 | for (const auto &v : vsl) { |
219 | total_compares += v.size(); |
220 | } |
221 | size_t total_string_size = 0; |
222 | for (const auto &litIdx : vlidx) { |
223 | const auto &lit = lits[litIdx]; |
224 | total_string_size += lit.s.size(); |
225 | } |
226 | printf("Total compare load: %zu Total string size: %zu\n\n" , |
227 | total_compares, total_string_size); |
228 | } |
229 | #endif |
230 | |
231 | const size_t bitsToLitIndexSize = (1U << nBits) * sizeof(u32); |
232 | |
233 | // this size can now be a worst-case as we can always be a bit smaller |
234 | size_t size = ROUNDUP_N(sizeof(FDRConfirm), alignof(u32)) + |
235 | ROUNDUP_N(bitsToLitIndexSize, alignof(LitInfo)) + |
236 | sizeof(LitInfo) * lits.size(); |
237 | size = ROUNDUP_N(size, alignof(FDRConfirm)); |
238 | |
239 | auto fdrc = make_zeroed_bytecode_ptr<FDRConfirm>(size); |
240 | assert(fdrc); // otherwise would have thrown std::bad_alloc |
241 | |
242 | fdrc->andmsk = andmsk; |
243 | fdrc->mult = mult; |
244 | fdrc->nBits = nBits; |
245 | |
246 | fdrc->groups = gm; |
247 | |
248 | // After the FDRConfirm, we have the lit index array. |
249 | u8 *fdrc_base = (u8 *)fdrc.get(); |
250 | u8 *ptr = fdrc_base + sizeof(*fdrc); |
251 | ptr = ROUNDUP_PTR(ptr, alignof(u32)); |
252 | u32 *bitsToLitIndex = (u32 *)ptr; |
253 | ptr += bitsToLitIndexSize; |
254 | |
255 | // After the lit index array, we have the LitInfo structures themselves, |
256 | // which vary in size (as each may have a variable-length string after it). |
257 | ptr = ROUNDUP_PTR(ptr, alignof(LitInfo)); |
258 | |
259 | // Walk the map by hash value assigning indexes and laying out the |
260 | // elements (and their associated string confirm material) in memory. |
261 | for (const auto &m : res2lits) { |
262 | const u32 hash = m.first; |
263 | const vector<LiteralIndex> &vlidx = m.second; |
264 | bitsToLitIndex[hash] = verify_u32(ptr - fdrc_base); |
265 | for (auto i = vlidx.begin(), e = vlidx.end(); i != e; ++i) { |
266 | LiteralIndex litIdx = *i; |
267 | |
268 | // Write LitInfo header. |
269 | LitInfo &finalLI = *(LitInfo *)ptr; |
270 | finalLI = tmpLitInfo[litIdx]; |
271 | |
272 | ptr += sizeof(LitInfo); // String starts directly after LitInfo. |
273 | assert(lits[litIdx].s.size() <= sizeof(CONF_TYPE)); |
274 | if (next(i) == e) { |
275 | finalLI.next = 0; |
276 | } else { |
277 | finalLI.next = 1; |
278 | } |
279 | } |
280 | assert((size_t)(ptr - fdrc_base) <= size); |
281 | } |
282 | |
283 | // Return actual used size, not worst-case size. Must be rounded up to |
284 | // FDRConfirm alignment so that the caller can lay out a sequence of these. |
285 | size_t actual_size = ROUNDUP_N((size_t)(ptr - fdrc_base), |
286 | alignof(FDRConfirm)); |
287 | assert(actual_size <= size); |
288 | fdrc.shrink(actual_size); |
289 | |
290 | return fdrc; |
291 | } |
292 | |
293 | bytecode_ptr<u8> |
294 | setupFullConfs(const vector<hwlmLiteral> &lits, |
295 | const EngineDescription &eng, |
296 | const map<BucketIndex, vector<LiteralIndex>> &bucketToLits, |
297 | bool make_small) { |
298 | unique_ptr<TeddyEngineDescription> teddyDescr = |
299 | getTeddyDescription(eng.getID()); |
300 | |
301 | BC2CONF bc2Conf; |
302 | u32 totalConfirmSize = 0; |
303 | for (BucketIndex b = 0; b < eng.getNumBuckets(); b++) { |
304 | if (contains(bucketToLits, b)) { |
305 | vector<hwlmLiteral> vl; |
306 | for (const LiteralIndex &lit_idx : bucketToLits.at(b)) { |
307 | vl.push_back(lits[lit_idx]); |
308 | } |
309 | |
310 | DEBUG_PRINTF("b %d sz %zu\n" , b, vl.size()); |
311 | auto fc = getFDRConfirm(vl, make_small); |
312 | totalConfirmSize += fc.size(); |
313 | bc2Conf.emplace(b, move(fc)); |
314 | } |
315 | } |
316 | |
317 | u32 nBuckets = eng.getNumBuckets(); |
318 | u32 totalConfSwitchSize = ROUNDUP_CL(nBuckets * sizeof(u32)); |
319 | u32 totalSize = totalConfSwitchSize + totalConfirmSize; |
320 | |
321 | auto buf = make_zeroed_bytecode_ptr<u8>(totalSize, 64); |
322 | assert(buf); // otherwise would have thrown std::bad_alloc |
323 | |
324 | u32 *confBase = (u32 *)buf.get(); |
325 | u8 *ptr = buf.get() + totalConfSwitchSize; |
326 | assert(ISALIGNED_CL(ptr)); |
327 | |
328 | for (const auto &m : bc2Conf) { |
329 | const BucketIndex &idx = m.first; |
330 | const bytecode_ptr<FDRConfirm> &p = m.second; |
331 | // confirm offset is relative to the base of this structure, now |
332 | u32 confirm_offset = verify_u32(ptr - buf.get()); |
333 | memcpy(ptr, p.get(), p.size()); |
334 | ptr += p.size(); |
335 | confBase[idx] = confirm_offset; |
336 | } |
337 | |
338 | return buf; |
339 | } |
340 | |
341 | } // namespace ue2 |
342 | |