1 | /* |
2 | * Copyright (c) 2016-2017, Intel Corporation |
3 | * |
4 | * Redistribution and use in source and binary forms, with or without |
5 | * modification, are permitted provided that the following conditions are met: |
6 | * |
7 | * * Redistributions of source code must retain the above copyright notice, |
8 | * this list of conditions and the following disclaimer. |
9 | * * Redistributions in binary form must reproduce the above copyright |
10 | * notice, this list of conditions and the following disclaimer in the |
11 | * documentation and/or other materials provided with the distribution. |
12 | * * Neither the name of Intel Corporation nor the names of its contributors |
13 | * may be used to endorse or promote products derived from this software |
14 | * without specific prior written permission. |
15 | * |
16 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
17 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
18 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
19 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE |
20 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
21 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
22 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
23 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
24 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
25 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
26 | * POSSIBILITY OF SUCH DAMAGE. |
27 | */ |
28 | |
29 | #include "rose_build_long_lit.h" |
30 | |
31 | #include "rose_build_engine_blob.h" |
32 | #include "rose_build_impl.h" |
33 | #include "stream_long_lit_hash.h" |
34 | #include "util/bytecode_ptr.h" |
35 | #include "util/bitutils.h" |
36 | #include "util/verify_types.h" |
37 | #include "util/compile_context.h" |
38 | |
39 | #include <algorithm> |
40 | #include <numeric> |
41 | |
42 | using namespace std; |
43 | |
44 | namespace ue2 { |
45 | |
/** \brief Minimum size for a non-empty hash table. Must be a power of two. */
static constexpr size_t MIN_HASH_TABLE_SIZE = 128;

/** \brief Maximum load factor (between zero and one) for a hash table.
 * Table sizing in makeHashTable() is derived from this bound. */
static constexpr double MAX_HASH_TABLE_LOAD = 0.7;

/** \brief Minimum size (in bits) for a bloom filter. Must be a power of two. */
static constexpr u32 MIN_BLOOM_FILTER_SIZE = 256;

/** \brief Maximum load factor (between zero and one) for a bloom filter.
 * makeBloomFilter() doubles the filter size until the load is below this. */
static constexpr double MAX_BLOOM_FILTER_LOAD = 0.25;
57 | |
/** \brief Statistics for one case-sensitivity mode (caseful or nocase) of the
 * long literal set, gathered by analyzeLongLits(). */
struct LongLitModeInfo {
    u32 num_literals = 0; //!< Number of strings for this mode.
    u32 hashed_positions = 0; //!< Number of hashable string positions.
};
62 | |
/** \brief Long literal analysis results, split by caseful/nocase mode. */
struct LongLitInfo {
    LongLitModeInfo caseful; //!< Stats for case-sensitive literals.
    LongLitModeInfo nocase;  //!< Stats for case-insensitive literals.
};
67 | |
68 | static |
69 | u32 roundUpToPowerOfTwo(u32 x) { |
70 | assert(x != 0); |
71 | u32 bits = lg2(x - 1) + 1; |
72 | assert(bits < 32); |
73 | return 1U << bits; |
74 | } |
75 | |
76 | static |
77 | LongLitInfo analyzeLongLits(const vector<ue2_case_string> &lits, |
78 | size_t max_len) { |
79 | LongLitInfo info; |
80 | |
81 | for (const auto &lit : lits) { |
82 | auto &lit_info = lit.nocase ? info.nocase : info.caseful; |
83 | assert(lit.s.size() > max_len); |
84 | lit_info.num_literals++; |
85 | lit_info.hashed_positions += lit.s.size() - max_len; |
86 | } |
87 | |
88 | DEBUG_PRINTF("case: hashed %u positions\n" , info.caseful.hashed_positions); |
89 | DEBUG_PRINTF("nocase: hashed %u positions\n" , info.nocase.hashed_positions); |
90 | |
91 | return info; |
92 | } |
93 | |
94 | static |
95 | void addToBloomFilter(vector<u8> &bloom, const u8 *substr, bool nocase) { |
96 | const u32 num_keys = verify_u32(bloom.size() * 8); |
97 | const u32 key_mask = (1U << lg2(num_keys)) -1; |
98 | |
99 | const auto hash_functions = { bloomHash_1, bloomHash_2, bloomHash_3 }; |
100 | for (const auto &hash_func : hash_functions) { |
101 | u32 hash = hash_func(substr, nocase); |
102 | u32 key = hash & key_mask; |
103 | DEBUG_PRINTF("set key %u (of %zu)\n" , key, bloom.size() * 8); |
104 | bloom[key / 8] |= 1U << (key % 8); |
105 | } |
106 | } |
107 | |
108 | static |
109 | size_t bloomOccupancy(const vector<u8> &bloom) { |
110 | return accumulate(begin(bloom), end(bloom), 0, |
111 | [](const size_t &sum, const u8 &elem) { |
112 | return sum + popcount32(elem); |
113 | }); |
114 | } |
115 | |
116 | static |
117 | double bloomLoad(const vector<u8> &bloom) { |
118 | return (double)bloomOccupancy(bloom) / (double)(bloom.size() * 8); |
119 | } |
120 | |
121 | static |
122 | vector<u8> buildBloomFilter(const vector<ue2_case_string> &lits, size_t max_len, |
123 | size_t num_entries, bool nocase) { |
124 | assert(num_entries % 8 == 0); |
125 | assert((num_entries & (num_entries - 1)) == 0); // Must be power of two. |
126 | |
127 | vector<u8> bloom(num_entries / 8, 0); |
128 | |
129 | if (!num_entries) { |
130 | return bloom; |
131 | } |
132 | |
133 | for (const auto &lit : lits) { |
134 | if (nocase != lit.nocase) { |
135 | continue; |
136 | } |
137 | for (u32 offset = 1; offset < lit.s.size() - max_len + 1; offset++) { |
138 | const u8 *substr = (const u8 *)lit.s.c_str() + offset; |
139 | addToBloomFilter(bloom, substr, nocase); |
140 | } |
141 | } |
142 | |
143 | DEBUG_PRINTF("%s bloom filter occupancy %zu of %zu entries\n" , |
144 | nocase ? "nocase" : "caseful" , bloomOccupancy(bloom), |
145 | num_entries); |
146 | |
147 | return bloom; |
148 | } |
149 | |
150 | |
151 | static |
152 | vector<u8> makeBloomFilter(const vector<ue2_case_string> &lits, |
153 | size_t max_len, bool nocase) { |
154 | vector<u8> bloom; |
155 | |
156 | size_t num_entries = MIN_BLOOM_FILTER_SIZE; |
157 | for (;;) { |
158 | bloom = buildBloomFilter(lits, max_len, num_entries, nocase); |
159 | DEBUG_PRINTF("built %s bloom for %zu entries: load %f\n" , |
160 | nocase ? "nocase" : "caseful" , num_entries, |
161 | bloomLoad(bloom)); |
162 | if (bloomLoad(bloom) < MAX_BLOOM_FILTER_LOAD) { |
163 | break; |
164 | } |
165 | num_entries *= 2; |
166 | } |
167 | return bloom; |
168 | } |
169 | |
170 | static UNUSED |
171 | size_t hashTableOccupancy(const vector<RoseLongLitHashEntry> &tab) { |
172 | return count_if(begin(tab), end(tab), [](const RoseLongLitHashEntry &ent) { |
173 | return ent.str_offset != 0; |
174 | }); |
175 | } |
176 | |
177 | static UNUSED |
178 | double hashTableLoad(const vector<RoseLongLitHashEntry> &tab) { |
179 | return (double)hashTableOccupancy(tab) / (double)(tab.size()); |
180 | } |
181 | |
// All (literal id, substring offset) pairs sharing one hash value. Inline
// capacity of one covers the common collision-free case without allocating.
using LitOffsetVector = small_vector<pair<u32, u32>, 1>;
183 | |
/**
 * \brief Build the open-addressed hash table of literal prefix entries.
 *
 * Each (lit_id, offset) pair becomes one RoseLongLitHashEntry recording the
 * literal's offset in the string blob (str_offset) and the prefix length
 * covered (offset + max_len). Collisions are resolved by linear probing with
 * wraparound; an entry with str_offset == 0 denotes an empty slot, so real
 * string offsets must be non-zero (asserted below; the string blob is laid
 * out after the non-empty table header).
 *
 * \param max_len length of the hashed substring window.
 * \param litToOffsetVal maps literal id to its offset in the string blob.
 * \param hashToLitOffPairs hash value -> (lit_id, offset) pairs, as produced
 *        by computeLitHashes().
 * \param numEntries number of slots; zero yields an empty table.
 */
static
vector<RoseLongLitHashEntry> buildHashTable(
    size_t max_len, const vector<u32> &litToOffsetVal,
    const map<u32, LitOffsetVector> &hashToLitOffPairs,
    size_t numEntries) {
    vector<RoseLongLitHashEntry> tab(numEntries, {0,0});

    if (!numEntries) {
        return tab;
    }

    for (const auto &m : hashToLitOffPairs) {
        u32 hash = m.first;
        const LitOffsetVector &d = m.second;

        u32 bucket = hash % numEntries;

        // Placement via linear probing.
        for (const auto &lit_offset : d) {
            // Find the next free slot, wrapping at the end of the table.
            while (tab[bucket].str_offset != 0) {
                bucket++;
                if (bucket == numEntries) {
                    bucket = 0;
                }
            }

            u32 lit_id = lit_offset.first;
            u32 offset = lit_offset.second;

            DEBUG_PRINTF("hash 0x%08x lit_id %u offset %u bucket %u\n", hash,
                         lit_id, offset, bucket);

            auto &entry = tab[bucket];
            entry.str_offset = verify_u32(litToOffsetVal.at(lit_id));
            assert(entry.str_offset != 0);
            entry.str_len = offset + max_len;
        }
    }

    DEBUG_PRINTF("hash table occupancy %zu of %zu entries\n",
                 hashTableOccupancy(tab), numEntries);

    return tab;
}
228 | |
/**
 * \brief Hash every hashable substring position of the literals matching
 * \p nocase, grouping (lit_id, offset) pairs by hash value.
 *
 * For each literal, the max_len-byte window starting at every offset >= 1 is
 * hashed with hashLongLiteral(). Within each hash group, entries whose
 * literal prefixes (of length offset + max_len) are identical are
 * deduplicated, and the survivors are ordered with the largest offset
 * ('furthest back' string) first — the order buildHashTable() will place
 * them in.
 */
static
map<u32, LitOffsetVector> computeLitHashes(const vector<ue2_case_string> &lits,
                                           size_t max_len, bool nocase) {
    map<u32, LitOffsetVector> hashToLitOffPairs;

    for (u32 lit_id = 0; lit_id < lits.size(); lit_id++) {
        const ue2_case_string &lit = lits[lit_id];
        if (nocase != lit.nocase) {
            continue; // Wrong case mode for this table.
        }
        for (u32 offset = 1; offset < lit.s.size() - max_len + 1; offset++) {
            const u8 *substr = (const u8 *)lit.s.c_str() + offset;
            u32 hash = hashLongLiteral(substr, max_len, lit.nocase);
            hashToLitOffPairs[hash].emplace_back(lit_id, offset);
        }
    }

    // Post-process each hash group: dedupe identical prefixes, then order.
    for (auto &m : hashToLitOffPairs) {
        LitOffsetVector &d = m.second;
        if (d.size() == 1) {
            continue; // Singleton groups need no dedupe or ordering.
        }

        // Sort by (offset, string) so that we'll be able to remove identical
        // string prefixes.
        stable_sort(begin(d), end(d),
                    [&](const pair<u32, u32> &a, const pair<u32, u32> &b) {
                        const auto &str_a = lits[a.first].s;
                        const auto &str_b = lits[b.first].s;
                        return tie(a.second, str_a) < tie(b.second, str_b);
                    });

        // Remove entries that point to the same literal prefix.
        d.erase(unique(begin(d), end(d),
                       [&](const pair<u32, u32> &a, const pair<u32, u32> &b) {
                           if (a.second != b.second) {
                               return false;
                           }
                           const auto &str_a = lits[a.first].s;
                           const auto &str_b = lits[b.first].s;
                           // Prefix length covered by this hash position.
                           const size_t len = max_len + a.second;
                           return equal(begin(str_a), begin(str_a) + len,
                                        begin(str_b));
                       }),
                end(d));

        // Sort d by distance of the residual string (len minus our depth into
        // the string). We need to put the 'furthest back' string first.
        stable_sort(begin(d), end(d),
                    [](const pair<u32, u32> &a, const pair<u32, u32> &b) {
                        if (a.second != b.second) {
                            return a.second > b.second; /* longest is first */
                        }
                        return a.first < b.first;
                    });
    }

    return hashToLitOffPairs;
}
288 | |
289 | static |
290 | vector<RoseLongLitHashEntry> makeHashTable(const vector<ue2_case_string> &lits, |
291 | size_t max_len, |
292 | const vector<u32> &litToOffsetVal, |
293 | u32 numPositions, bool nocase) { |
294 | // Compute lit substring hashes. |
295 | const auto hashToLitOffPairs = computeLitHashes(lits, max_len, nocase); |
296 | |
297 | // Compute the size of the hash table: we need enough entries to satisfy |
298 | // our max load constraint, and it must be a power of two. |
299 | size_t num_entries = (double)numPositions / MAX_HASH_TABLE_LOAD + 1; |
300 | num_entries = roundUpToPowerOfTwo(max(MIN_HASH_TABLE_SIZE, num_entries)); |
301 | |
302 | auto tab = buildHashTable(max_len, litToOffsetVal, hashToLitOffPairs, |
303 | num_entries); |
304 | DEBUG_PRINTF("built %s hash table for %zu entries: load %f\n" , |
305 | nocase ? "nocase" : "caseful" , num_entries, |
306 | hashTableLoad(tab)); |
307 | assert(hashTableLoad(tab) < MAX_HASH_TABLE_LOAD); |
308 | |
309 | return tab; |
310 | } |
311 | |
312 | static |
313 | vector<u8> buildLits(const vector<ue2_case_string> &lits, u32 baseOffset, |
314 | vector<u32> &litToOffsetVal) { |
315 | vector<u8> blob; |
316 | litToOffsetVal.resize(lits.size(), 0); |
317 | |
318 | u32 lit_id = 0; |
319 | for (const auto &lit : lits) { |
320 | u32 offset = baseOffset + verify_u32(blob.size()); |
321 | blob.insert(blob.end(), begin(lit.s), end(lit.s)); |
322 | litToOffsetVal[lit_id] = offset; |
323 | lit_id++; |
324 | } |
325 | |
326 | DEBUG_PRINTF("built %zu bytes of strings\n" , blob.size()); |
327 | return blob; |
328 | } |
329 | |
330 | u32 buildLongLiteralTable(const RoseBuildImpl &build, RoseEngineBlob &blob, |
331 | vector<ue2_case_string> &lits, |
332 | size_t longLitLengthThreshold, |
333 | size_t *historyRequired, |
334 | size_t *longLitStreamStateRequired) { |
335 | // Work in terms of history requirement (i.e. literal len - 1). |
336 | const size_t max_len = longLitLengthThreshold - 1; |
337 | |
338 | // We should only be building the long literal hash table in streaming mode. |
339 | if (!build.cc.streaming) { |
340 | return 0; |
341 | } |
342 | |
343 | if (lits.empty()) { |
344 | DEBUG_PRINTF("no long literals\n" ); |
345 | return 0; |
346 | } |
347 | |
348 | // The last char of each literal is trimmed as we're not interested in full |
349 | // matches, only partial matches. |
350 | for (auto &lit : lits) { |
351 | assert(!lit.s.empty()); |
352 | lit.s.pop_back(); |
353 | } |
354 | |
355 | // Sort by caseful/caseless and in lexicographical order. |
356 | stable_sort(begin(lits), end(lits), [](const ue2_case_string &a, |
357 | const ue2_case_string &b) { |
358 | if (a.nocase != b.nocase) { |
359 | return a.nocase < b.nocase; |
360 | } |
361 | return a.s < b.s; |
362 | }); |
363 | |
364 | // Find literals that are prefixes of other literals (including |
365 | // duplicates). Note that we iterate in reverse, since we want to retain |
366 | // only the longest string from a set of prefixes. |
367 | auto it = unique(lits.rbegin(), lits.rend(), [](const ue2_case_string &a, |
368 | const ue2_case_string &b) { |
369 | return a.nocase == b.nocase && a.s.size() >= b.s.size() && |
370 | equal(b.s.begin(), b.s.end(), a.s.begin()); |
371 | }); |
372 | |
373 | // Erase dupes found by unique(). |
374 | lits.erase(lits.begin(), it.base()); |
375 | |
376 | LongLitInfo info = analyzeLongLits(lits, max_len); |
377 | |
378 | vector<u32> litToOffsetVal; |
379 | const size_t = ROUNDUP_16(sizeof(RoseLongLitTable)); |
380 | vector<u8> lit_blob = buildLits(lits, headerSize, litToOffsetVal); |
381 | |
382 | // Build caseful bloom filter and hash table. |
383 | vector<u8> bloom_case; |
384 | vector<RoseLongLitHashEntry> tab_case; |
385 | if (info.caseful.num_literals) { |
386 | bloom_case = makeBloomFilter(lits, max_len, false); |
387 | tab_case = makeHashTable(lits, max_len, litToOffsetVal, |
388 | info.caseful.hashed_positions, false); |
389 | } |
390 | |
391 | // Build nocase bloom filter and hash table. |
392 | vector<u8> bloom_nocase; |
393 | vector<RoseLongLitHashEntry> tab_nocase; |
394 | if (info.nocase.num_literals) { |
395 | bloom_nocase = makeBloomFilter(lits, max_len, true); |
396 | tab_nocase = makeHashTable(lits, max_len, litToOffsetVal, |
397 | info.nocase.hashed_positions, true); |
398 | } |
399 | |
400 | size_t wholeLitTabSize = ROUNDUP_16(byte_length(lit_blob)); |
401 | size_t htOffsetCase = headerSize + wholeLitTabSize; |
402 | size_t htOffsetNocase = htOffsetCase + byte_length(tab_case); |
403 | size_t bloomOffsetCase = htOffsetNocase + byte_length(tab_nocase); |
404 | size_t bloomOffsetNocase = bloomOffsetCase + byte_length(bloom_case); |
405 | |
406 | size_t tabSize = ROUNDUP_16(bloomOffsetNocase + byte_length(bloom_nocase)); |
407 | |
408 | // need to add +2 to both of these to allow space for the actual largest |
409 | // value as well as handling the fact that we add one to the space when |
410 | // storing out a position to allow zero to mean "no stream state value" |
411 | u8 streamBitsCase = lg2(roundUpToPowerOfTwo(tab_case.size() + 2)); |
412 | u8 streamBitsNocase = lg2(roundUpToPowerOfTwo(tab_nocase.size() + 2)); |
413 | u32 tot_state_bytes = ROUNDUP_N(streamBitsCase + streamBitsNocase, 8) / 8; |
414 | |
415 | auto table = make_zeroed_bytecode_ptr<char>(tabSize, 16); |
416 | assert(table); // otherwise would have thrown std::bad_alloc |
417 | |
418 | // Fill in the RoseLongLitTable header structure. |
419 | RoseLongLitTable * = (RoseLongLitTable *)(table.get()); |
420 | header->size = verify_u32(tabSize); |
421 | header->maxLen = verify_u8(max_len); // u8 so doesn't matter; won't go > 255 |
422 | header->caseful.hashOffset = verify_u32(htOffsetCase); |
423 | header->caseful.hashBits = lg2(tab_case.size()); |
424 | header->caseful.streamStateBits = streamBitsCase; |
425 | header->caseful.bloomOffset = verify_u32(bloomOffsetCase); |
426 | header->caseful.bloomBits = lg2(bloom_case.size() * 8); |
427 | header->nocase.hashOffset = verify_u32(htOffsetNocase); |
428 | header->nocase.hashBits = lg2(tab_nocase.size()); |
429 | header->nocase.streamStateBits = streamBitsNocase; |
430 | header->nocase.bloomOffset = verify_u32(bloomOffsetNocase); |
431 | header->nocase.bloomBits = lg2(bloom_nocase.size() * 8); |
432 | assert(tot_state_bytes < sizeof(u64a)); |
433 | header->streamStateBytes = verify_u8(tot_state_bytes); // u8 |
434 | |
435 | // Copy in the literal strings, hash tables and bloom filters, |
436 | copy_bytes(table.get() + headerSize, lit_blob); |
437 | copy_bytes(table.get() + htOffsetCase, tab_case); |
438 | copy_bytes(table.get() + bloomOffsetCase, bloom_case); |
439 | copy_bytes(table.get() + htOffsetNocase, tab_nocase); |
440 | copy_bytes(table.get() + bloomOffsetNocase, bloom_nocase); |
441 | |
442 | DEBUG_PRINTF("built streaming table, size=%zu\n" , tabSize); |
443 | DEBUG_PRINTF("requires %zu bytes of history\n" , max_len); |
444 | DEBUG_PRINTF("requires %u bytes of stream state\n" , tot_state_bytes); |
445 | |
446 | *historyRequired = max(*historyRequired, max_len); |
447 | *longLitStreamStateRequired = tot_state_bytes; |
448 | |
449 | return blob.add(table); |
450 | } |
451 | |
452 | } // namespace ue2 |
453 | |