/*
 * Copyright (c) 2016-2017, Intel Corporation
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 *  * Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *  * Neither the name of Intel Corporation nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include "rose_build_long_lit.h"

#include "rose_build_engine_blob.h"
#include "rose_build_impl.h"
#include "stream_long_lit_hash.h"
#include "util/bytecode_ptr.h"
#include "util/bitutils.h"
#include "util/verify_types.h"
#include "util/compile_context.h"
#include "util/small_vector.h"

#include <algorithm>
#include <map>
#include <numeric>
#include <tuple>
#include <utility>
#include <vector>
using namespace std;

namespace ue2 {

/** \brief Minimum size for a non-empty hash table. Must be a power of two. */
static constexpr size_t MIN_HASH_TABLE_SIZE = 128;

/** \brief Maximum load factor (between zero and one) for a hash table. */
static constexpr double MAX_HASH_TABLE_LOAD = 0.7;

/** \brief Minimum size (in bits) for a bloom filter. Must be a power of two. */
static constexpr u32 MIN_BLOOM_FILTER_SIZE = 256;

/** \brief Maximum load factor (between zero and one) for a bloom filter. */
static constexpr double MAX_BLOOM_FILTER_LOAD = 0.25;

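/** \brief Literal counts for a single case mode (caseful or nocase). */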
struct LongLitModeInfo {
    u32 num_literals = 0; //!< Number of strings for this mode.
    u32 hashed_positions = 0; //!< Number of hashable string positions.
};

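/** \brief Long literal statistics, broken down into caseful and caseless
 * (nocase) modes. */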
struct LongLitInfo {
    LongLitModeInfo caseful;
    LongLitModeInfo nocase;
};

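/** \brief Round \p x up to the next power of two (returns \p x unchanged if
 * it is already a power of two). \p x must be non-zero. */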
static
u32 roundUpToPowerOfTwo(u32 x) {
    assert(x != 0);
    u32 bits = lg2(x - 1) + 1;
    assert(bits < 32);
    return 1U << bits;
}

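/** \brief Count the literals and hashable positions (literal length minus
 * \p max_len) for each case mode. Every literal must be longer than
 * \p max_len. */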
static
LongLitInfo analyzeLongLits(const vector<ue2_case_string> &lits,
                            size_t max_len) {
    LongLitInfo info;

    for (const auto &lit : lits) {
        auto &lit_info = lit.nocase ? info.nocase : info.caseful;
        assert(lit.s.size() > max_len);
        lit_info.num_literals++;
        lit_info.hashed_positions += lit.s.size() - max_len;
    }

    DEBUG_PRINTF("case: hashed %u positions\n", info.caseful.hashed_positions);
    DEBUG_PRINTF("nocase: hashed %u positions\n", info.nocase.hashed_positions);

    return info;
}

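/** \brief Set the bloom filter bits selected by the three bloom hash
 * functions for the substring beginning at \p substr. */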
static
void addToBloomFilter(vector<u8> &bloom, const u8 *substr, bool nocase) {
    const u32 num_keys = verify_u32(bloom.size() * 8);
    const u32 key_mask = (1U << lg2(num_keys)) - 1;

    const auto hash_functions = { bloomHash_1, bloomHash_2, bloomHash_3 };
    for (const auto &hash_func : hash_functions) {
        u32 hash = hash_func(substr, nocase);
        u32 key = hash & key_mask;
        DEBUG_PRINTF("set key %u (of %zu)\n", key, bloom.size() * 8);
        bloom[key / 8] |= 1U << (key % 8);
    }
}

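/** \brief Returns the number of bits set in the bloom filter. */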
static
size_t bloomOccupancy(const vector<u8> &bloom) {
    return accumulate(begin(bloom), end(bloom), 0,
                      [](const size_t &sum, const u8 &elem) {
                          return sum + popcount32(elem);
                      });
}

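/** \brief Returns the proportion (between zero and one) of bits set in the
 * bloom filter. */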
static
double bloomLoad(const vector<u8> &bloom) {
    return (double)bloomOccupancy(bloom) / (double)(bloom.size() * 8);
}

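/** \brief Build a bloom filter of \p num_entries bits (a power of two),
 * populated with every hashable substring position of the literals that match
 * the given case mode. */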
static
vector<u8> buildBloomFilter(const vector<ue2_case_string> &lits, size_t max_len,
                            size_t num_entries, bool nocase) {
    assert(num_entries % 8 == 0);
    assert((num_entries & (num_entries - 1)) == 0); // Must be power of two.

    vector<u8> bloom(num_entries / 8, 0);

    if (!num_entries) {
        return bloom;
    }

    for (const auto &lit : lits) {
        if (nocase != lit.nocase) {
            continue;
        }
        for (u32 offset = 1; offset < lit.s.size() - max_len + 1; offset++) {
            const u8 *substr = (const u8 *)lit.s.c_str() + offset;
            addToBloomFilter(bloom, substr, nocase);
        }
    }

    DEBUG_PRINTF("%s bloom filter occupancy %zu of %zu entries\n",
                 nocase ? "nocase" : "caseful", bloomOccupancy(bloom),
                 num_entries);

    return bloom;
}

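/** \brief Build a bloom filter for the given case mode, doubling its size
 * until the load factor falls below MAX_BLOOM_FILTER_LOAD. */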
static
vector<u8> makeBloomFilter(const vector<ue2_case_string> &lits,
                           size_t max_len, bool nocase) {
    vector<u8> bloom;

    size_t num_entries = MIN_BLOOM_FILTER_SIZE;
    for (;;) {
        bloom = buildBloomFilter(lits, max_len, num_entries, nocase);
        DEBUG_PRINTF("built %s bloom for %zu entries: load %f\n",
                     nocase ? "nocase" : "caseful", num_entries,
                     bloomLoad(bloom));
        if (bloomLoad(bloom) < MAX_BLOOM_FILTER_LOAD) {
            break;
        }
        num_entries *= 2;
    }
    return bloom;
}

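/** \brief Returns the number of occupied entries in the hash table. Used only
 * for debug output and assertions. */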
static UNUSED
size_t hashTableOccupancy(const vector<RoseLongLitHashEntry> &tab) {
    return count_if(begin(tab), end(tab), [](const RoseLongLitHashEntry &ent) {
        return ent.str_offset != 0;
    });
}

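/** \brief Returns the proportion of occupied entries in the hash table. */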
static UNUSED
double hashTableLoad(const vector<RoseLongLitHashEntry> &tab) {
    return (double)hashTableOccupancy(tab) / (double)(tab.size());
}

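/** \brief Vector of (literal id, offset) pairs that share a hash value. */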
using LitOffsetVector = small_vector<pair<u32, u32>, 1>;

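/** \brief Construct a hash table of \p numEntries entries, filled via linear
 * probing, mapping literal substring hashes to the offset and length of the
 * corresponding string in the literal blob. */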
static
vector<RoseLongLitHashEntry> buildHashTable(
    size_t max_len, const vector<u32> &litToOffsetVal,
    const map<u32, LitOffsetVector> &hashToLitOffPairs,
    size_t numEntries) {
    vector<RoseLongLitHashEntry> tab(numEntries, {0,0});

    if (!numEntries) {
        return tab;
    }

    for (const auto &m : hashToLitOffPairs) {
        u32 hash = m.first;
        const LitOffsetVector &d = m.second;

        u32 bucket = hash % numEntries;

        // Placement via linear probing.
        for (const auto &lit_offset : d) {
            while (tab[bucket].str_offset != 0) {
                bucket++;
                if (bucket == numEntries) {
                    bucket = 0;
                }
            }

            u32 lit_id = lit_offset.first;
            u32 offset = lit_offset.second;

            DEBUG_PRINTF("hash 0x%08x lit_id %u offset %u bucket %u\n", hash,
                         lit_id, offset, bucket);

            auto &entry = tab[bucket];
            entry.str_offset = verify_u32(litToOffsetVal.at(lit_id));
            assert(entry.str_offset != 0);
            entry.str_len = offset + max_len;
        }
    }

    DEBUG_PRINTF("hash table occupancy %zu of %zu entries\n",
                 hashTableOccupancy(tab), numEntries);

    return tab;
}

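/** \brief Hash every hashable position of each literal with the given case
 * mode. Returns a map from hash value to (literal id, offset) pairs, with
 * entries referring to identical string prefixes removed and the remainder
 * ordered with the 'furthest back' string first. */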
static
map<u32, LitOffsetVector> computeLitHashes(const vector<ue2_case_string> &lits,
                                           size_t max_len, bool nocase) {
    map<u32, LitOffsetVector> hashToLitOffPairs;

    for (u32 lit_id = 0; lit_id < lits.size(); lit_id++) {
        const ue2_case_string &lit = lits[lit_id];
        if (nocase != lit.nocase) {
            continue;
        }
        for (u32 offset = 1; offset < lit.s.size() - max_len + 1; offset++) {
            const u8 *substr = (const u8 *)lit.s.c_str() + offset;
            u32 hash = hashLongLiteral(substr, max_len, lit.nocase);
            hashToLitOffPairs[hash].emplace_back(lit_id, offset);
        }
    }

    for (auto &m : hashToLitOffPairs) {
        LitOffsetVector &d = m.second;
        if (d.size() == 1) {
            continue;
        }

        // Sort by (offset, string) so that we'll be able to remove identical
        // string prefixes.
        stable_sort(begin(d), end(d),
                    [&](const pair<u32, u32> &a, const pair<u32, u32> &b) {
                        const auto &str_a = lits[a.first].s;
                        const auto &str_b = lits[b.first].s;
                        return tie(a.second, str_a) < tie(b.second, str_b);
                    });

        // Remove entries that point to the same literal prefix.
        d.erase(unique(begin(d), end(d),
                       [&](const pair<u32, u32> &a, const pair<u32, u32> &b) {
                           if (a.second != b.second) {
                               return false;
                           }
                           const auto &str_a = lits[a.first].s;
                           const auto &str_b = lits[b.first].s;
                           const size_t len = max_len + a.second;
                           return equal(begin(str_a), begin(str_a) + len,
                                        begin(str_b));
                       }),
                end(d));

        // Sort d by distance of the residual string (len minus our depth into
        // the string). We need to put the 'furthest back' string first.
        stable_sort(begin(d), end(d),
                    [](const pair<u32, u32> &a, const pair<u32, u32> &b) {
                        if (a.second != b.second) {
                            return a.second > b.second; /* longest is first */
                        }
                        return a.first < b.first;
                    });
    }

    return hashToLitOffPairs;
}

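/** \brief Build the hash table for one case mode: the table is sized to keep
 * the load for \p numPositions hashed positions below MAX_HASH_TABLE_LOAD
 * (rounded up to a power of two) and filled from the literal substring
 * hashes. */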
static
vector<RoseLongLitHashEntry> makeHashTable(const vector<ue2_case_string> &lits,
                                           size_t max_len,
                                           const vector<u32> &litToOffsetVal,
                                           u32 numPositions, bool nocase) {
    // Compute lit substring hashes.
    const auto hashToLitOffPairs = computeLitHashes(lits, max_len, nocase);

    // Compute the size of the hash table: we need enough entries to satisfy
    // our max load constraint, and it must be a power of two.
    size_t num_entries = (double)numPositions / MAX_HASH_TABLE_LOAD + 1;
    num_entries = roundUpToPowerOfTwo(max(MIN_HASH_TABLE_SIZE, num_entries));

    auto tab = buildHashTable(max_len, litToOffsetVal, hashToLitOffPairs,
                              num_entries);
    DEBUG_PRINTF("built %s hash table for %zu entries: load %f\n",
                 nocase ? "nocase" : "caseful", num_entries,
                 hashTableLoad(tab));
    assert(hashTableLoad(tab) < MAX_HASH_TABLE_LOAD);

    return tab;
}

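/** \brief Concatenate all literal strings into a single blob and record each
 * literal's offset (measured from the start of the long literal table, i.e.
 * including \p baseOffset) in \p litToOffsetVal. */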
static
vector<u8> buildLits(const vector<ue2_case_string> &lits, u32 baseOffset,
                     vector<u32> &litToOffsetVal) {
    vector<u8> blob;
    litToOffsetVal.resize(lits.size(), 0);

    u32 lit_id = 0;
    for (const auto &lit : lits) {
        u32 offset = baseOffset + verify_u32(blob.size());
        blob.insert(blob.end(), begin(lit.s), end(lit.s));
        litToOffsetVal[lit_id] = offset;
        lit_id++;
    }

    DEBUG_PRINTF("built %zu bytes of strings\n", blob.size());
    return blob;
}

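/** \brief Build the streaming-mode long literal table: trim and dedupe the
 * literals, build caseful and nocase bloom filters and hash tables, lay them
 * out after a RoseLongLitTable header and add the result to the engine blob.
 *
 * Returns the offset of the new table in the engine blob, or zero if no table
 * is required. Updates \p historyRequired and \p longLitStreamStateRequired
 * with this table's requirements.
 */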
u32 buildLongLiteralTable(const RoseBuildImpl &build, RoseEngineBlob &blob,
                          vector<ue2_case_string> &lits,
                          size_t longLitLengthThreshold,
                          size_t *historyRequired,
                          size_t *longLitStreamStateRequired) {
    // Work in terms of history requirement (i.e. literal len - 1).
    const size_t max_len = longLitLengthThreshold - 1;

    // We should only be building the long literal hash table in streaming
    // mode.
    if (!build.cc.streaming) {
        return 0;
    }

    if (lits.empty()) {
        DEBUG_PRINTF("no long literals\n");
        return 0;
    }

    // The last char of each literal is trimmed as we're not interested in
    // full matches, only partial matches.
    for (auto &lit : lits) {
        assert(!lit.s.empty());
        lit.s.pop_back();
    }

    // Sort by caseful/caseless and in lexicographical order.
    stable_sort(begin(lits), end(lits), [](const ue2_case_string &a,
                                           const ue2_case_string &b) {
        if (a.nocase != b.nocase) {
            return a.nocase < b.nocase;
        }
        return a.s < b.s;
    });

    // Find literals that are prefixes of other literals (including
    // duplicates). Note that we iterate in reverse, since we want to retain
    // only the longest string from a set of prefixes.
    auto it = unique(lits.rbegin(), lits.rend(), [](const ue2_case_string &a,
                                                    const ue2_case_string &b) {
        return a.nocase == b.nocase && a.s.size() >= b.s.size() &&
               equal(b.s.begin(), b.s.end(), a.s.begin());
    });

    // Erase dupes found by unique().
    lits.erase(lits.begin(), it.base());

    LongLitInfo info = analyzeLongLits(lits, max_len);

    vector<u32> litToOffsetVal;
    const size_t headerSize = ROUNDUP_16(sizeof(RoseLongLitTable));
    vector<u8> lit_blob = buildLits(lits, headerSize, litToOffsetVal);

    // Build caseful bloom filter and hash table.
    vector<u8> bloom_case;
    vector<RoseLongLitHashEntry> tab_case;
    if (info.caseful.num_literals) {
        bloom_case = makeBloomFilter(lits, max_len, false);
        tab_case = makeHashTable(lits, max_len, litToOffsetVal,
                                 info.caseful.hashed_positions, false);
    }

    // Build nocase bloom filter and hash table.
    vector<u8> bloom_nocase;
    vector<RoseLongLitHashEntry> tab_nocase;
    if (info.nocase.num_literals) {
        bloom_nocase = makeBloomFilter(lits, max_len, true);
        tab_nocase = makeHashTable(lits, max_len, litToOffsetVal,
                                   info.nocase.hashed_positions, true);
    }

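    // Table layout: header, then literal strings, then the caseful and nocase
    // hash tables, then the caseful and nocase bloom filters.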
    size_t wholeLitTabSize = ROUNDUP_16(byte_length(lit_blob));
    size_t htOffsetCase = headerSize + wholeLitTabSize;
    size_t htOffsetNocase = htOffsetCase + byte_length(tab_case);
    size_t bloomOffsetCase = htOffsetNocase + byte_length(tab_nocase);
    size_t bloomOffsetNocase = bloomOffsetCase + byte_length(bloom_case);

    size_t tabSize = ROUNDUP_16(bloomOffsetNocase + byte_length(bloom_nocase));

    // Add 2 to each table size before rounding: 1 because positions are
    // stored plus one (so that zero can mean "no stream state value"), and 1
    // so that there is room for the largest resulting value.
    u8 streamBitsCase = lg2(roundUpToPowerOfTwo(tab_case.size() + 2));
    u8 streamBitsNocase = lg2(roundUpToPowerOfTwo(tab_nocase.size() + 2));
    u32 tot_state_bytes = ROUNDUP_N(streamBitsCase + streamBitsNocase, 8) / 8;

    auto table = make_zeroed_bytecode_ptr<char>(tabSize, 16);
    assert(table); // otherwise would have thrown std::bad_alloc

    // Fill in the RoseLongLitTable header structure.
    RoseLongLitTable *header = (RoseLongLitTable *)(table.get());
    header->size = verify_u32(tabSize);
    header->maxLen = verify_u8(max_len); // max_len always fits in a u8.
    header->caseful.hashOffset = verify_u32(htOffsetCase);
    header->caseful.hashBits = lg2(tab_case.size());
    header->caseful.streamStateBits = streamBitsCase;
    header->caseful.bloomOffset = verify_u32(bloomOffsetCase);
    header->caseful.bloomBits = lg2(bloom_case.size() * 8);
    header->nocase.hashOffset = verify_u32(htOffsetNocase);
    header->nocase.hashBits = lg2(tab_nocase.size());
    header->nocase.streamStateBits = streamBitsNocase;
    header->nocase.bloomOffset = verify_u32(bloomOffsetNocase);
    header->nocase.bloomBits = lg2(bloom_nocase.size() * 8);
    assert(tot_state_bytes < sizeof(u64a));
    header->streamStateBytes = verify_u8(tot_state_bytes); // u8

    // Copy in the literal strings, hash tables and bloom filters.
    copy_bytes(table.get() + headerSize, lit_blob);
    copy_bytes(table.get() + htOffsetCase, tab_case);
    copy_bytes(table.get() + bloomOffsetCase, bloom_case);
    copy_bytes(table.get() + htOffsetNocase, tab_nocase);
    copy_bytes(table.get() + bloomOffsetNocase, bloom_nocase);

    DEBUG_PRINTF("built streaming table, size=%zu\n", tabSize);
    DEBUG_PRINTF("requires %zu bytes of history\n", max_len);
    DEBUG_PRINTF("requires %u bytes of stream state\n", tot_state_bytes);

    *historyRequired = max(*historyRequired, max_len);
    *longLitStreamStateRequired = tot_state_bytes;

    return blob.add(table);
}

} // namespace ue2