| 1 | /* |
| 2 | * Copyright (c) 2016-2017, Intel Corporation |
| 3 | * |
| 4 | * Redistribution and use in source and binary forms, with or without |
| 5 | * modification, are permitted provided that the following conditions are met: |
| 6 | * |
| 7 | * * Redistributions of source code must retain the above copyright notice, |
| 8 | * this list of conditions and the following disclaimer. |
| 9 | * * Redistributions in binary form must reproduce the above copyright |
| 10 | * notice, this list of conditions and the following disclaimer in the |
| 11 | * documentation and/or other materials provided with the distribution. |
| 12 | * * Neither the name of Intel Corporation nor the names of its contributors |
| 13 | * may be used to endorse or promote products derived from this software |
| 14 | * without specific prior written permission. |
| 15 | * |
| 16 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
| 17 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| 18 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
| 19 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE |
| 20 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
| 21 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
| 22 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
| 23 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
| 24 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
| 25 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
| 26 | * POSSIBILITY OF SUCH DAMAGE. |
| 27 | */ |
| 28 | |
| 29 | #include "rose_build_long_lit.h" |
| 30 | |
| 31 | #include "rose_build_engine_blob.h" |
| 32 | #include "rose_build_impl.h" |
| 33 | #include "stream_long_lit_hash.h" |
| 34 | #include "util/bytecode_ptr.h" |
| 35 | #include "util/bitutils.h" |
| 36 | #include "util/verify_types.h" |
| 37 | #include "util/compile_context.h" |
| 38 | |
| 39 | #include <algorithm> |
| 40 | #include <numeric> |
| 41 | |
| 42 | using namespace std; |
| 43 | |
| 44 | namespace ue2 { |
| 45 | |
/** \brief Minimum size for a non-empty hash table. Must be a power of two. */
static constexpr size_t MIN_HASH_TABLE_SIZE = 128;

/** \brief Maximum load factor (between zero and one) for a hash table. */
static constexpr double MAX_HASH_TABLE_LOAD = 0.7;

/** \brief Minimum size (in bits) for a bloom filter. Must be a power of two. */
static constexpr u32 MIN_BLOOM_FILTER_SIZE = 256;

/** \brief Maximum load factor (between zero and one) for a bloom filter. */
static constexpr double MAX_BLOOM_FILTER_LOAD = 0.25;

/** \brief Statistics for the literals of one case mode (caseful or nocase). */
struct LongLitModeInfo {
    u32 num_literals = 0; //!< Number of strings for this mode.
    u32 hashed_positions = 0; //!< Number of hashable string positions.
};

/** \brief Statistics for the whole long literal set, split by case mode. */
struct LongLitInfo {
    LongLitModeInfo caseful; //!< Stats for case-sensitive literals.
    LongLitModeInfo nocase;  //!< Stats for caseless literals.
};
| 67 | |
| 68 | static |
| 69 | u32 roundUpToPowerOfTwo(u32 x) { |
| 70 | assert(x != 0); |
| 71 | u32 bits = lg2(x - 1) + 1; |
| 72 | assert(bits < 32); |
| 73 | return 1U << bits; |
| 74 | } |
| 75 | |
| 76 | static |
| 77 | LongLitInfo analyzeLongLits(const vector<ue2_case_string> &lits, |
| 78 | size_t max_len) { |
| 79 | LongLitInfo info; |
| 80 | |
| 81 | for (const auto &lit : lits) { |
| 82 | auto &lit_info = lit.nocase ? info.nocase : info.caseful; |
| 83 | assert(lit.s.size() > max_len); |
| 84 | lit_info.num_literals++; |
| 85 | lit_info.hashed_positions += lit.s.size() - max_len; |
| 86 | } |
| 87 | |
| 88 | DEBUG_PRINTF("case: hashed %u positions\n" , info.caseful.hashed_positions); |
| 89 | DEBUG_PRINTF("nocase: hashed %u positions\n" , info.nocase.hashed_positions); |
| 90 | |
| 91 | return info; |
| 92 | } |
| 93 | |
| 94 | static |
| 95 | void addToBloomFilter(vector<u8> &bloom, const u8 *substr, bool nocase) { |
| 96 | const u32 num_keys = verify_u32(bloom.size() * 8); |
| 97 | const u32 key_mask = (1U << lg2(num_keys)) -1; |
| 98 | |
| 99 | const auto hash_functions = { bloomHash_1, bloomHash_2, bloomHash_3 }; |
| 100 | for (const auto &hash_func : hash_functions) { |
| 101 | u32 hash = hash_func(substr, nocase); |
| 102 | u32 key = hash & key_mask; |
| 103 | DEBUG_PRINTF("set key %u (of %zu)\n" , key, bloom.size() * 8); |
| 104 | bloom[key / 8] |= 1U << (key % 8); |
| 105 | } |
| 106 | } |
| 107 | |
| 108 | static |
| 109 | size_t bloomOccupancy(const vector<u8> &bloom) { |
| 110 | return accumulate(begin(bloom), end(bloom), 0, |
| 111 | [](const size_t &sum, const u8 &elem) { |
| 112 | return sum + popcount32(elem); |
| 113 | }); |
| 114 | } |
| 115 | |
| 116 | static |
| 117 | double bloomLoad(const vector<u8> &bloom) { |
| 118 | return (double)bloomOccupancy(bloom) / (double)(bloom.size() * 8); |
| 119 | } |
| 120 | |
/**
 * \brief Build a bloom filter over every hashable substring position of the
 * literals belonging to the given case mode.
 *
 * \param lits        Full long literal set; literals of the other case mode
 *                    are skipped.
 * \param max_len     Length of the hashed substrings.
 * \param num_entries Filter size in bits; must be a multiple of 8 and a power
 *                    of two. May be zero, yielding an empty filter.
 * \param nocase      Case mode to include.
 */
static
vector<u8> buildBloomFilter(const vector<ue2_case_string> &lits, size_t max_len,
                            size_t num_entries, bool nocase) {
    assert(num_entries % 8 == 0);
    assert((num_entries & (num_entries - 1)) == 0); // Must be power of two.

    vector<u8> bloom(num_entries / 8, 0);

    if (!num_entries) {
        return bloom;
    }

    for (const auto &lit : lits) {
        if (nocase != lit.nocase) {
            continue;
        }
        // Add every max_len-byte substring beginning at offsets 1 through
        // (len - max_len). NOTE(review): offset 0 is deliberately skipped,
        // mirroring computeLitHashes() below — presumably that position is
        // covered by another matching mechanism; confirm against the runtime.
        for (u32 offset = 1; offset < lit.s.size() - max_len + 1; offset++) {
            const u8 *substr = (const u8 *)lit.s.c_str() + offset;
            addToBloomFilter(bloom, substr, nocase);
        }
    }

    DEBUG_PRINTF("%s bloom filter occupancy %zu of %zu entries\n",
                 nocase ? "nocase" : "caseful", bloomOccupancy(bloom),
                 num_entries);

    return bloom;
}
| 149 | |
| 150 | |
| 151 | static |
| 152 | vector<u8> makeBloomFilter(const vector<ue2_case_string> &lits, |
| 153 | size_t max_len, bool nocase) { |
| 154 | vector<u8> bloom; |
| 155 | |
| 156 | size_t num_entries = MIN_BLOOM_FILTER_SIZE; |
| 157 | for (;;) { |
| 158 | bloom = buildBloomFilter(lits, max_len, num_entries, nocase); |
| 159 | DEBUG_PRINTF("built %s bloom for %zu entries: load %f\n" , |
| 160 | nocase ? "nocase" : "caseful" , num_entries, |
| 161 | bloomLoad(bloom)); |
| 162 | if (bloomLoad(bloom) < MAX_BLOOM_FILTER_LOAD) { |
| 163 | break; |
| 164 | } |
| 165 | num_entries *= 2; |
| 166 | } |
| 167 | return bloom; |
| 168 | } |
| 169 | |
| 170 | static UNUSED |
| 171 | size_t hashTableOccupancy(const vector<RoseLongLitHashEntry> &tab) { |
| 172 | return count_if(begin(tab), end(tab), [](const RoseLongLitHashEntry &ent) { |
| 173 | return ent.str_offset != 0; |
| 174 | }); |
| 175 | } |
| 176 | |
| 177 | static UNUSED |
| 178 | double hashTableLoad(const vector<RoseLongLitHashEntry> &tab) { |
| 179 | return (double)hashTableOccupancy(tab) / (double)(tab.size()); |
| 180 | } |
| 181 | |
/** \brief (lit_id, offset) pairs for the literal positions that share one
 * hash value; inline capacity of one covers the common no-collision case. */
using LitOffsetVector = small_vector<pair<u32, u32>, 1>;
| 183 | |
/**
 * \brief Lay the hashed literal positions out into an open-addressed hash
 * table, resolving collisions by linear probing.
 *
 * A zero str_offset marks an empty slot: literal string data is laid out at
 * non-zero offsets (see the assert below), so a real entry can never carry
 * str_offset == 0.
 *
 * \param max_len            Length of the hashed substrings.
 * \param litToOffsetVal     Byte offset of each literal's string data.
 * \param hashToLitOffPairs  Map from hash value to its (lit_id, offset) pairs.
 * \param numEntries         Table size in entries; may be zero.
 */
static
vector<RoseLongLitHashEntry> buildHashTable(
        size_t max_len, const vector<u32> &litToOffsetVal,
        const map<u32, LitOffsetVector> &hashToLitOffPairs,
        size_t numEntries) {
    vector<RoseLongLitHashEntry> tab(numEntries, {0,0});

    if (!numEntries) {
        return tab;
    }

    for (const auto &m : hashToLitOffPairs) {
        u32 hash = m.first;
        const LitOffsetVector &d = m.second;

        u32 bucket = hash % numEntries;

        // Placement via linear probing.
        for (const auto &lit_offset : d) {
            // Scan forward from the home bucket for the first free slot.
            while (tab[bucket].str_offset != 0) {
                bucket++;
                if (bucket == numEntries) {
                    bucket = 0; // Wrap around to the start of the table.
                }
            }

            u32 lit_id = lit_offset.first;
            u32 offset = lit_offset.second;

            DEBUG_PRINTF("hash 0x%08x lit_id %u offset %u bucket %u\n", hash,
                         lit_id, offset, bucket);

            auto &entry = tab[bucket];
            entry.str_offset = verify_u32(litToOffsetVal.at(lit_id));
            assert(entry.str_offset != 0);
            // Total prefix length covered by this position: the hashed
            // substring ends max_len bytes past its offset into the literal.
            entry.str_len = offset + max_len;
        }
    }

    DEBUG_PRINTF("hash table occupancy %zu of %zu entries\n",
                 hashTableOccupancy(tab), numEntries);

    return tab;
}
| 228 | |
/**
 * \brief Hash every hashable substring position of the literals in the given
 * case mode, grouping (lit_id, offset) pairs by hash value.
 *
 * Each literal contributes one position per offset in [1, len - max_len]; the
 * hashed substring is the max_len bytes starting at that offset. Colliding
 * groups are deduplicated (identical prefixes) and ordered so that the
 * position furthest from the literal's start comes first.
 */
static
map<u32, LitOffsetVector> computeLitHashes(const vector<ue2_case_string> &lits,
                                           size_t max_len, bool nocase) {
    map<u32, LitOffsetVector> hashToLitOffPairs;

    for (u32 lit_id = 0; lit_id < lits.size(); lit_id++) {
        const ue2_case_string &lit = lits[lit_id];
        if (nocase != lit.nocase) {
            continue;
        }
        for (u32 offset = 1; offset < lit.s.size() - max_len + 1; offset++) {
            const u8 *substr = (const u8 *)lit.s.c_str() + offset;
            u32 hash = hashLongLiteral(substr, max_len, lit.nocase);
            hashToLitOffPairs[hash].emplace_back(lit_id, offset);
        }
    }

    for (auto &m : hashToLitOffPairs) {
        LitOffsetVector &d = m.second;
        if (d.size() == 1) {
            continue; // No collisions: nothing to dedupe or order.
        }

        // Sort by (offset, string) so that we'll be able to remove identical
        // string prefixes.
        stable_sort(begin(d), end(d),
                    [&](const pair<u32, u32> &a, const pair<u32, u32> &b) {
                        const auto &str_a = lits[a.first].s;
                        const auto &str_b = lits[b.first].s;
                        return tie(a.second, str_a) < tie(b.second, str_b);
                    });

        // Remove entries that point to the same literal prefix: two positions
        // at the same offset are interchangeable when the first
        // (max_len + offset) bytes of their literals are identical.
        d.erase(unique(begin(d), end(d),
                       [&](const pair<u32, u32> &a, const pair<u32, u32> &b) {
                           if (a.second != b.second) {
                               return false;
                           }
                           const auto &str_a = lits[a.first].s;
                           const auto &str_b = lits[b.first].s;
                           const size_t len = max_len + a.second;
                           return equal(begin(str_a), begin(str_a) + len,
                                        begin(str_b));
                       }),
                end(d));

        // Sort d by distance of the residual string (len minus our depth into
        // the string). We need to put the 'furthest back' string first.
        stable_sort(begin(d), end(d),
                    [](const pair<u32, u32> &a, const pair<u32, u32> &b) {
                        if (a.second != b.second) {
                            return a.second > b.second; /* longest is first */
                        }
                        return a.first < b.first;
                    });
    }

    return hashToLitOffPairs;
}
| 288 | |
| 289 | static |
| 290 | vector<RoseLongLitHashEntry> makeHashTable(const vector<ue2_case_string> &lits, |
| 291 | size_t max_len, |
| 292 | const vector<u32> &litToOffsetVal, |
| 293 | u32 numPositions, bool nocase) { |
| 294 | // Compute lit substring hashes. |
| 295 | const auto hashToLitOffPairs = computeLitHashes(lits, max_len, nocase); |
| 296 | |
| 297 | // Compute the size of the hash table: we need enough entries to satisfy |
| 298 | // our max load constraint, and it must be a power of two. |
| 299 | size_t num_entries = (double)numPositions / MAX_HASH_TABLE_LOAD + 1; |
| 300 | num_entries = roundUpToPowerOfTwo(max(MIN_HASH_TABLE_SIZE, num_entries)); |
| 301 | |
| 302 | auto tab = buildHashTable(max_len, litToOffsetVal, hashToLitOffPairs, |
| 303 | num_entries); |
| 304 | DEBUG_PRINTF("built %s hash table for %zu entries: load %f\n" , |
| 305 | nocase ? "nocase" : "caseful" , num_entries, |
| 306 | hashTableLoad(tab)); |
| 307 | assert(hashTableLoad(tab) < MAX_HASH_TABLE_LOAD); |
| 308 | |
| 309 | return tab; |
| 310 | } |
| 311 | |
| 312 | static |
| 313 | vector<u8> buildLits(const vector<ue2_case_string> &lits, u32 baseOffset, |
| 314 | vector<u32> &litToOffsetVal) { |
| 315 | vector<u8> blob; |
| 316 | litToOffsetVal.resize(lits.size(), 0); |
| 317 | |
| 318 | u32 lit_id = 0; |
| 319 | for (const auto &lit : lits) { |
| 320 | u32 offset = baseOffset + verify_u32(blob.size()); |
| 321 | blob.insert(blob.end(), begin(lit.s), end(lit.s)); |
| 322 | litToOffsetVal[lit_id] = offset; |
| 323 | lit_id++; |
| 324 | } |
| 325 | |
| 326 | DEBUG_PRINTF("built %zu bytes of strings\n" , blob.size()); |
| 327 | return blob; |
| 328 | } |
| 329 | |
| 330 | u32 buildLongLiteralTable(const RoseBuildImpl &build, RoseEngineBlob &blob, |
| 331 | vector<ue2_case_string> &lits, |
| 332 | size_t longLitLengthThreshold, |
| 333 | size_t *historyRequired, |
| 334 | size_t *longLitStreamStateRequired) { |
| 335 | // Work in terms of history requirement (i.e. literal len - 1). |
| 336 | const size_t max_len = longLitLengthThreshold - 1; |
| 337 | |
| 338 | // We should only be building the long literal hash table in streaming mode. |
| 339 | if (!build.cc.streaming) { |
| 340 | return 0; |
| 341 | } |
| 342 | |
| 343 | if (lits.empty()) { |
| 344 | DEBUG_PRINTF("no long literals\n" ); |
| 345 | return 0; |
| 346 | } |
| 347 | |
| 348 | // The last char of each literal is trimmed as we're not interested in full |
| 349 | // matches, only partial matches. |
| 350 | for (auto &lit : lits) { |
| 351 | assert(!lit.s.empty()); |
| 352 | lit.s.pop_back(); |
| 353 | } |
| 354 | |
| 355 | // Sort by caseful/caseless and in lexicographical order. |
| 356 | stable_sort(begin(lits), end(lits), [](const ue2_case_string &a, |
| 357 | const ue2_case_string &b) { |
| 358 | if (a.nocase != b.nocase) { |
| 359 | return a.nocase < b.nocase; |
| 360 | } |
| 361 | return a.s < b.s; |
| 362 | }); |
| 363 | |
| 364 | // Find literals that are prefixes of other literals (including |
| 365 | // duplicates). Note that we iterate in reverse, since we want to retain |
| 366 | // only the longest string from a set of prefixes. |
| 367 | auto it = unique(lits.rbegin(), lits.rend(), [](const ue2_case_string &a, |
| 368 | const ue2_case_string &b) { |
| 369 | return a.nocase == b.nocase && a.s.size() >= b.s.size() && |
| 370 | equal(b.s.begin(), b.s.end(), a.s.begin()); |
| 371 | }); |
| 372 | |
| 373 | // Erase dupes found by unique(). |
| 374 | lits.erase(lits.begin(), it.base()); |
| 375 | |
| 376 | LongLitInfo info = analyzeLongLits(lits, max_len); |
| 377 | |
| 378 | vector<u32> litToOffsetVal; |
| 379 | const size_t = ROUNDUP_16(sizeof(RoseLongLitTable)); |
| 380 | vector<u8> lit_blob = buildLits(lits, headerSize, litToOffsetVal); |
| 381 | |
| 382 | // Build caseful bloom filter and hash table. |
| 383 | vector<u8> bloom_case; |
| 384 | vector<RoseLongLitHashEntry> tab_case; |
| 385 | if (info.caseful.num_literals) { |
| 386 | bloom_case = makeBloomFilter(lits, max_len, false); |
| 387 | tab_case = makeHashTable(lits, max_len, litToOffsetVal, |
| 388 | info.caseful.hashed_positions, false); |
| 389 | } |
| 390 | |
| 391 | // Build nocase bloom filter and hash table. |
| 392 | vector<u8> bloom_nocase; |
| 393 | vector<RoseLongLitHashEntry> tab_nocase; |
| 394 | if (info.nocase.num_literals) { |
| 395 | bloom_nocase = makeBloomFilter(lits, max_len, true); |
| 396 | tab_nocase = makeHashTable(lits, max_len, litToOffsetVal, |
| 397 | info.nocase.hashed_positions, true); |
| 398 | } |
| 399 | |
| 400 | size_t wholeLitTabSize = ROUNDUP_16(byte_length(lit_blob)); |
| 401 | size_t htOffsetCase = headerSize + wholeLitTabSize; |
| 402 | size_t htOffsetNocase = htOffsetCase + byte_length(tab_case); |
| 403 | size_t bloomOffsetCase = htOffsetNocase + byte_length(tab_nocase); |
| 404 | size_t bloomOffsetNocase = bloomOffsetCase + byte_length(bloom_case); |
| 405 | |
| 406 | size_t tabSize = ROUNDUP_16(bloomOffsetNocase + byte_length(bloom_nocase)); |
| 407 | |
| 408 | // need to add +2 to both of these to allow space for the actual largest |
| 409 | // value as well as handling the fact that we add one to the space when |
| 410 | // storing out a position to allow zero to mean "no stream state value" |
| 411 | u8 streamBitsCase = lg2(roundUpToPowerOfTwo(tab_case.size() + 2)); |
| 412 | u8 streamBitsNocase = lg2(roundUpToPowerOfTwo(tab_nocase.size() + 2)); |
| 413 | u32 tot_state_bytes = ROUNDUP_N(streamBitsCase + streamBitsNocase, 8) / 8; |
| 414 | |
| 415 | auto table = make_zeroed_bytecode_ptr<char>(tabSize, 16); |
| 416 | assert(table); // otherwise would have thrown std::bad_alloc |
| 417 | |
| 418 | // Fill in the RoseLongLitTable header structure. |
| 419 | RoseLongLitTable * = (RoseLongLitTable *)(table.get()); |
| 420 | header->size = verify_u32(tabSize); |
| 421 | header->maxLen = verify_u8(max_len); // u8 so doesn't matter; won't go > 255 |
| 422 | header->caseful.hashOffset = verify_u32(htOffsetCase); |
| 423 | header->caseful.hashBits = lg2(tab_case.size()); |
| 424 | header->caseful.streamStateBits = streamBitsCase; |
| 425 | header->caseful.bloomOffset = verify_u32(bloomOffsetCase); |
| 426 | header->caseful.bloomBits = lg2(bloom_case.size() * 8); |
| 427 | header->nocase.hashOffset = verify_u32(htOffsetNocase); |
| 428 | header->nocase.hashBits = lg2(tab_nocase.size()); |
| 429 | header->nocase.streamStateBits = streamBitsNocase; |
| 430 | header->nocase.bloomOffset = verify_u32(bloomOffsetNocase); |
| 431 | header->nocase.bloomBits = lg2(bloom_nocase.size() * 8); |
| 432 | assert(tot_state_bytes < sizeof(u64a)); |
| 433 | header->streamStateBytes = verify_u8(tot_state_bytes); // u8 |
| 434 | |
| 435 | // Copy in the literal strings, hash tables and bloom filters, |
| 436 | copy_bytes(table.get() + headerSize, lit_blob); |
| 437 | copy_bytes(table.get() + htOffsetCase, tab_case); |
| 438 | copy_bytes(table.get() + bloomOffsetCase, bloom_case); |
| 439 | copy_bytes(table.get() + htOffsetNocase, tab_nocase); |
| 440 | copy_bytes(table.get() + bloomOffsetNocase, bloom_nocase); |
| 441 | |
| 442 | DEBUG_PRINTF("built streaming table, size=%zu\n" , tabSize); |
| 443 | DEBUG_PRINTF("requires %zu bytes of history\n" , max_len); |
| 444 | DEBUG_PRINTF("requires %u bytes of stream state\n" , tot_state_bytes); |
| 445 | |
| 446 | *historyRequired = max(*historyRequired, max_len); |
| 447 | *longLitStreamStateRequired = tot_state_bytes; |
| 448 | |
| 449 | return blob.add(table); |
| 450 | } |
| 451 | |
| 452 | } // namespace ue2 |
| 453 | |