| 1 | /* NOLINT(build/header_guard) */ |
| 2 | /* Copyright 2018 Google Inc. All Rights Reserved. |
| 3 | |
| 4 | Distributed under MIT license. |
| 5 | See file LICENSE for detail or copy at https://opensource.org/licenses/MIT |
| 6 | */ |
| 7 | |
| 8 | /* template parameters: FN, JUMP, NUMBUCKETS, MASK, CHUNKLEN */ |
| 9 | /* NUMBUCKETS / (MASK + 1) = probability of storing and using hash code. */ |
| 10 | /* JUMP = skip bytes for speedup */ |
| 11 | |
| 12 | /* Rolling hash for long distance long string matches. Stores one position |
| 13 | per bucket, bucket key is computed over a long region. */ |
| 14 | |
| 15 | #define HashRolling HASHER() |
| 16 | |
/* Multiplier for each rolling-hash step; 69069 is a classic full-period
   LCG multiplier (all arithmetic wraps modulo 2**32). */
static const uint32_t FN(kRollingHashMul32) = 69069;
/* Sentinel value marking an empty bucket in the position table. */
static const uint32_t FN(kInvalidPos) = 0xffffffff;
| 19 | |
| 20 | /* This hasher uses a longer forward length, but returning a higher value here |
| 21 | will hurt compression by the main hasher when combined with a composite |
| 22 | hasher. The hasher tests for forward itself instead. */ |
| 23 | static BROTLI_INLINE size_t FN(HashTypeLength)(void) { return 4; } |
| 24 | static BROTLI_INLINE size_t FN(StoreLookahead)(void) { return 4; } |
| 25 | |
| 26 | /* Computes a code from a single byte. A lookup table of 256 values could be |
   used, but simply adding 1 works about as well. */
| 28 | static uint32_t FN(HashByte)(uint8_t byte) { |
| 29 | return (uint32_t)byte + 1u; |
| 30 | } |
| 31 | |
| 32 | static uint32_t FN(HashRollingFunctionInitial)(uint32_t state, uint8_t add, |
| 33 | uint32_t factor) { |
| 34 | return (uint32_t)(factor * state + FN(HashByte)(add)); |
| 35 | } |
| 36 | |
| 37 | static uint32_t FN(HashRollingFunction)(uint32_t state, uint8_t add, |
| 38 | uint8_t rem, uint32_t factor, |
| 39 | uint32_t factor_remove) { |
| 40 | return (uint32_t)(factor * state + |
| 41 | FN(HashByte)(add) - factor_remove * FN(HashByte)(rem)); |
| 42 | } |
| 43 | |
typedef struct HashRolling {
  uint32_t state;          /* Current rolling-hash value. */
  uint32_t* table;         /* NUMBUCKETS entries, one position per bucket;
                              FN(kInvalidPos) marks an empty bucket. */
  size_t next_ix;          /* Next position whose hash has to be rolled. */

  uint32_t chunk_len;      /* NOTE(review): neither written nor read in this
                              file — confirm whether it is used elsewhere. */
  uint32_t factor;         /* Per-step multiplier (kRollingHashMul32). */
  uint32_t factor_remove;  /* factor raised to the number of JUMP steps per
                              CHUNKLEN window; cancels the departing byte. */
} HashRolling;
| 53 | |
/* Returns this hasher's private state, which lives immediately after the
   common hasher header within the handle's single allocation. */
static BROTLI_INLINE HashRolling* FN(Self)(HasherHandle handle) {
  return (HashRolling*)&(GetHasherCommon(handle)[1]);
}
| 57 | |
/* Resets the rolling-hash state and precomputes the step factors.
   The bucket table is placed directly after the HashRolling struct in the
   same allocation (sized by FN(HashMemAllocInBytes)). */
static void FN(Initialize)(
    HasherHandle handle, const BrotliEncoderParams* params) {
  HashRolling* self = FN(Self)(handle);
  size_t i;
  self->state = 0;
  self->next_ix = 0;

  self->factor = FN(kRollingHashMul32);

  /* Compute the factor of the oldest byte to remove: factor**steps modulo
     2**32 (the multiplications rely on 32-bit unsigned wraparound), where
     steps is the number of JUMP-strided samples per CHUNKLEN window. */
  self->factor_remove = 1;
  for (i = 0; i < CHUNKLEN; i += JUMP) {
    self->factor_remove *= self->factor;
  }

  /* The table starts right past the struct; fill every bucket with the
     "empty" sentinel so stale memory is never mistaken for a position. */
  self->table = (uint32_t*)((HasherHandle)self + sizeof(HashRolling));
  for (i = 0; i < NUMBUCKETS; i++) {
    self->table[i] = FN(kInvalidPos);
  }

  BROTLI_UNUSED(params);
}
| 81 | |
| 82 | static void FN(Prepare)(HasherHandle handle, BROTLI_BOOL one_shot, |
| 83 | size_t input_size, const uint8_t* data) { |
| 84 | HashRolling* self = FN(Self)(handle); |
| 85 | size_t i; |
| 86 | /* Too small size, cannot use this hasher. */ |
| 87 | if (input_size < CHUNKLEN) return; |
| 88 | self->state = 0; |
| 89 | for (i = 0; i < CHUNKLEN; i += JUMP) { |
| 90 | self->state = FN(HashRollingFunctionInitial)( |
| 91 | self->state, data[i], self->factor); |
| 92 | } |
| 93 | BROTLI_UNUSED(one_shot); |
| 94 | } |
| 95 | |
/* Bytes needed for the hasher state: the HashRolling struct immediately
   followed by the NUMBUCKETS-entry position table (see FN(Initialize)). */
static BROTLI_INLINE size_t FN(HashMemAllocInBytes)(
    const BrotliEncoderParams* params, BROTLI_BOOL one_shot,
    size_t input_size) {
  return sizeof(HashRolling) + NUMBUCKETS * sizeof(uint32_t);
  /* Unreachable, but references the otherwise-unused parameters to keep
     compilers from flagging them (same idiom as elsewhere in this file). */
  BROTLI_UNUSED(params);
  BROTLI_UNUSED(one_shot);
  BROTLI_UNUSED(input_size);
}
| 104 | |
/* No-op: this hasher stores positions lazily inside FN(FindLongestMatch)
   while advancing the rolling hash, so per-position stores are unneeded. */
static BROTLI_INLINE void FN(Store)(HasherHandle BROTLI_RESTRICT handle,
    const uint8_t* BROTLI_RESTRICT data, const size_t mask, const size_t ix) {
  BROTLI_UNUSED(handle);
  BROTLI_UNUSED(data);
  BROTLI_UNUSED(mask);
  BROTLI_UNUSED(ix);
}
| 112 | |
/* No-op for the same reason as FN(Store): positions are recorded inside
   FN(FindLongestMatch) as the rolling hash advances. */
static BROTLI_INLINE void FN(StoreRange)(HasherHandle handle,
    const uint8_t* data, const size_t mask, const size_t ix_start,
    const size_t ix_end) {
  BROTLI_UNUSED(handle);
  BROTLI_UNUSED(data);
  BROTLI_UNUSED(mask);
  BROTLI_UNUSED(ix_start);
  BROTLI_UNUSED(ix_end);
}
| 122 | |
| 123 | static BROTLI_INLINE void FN(StitchToPreviousBlock)(HasherHandle handle, |
| 124 | size_t num_bytes, size_t position, const uint8_t* ringbuffer, |
| 125 | size_t ring_buffer_mask) { |
| 126 | /* In this case we must re-initialize the hasher from scratch from the |
| 127 | current position. */ |
| 128 | HashRolling* self = FN(Self)(handle); |
| 129 | size_t position_masked; |
| 130 | size_t available = num_bytes; |
| 131 | if ((position & (JUMP - 1)) != 0) { |
| 132 | size_t diff = JUMP - (position & (JUMP - 1)); |
| 133 | available = (diff > available) ? 0 : (available - diff); |
| 134 | position += diff; |
| 135 | } |
| 136 | position_masked = position & ring_buffer_mask; |
| 137 | /* wrapping around ringbuffer not handled. */ |
| 138 | if (available > ring_buffer_mask - position_masked) { |
| 139 | available = ring_buffer_mask - position_masked; |
| 140 | } |
| 141 | |
| 142 | FN(Prepare)(handle, BROTLI_FALSE, available, |
| 143 | ringbuffer + (position & ring_buffer_mask)); |
| 144 | self->next_ix = position; |
| 145 | BROTLI_UNUSED(num_bytes); |
| 146 | } |
| 147 | |
/* No-op: this hasher never consults the distance cache (see
   FN(FindLongestMatch), which ignores |distance_cache|). */
static BROTLI_INLINE void FN(PrepareDistanceCache)(
    HasherHandle handle, int* BROTLI_RESTRICT distance_cache) {
  BROTLI_UNUSED(handle);
  BROTLI_UNUSED(distance_cache);
}
| 153 | |
| 154 | static BROTLI_INLINE void FN(FindLongestMatch)(HasherHandle handle, |
| 155 | const BrotliEncoderDictionary* dictionary, |
| 156 | const uint8_t* BROTLI_RESTRICT data, const size_t ring_buffer_mask, |
| 157 | const int* BROTLI_RESTRICT distance_cache, const size_t cur_ix, |
| 158 | const size_t max_length, const size_t max_backward, |
| 159 | const size_t gap, const size_t max_distance, |
| 160 | HasherSearchResult* BROTLI_RESTRICT out) { |
| 161 | HashRolling* self = FN(Self)(handle); |
| 162 | const size_t cur_ix_masked = cur_ix & ring_buffer_mask; |
| 163 | size_t pos = self->next_ix; |
| 164 | |
| 165 | if ((cur_ix & (JUMP - 1)) != 0) return; |
| 166 | |
| 167 | /* Not enough lookahead */ |
| 168 | if (max_length < CHUNKLEN) return; |
| 169 | |
| 170 | for (pos = self->next_ix; pos <= cur_ix; pos += JUMP) { |
| 171 | uint32_t code = self->state & MASK; |
| 172 | |
| 173 | uint8_t rem = data[pos & ring_buffer_mask]; |
| 174 | uint8_t add = data[(pos + CHUNKLEN) & ring_buffer_mask]; |
| 175 | size_t found_ix = FN(kInvalidPos); |
| 176 | |
| 177 | self->state = FN(HashRollingFunction)( |
| 178 | self->state, add, rem, self->factor, self->factor_remove); |
| 179 | |
| 180 | if (code < NUMBUCKETS) { |
| 181 | found_ix = self->table[code]; |
| 182 | self->table[code] = (uint32_t)pos; |
| 183 | if (pos == cur_ix && found_ix != FN(kInvalidPos)) { |
| 184 | /* The cast to 32-bit makes backward distances up to 4GB work even |
| 185 | if cur_ix is above 4GB, despite using 32-bit values in the table. */ |
| 186 | size_t backward = (uint32_t)(cur_ix - found_ix); |
| 187 | if (backward <= max_backward) { |
| 188 | const size_t found_ix_masked = found_ix & ring_buffer_mask; |
| 189 | const size_t len = FindMatchLengthWithLimit(&data[found_ix_masked], |
| 190 | &data[cur_ix_masked], |
| 191 | max_length); |
| 192 | if (len >= 4 && len > out->len) { |
| 193 | score_t score = BackwardReferenceScore(len, backward); |
| 194 | if (score > out->score) { |
| 195 | out->len = len; |
| 196 | out->distance = backward; |
| 197 | out->score = score; |
| 198 | out->len_code_delta = 0; |
| 199 | } |
| 200 | } |
| 201 | } |
| 202 | } |
| 203 | } |
| 204 | } |
| 205 | |
| 206 | self->next_ix = cur_ix + JUMP; |
| 207 | |
| 208 | /* NOTE: this hasher does not search in the dictionary. It is used as |
| 209 | backup-hasher, the main hasher already searches in it. */ |
| 210 | BROTLI_UNUSED(dictionary); |
| 211 | BROTLI_UNUSED(distance_cache); |
| 212 | BROTLI_UNUSED(gap); |
| 213 | BROTLI_UNUSED(max_distance); |
| 214 | } |
| 215 | |
| 216 | #undef HashRolling |
| 217 | |