1 | /* NOLINT(build/header_guard) */ |
2 | /* Copyright 2018 Google Inc. All Rights Reserved. |
3 | |
4 | Distributed under MIT license. |
5 | See file LICENSE for detail or copy at https://opensource.org/licenses/MIT |
6 | */ |
7 | |
8 | /* template parameters: FN, JUMP, NUMBUCKETS, MASK, CHUNKLEN */ |
9 | /* NUMBUCKETS / (MASK + 1) = probability of storing and using hash code. */ |
10 | /* JUMP = skip bytes for speedup */ |
11 | |
12 | /* Rolling hash for long distance long string matches. Stores one position |
13 | per bucket, bucket key is computed over a long region. */ |
14 | |
15 | #define HashRolling HASHER() |
16 | |
17 | static const uint32_t FN(kRollingHashMul32) = 69069; |
18 | static const uint32_t FN(kInvalidPos) = 0xffffffff; |
19 | |
20 | /* This hasher uses a longer forward length, but returning a higher value here |
21 | will hurt compression by the main hasher when combined with a composite |
22 | hasher. The hasher tests for forward itself instead. */ |
23 | static BROTLI_INLINE size_t FN(HashTypeLength)(void) { return 4; } |
24 | static BROTLI_INLINE size_t FN(StoreLookahead)(void) { return 4; } |
25 | |
26 | /* Computes a code from a single byte. A lookup table of 256 values could be |
27 | used, but simply adding 1 works about as good. */ |
28 | static uint32_t FN(HashByte)(uint8_t byte) { |
29 | return (uint32_t)byte + 1u; |
30 | } |
31 | |
32 | static uint32_t FN(HashRollingFunctionInitial)(uint32_t state, uint8_t add, |
33 | uint32_t factor) { |
34 | return (uint32_t)(factor * state + FN(HashByte)(add)); |
35 | } |
36 | |
37 | static uint32_t FN(HashRollingFunction)(uint32_t state, uint8_t add, |
38 | uint8_t rem, uint32_t factor, |
39 | uint32_t factor_remove) { |
40 | return (uint32_t)(factor * state + |
41 | FN(HashByte)(add) - factor_remove * FN(HashByte)(rem)); |
42 | } |
43 | |
44 | typedef struct HashRolling { |
45 | uint32_t state; |
46 | uint32_t* table; |
47 | size_t next_ix; |
48 | |
49 | uint32_t chunk_len; |
50 | uint32_t factor; |
51 | uint32_t factor_remove; |
52 | } HashRolling; |
53 | |
54 | static BROTLI_INLINE HashRolling* FN(Self)(HasherHandle handle) { |
55 | return (HashRolling*)&(GetHasherCommon(handle)[1]); |
56 | } |
57 | |
58 | static void FN(Initialize)( |
59 | HasherHandle handle, const BrotliEncoderParams* params) { |
60 | HashRolling* self = FN(Self)(handle); |
61 | size_t i; |
62 | self->state = 0; |
63 | self->next_ix = 0; |
64 | |
65 | self->factor = FN(kRollingHashMul32); |
66 | |
67 | /* Compute the factor of the oldest byte to remove: factor**steps modulo |
68 | 0xffffffff (the multiplications rely on 32-bit overflow) */ |
69 | self->factor_remove = 1; |
70 | for (i = 0; i < CHUNKLEN; i += JUMP) { |
71 | self->factor_remove *= self->factor; |
72 | } |
73 | |
74 | self->table = (uint32_t*)((HasherHandle)self + sizeof(HashRolling)); |
75 | for (i = 0; i < NUMBUCKETS; i++) { |
76 | self->table[i] = FN(kInvalidPos); |
77 | } |
78 | |
79 | BROTLI_UNUSED(params); |
80 | } |
81 | |
82 | static void FN(Prepare)(HasherHandle handle, BROTLI_BOOL one_shot, |
83 | size_t input_size, const uint8_t* data) { |
84 | HashRolling* self = FN(Self)(handle); |
85 | size_t i; |
86 | /* Too small size, cannot use this hasher. */ |
87 | if (input_size < CHUNKLEN) return; |
88 | self->state = 0; |
89 | for (i = 0; i < CHUNKLEN; i += JUMP) { |
90 | self->state = FN(HashRollingFunctionInitial)( |
91 | self->state, data[i], self->factor); |
92 | } |
93 | BROTLI_UNUSED(one_shot); |
94 | } |
95 | |
96 | static BROTLI_INLINE size_t FN(HashMemAllocInBytes)( |
97 | const BrotliEncoderParams* params, BROTLI_BOOL one_shot, |
98 | size_t input_size) { |
99 | return sizeof(HashRolling) + NUMBUCKETS * sizeof(uint32_t); |
100 | BROTLI_UNUSED(params); |
101 | BROTLI_UNUSED(one_shot); |
102 | BROTLI_UNUSED(input_size); |
103 | } |
104 | |
105 | static BROTLI_INLINE void FN(Store)(HasherHandle BROTLI_RESTRICT handle, |
106 | const uint8_t* BROTLI_RESTRICT data, const size_t mask, const size_t ix) { |
107 | BROTLI_UNUSED(handle); |
108 | BROTLI_UNUSED(data); |
109 | BROTLI_UNUSED(mask); |
110 | BROTLI_UNUSED(ix); |
111 | } |
112 | |
113 | static BROTLI_INLINE void FN(StoreRange)(HasherHandle handle, |
114 | const uint8_t* data, const size_t mask, const size_t ix_start, |
115 | const size_t ix_end) { |
116 | BROTLI_UNUSED(handle); |
117 | BROTLI_UNUSED(data); |
118 | BROTLI_UNUSED(mask); |
119 | BROTLI_UNUSED(ix_start); |
120 | BROTLI_UNUSED(ix_end); |
121 | } |
122 | |
123 | static BROTLI_INLINE void FN(StitchToPreviousBlock)(HasherHandle handle, |
124 | size_t num_bytes, size_t position, const uint8_t* ringbuffer, |
125 | size_t ring_buffer_mask) { |
126 | /* In this case we must re-initialize the hasher from scratch from the |
127 | current position. */ |
128 | HashRolling* self = FN(Self)(handle); |
129 | size_t position_masked; |
130 | size_t available = num_bytes; |
131 | if ((position & (JUMP - 1)) != 0) { |
132 | size_t diff = JUMP - (position & (JUMP - 1)); |
133 | available = (diff > available) ? 0 : (available - diff); |
134 | position += diff; |
135 | } |
136 | position_masked = position & ring_buffer_mask; |
137 | /* wrapping around ringbuffer not handled. */ |
138 | if (available > ring_buffer_mask - position_masked) { |
139 | available = ring_buffer_mask - position_masked; |
140 | } |
141 | |
142 | FN(Prepare)(handle, BROTLI_FALSE, available, |
143 | ringbuffer + (position & ring_buffer_mask)); |
144 | self->next_ix = position; |
145 | BROTLI_UNUSED(num_bytes); |
146 | } |
147 | |
148 | static BROTLI_INLINE void FN(PrepareDistanceCache)( |
149 | HasherHandle handle, int* BROTLI_RESTRICT distance_cache) { |
150 | BROTLI_UNUSED(handle); |
151 | BROTLI_UNUSED(distance_cache); |
152 | } |
153 | |
154 | static BROTLI_INLINE void FN(FindLongestMatch)(HasherHandle handle, |
155 | const BrotliEncoderDictionary* dictionary, |
156 | const uint8_t* BROTLI_RESTRICT data, const size_t ring_buffer_mask, |
157 | const int* BROTLI_RESTRICT distance_cache, const size_t cur_ix, |
158 | const size_t max_length, const size_t max_backward, |
159 | const size_t gap, const size_t max_distance, |
160 | HasherSearchResult* BROTLI_RESTRICT out) { |
161 | HashRolling* self = FN(Self)(handle); |
162 | const size_t cur_ix_masked = cur_ix & ring_buffer_mask; |
163 | size_t pos = self->next_ix; |
164 | |
165 | if ((cur_ix & (JUMP - 1)) != 0) return; |
166 | |
167 | /* Not enough lookahead */ |
168 | if (max_length < CHUNKLEN) return; |
169 | |
170 | for (pos = self->next_ix; pos <= cur_ix; pos += JUMP) { |
171 | uint32_t code = self->state & MASK; |
172 | |
173 | uint8_t rem = data[pos & ring_buffer_mask]; |
174 | uint8_t add = data[(pos + CHUNKLEN) & ring_buffer_mask]; |
175 | size_t found_ix = FN(kInvalidPos); |
176 | |
177 | self->state = FN(HashRollingFunction)( |
178 | self->state, add, rem, self->factor, self->factor_remove); |
179 | |
180 | if (code < NUMBUCKETS) { |
181 | found_ix = self->table[code]; |
182 | self->table[code] = (uint32_t)pos; |
183 | if (pos == cur_ix && found_ix != FN(kInvalidPos)) { |
184 | /* The cast to 32-bit makes backward distances up to 4GB work even |
185 | if cur_ix is above 4GB, despite using 32-bit values in the table. */ |
186 | size_t backward = (uint32_t)(cur_ix - found_ix); |
187 | if (backward <= max_backward) { |
188 | const size_t found_ix_masked = found_ix & ring_buffer_mask; |
189 | const size_t len = FindMatchLengthWithLimit(&data[found_ix_masked], |
190 | &data[cur_ix_masked], |
191 | max_length); |
192 | if (len >= 4 && len > out->len) { |
193 | score_t score = BackwardReferenceScore(len, backward); |
194 | if (score > out->score) { |
195 | out->len = len; |
196 | out->distance = backward; |
197 | out->score = score; |
198 | out->len_code_delta = 0; |
199 | } |
200 | } |
201 | } |
202 | } |
203 | } |
204 | } |
205 | |
206 | self->next_ix = cur_ix + JUMP; |
207 | |
208 | /* NOTE: this hasher does not search in the dictionary. It is used as |
209 | backup-hasher, the main hasher already searches in it. */ |
210 | BROTLI_UNUSED(dictionary); |
211 | BROTLI_UNUSED(distance_cache); |
212 | BROTLI_UNUSED(gap); |
213 | BROTLI_UNUSED(max_distance); |
214 | } |
215 | |
216 | #undef HashRolling |
217 | |