1/* NOLINT(build/header_guard) */
2/* Copyright 2016 Google Inc. All Rights Reserved.
3
4 Distributed under MIT license.
5 See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
6*/
7
8/* template parameters: FN, BUCKET_BITS, NUM_BANKS, BANK_BITS,
9 NUM_LAST_DISTANCES_TO_CHECK */
10
11/* A (forgetful) hash table to the data seen by the compressor, to
12 help create backward references to previous data.
13
   Hashes are stored in chains which are bucketed into groups. Each group of
   chains shares a storage "bank". When more than "bank size" chain nodes are
   added, the oldest nodes are replaced; this way several chains may share a
   tail. */
17
/* Name of the hasher state struct for this instantiation.
   NOTE(review): HASHER() is not defined in this file; presumably the including
   file provides it alongside the template parameters - confirm. */
#define HashForgetfulChain HASHER()

/* Number of chain-node slots in a single bank (see FN(Bank)). */
#define BANK_SIZE (1 << BANK_BITS)

/* Number of hash buckets. */
#define BUCKET_SIZE (1 << BUCKET_BITS)

/* Selects how FN(Store) records a delta that does not fit in 16 bits:
   when zero the delta is saturated to 0xFFFF; when non-zero it is stored as 0,
   and a zero delta then stops the chain walk in FN(FindLongestMatch). */
#define CAPPED_CHAINS 0
26
27static BROTLI_INLINE size_t FN(HashTypeLength)(void) { return 4; }
28static BROTLI_INLINE size_t FN(StoreLookahead)(void) { return 4; }
29
30/* HashBytes is the function that chooses the bucket to place the address in.*/
31static BROTLI_INLINE size_t FN(HashBytes)(const uint8_t* data) {
32 const uint32_t h = BROTLI_UNALIGNED_LOAD32LE(data) * kHashMul32;
33 /* The higher bits contain more mixture from the multiplication,
34 so we take our results from there. */
35 return h >> (32 - BUCKET_BITS);
36}
37
/* One node of a hash chain. */
typedef struct FN(Slot) {
  /* Distance from the position stored in this node back to the position that
     was previously stored for the same bucket; FN(Store) limits it to 16 bits
     (saturated to 0xFFFF, or 0 when CAPPED_CHAINS is set). */
  uint16_t delta;
  /* Index, within the same bank, of the next (older) node of the chain. */
  uint16_t next;
} FN(Slot);
42
/* Storage for chain nodes. FN(Store) picks the bank from the low bits of the
   bucket key (key & (NUM_BANKS - 1)), so all buckets with the same low bits
   place their nodes in the same bank. */
typedef struct FN(Bank) {
  FN(Slot) slots[BANK_SIZE];
} FN(Bank);
46
/* Complete state of the hasher. */
typedef struct HashForgetfulChain {
  /* Per bucket: the most recent position stored by FN(Store), truncated to
     32 bits. FN(Prepare) resets entries to 0xCCCCCCCC. */
  uint32_t addr[BUCKET_SIZE];
  /* Per bucket: slot index (within the bucket's bank) of the newest chain
     node. */
  uint16_t head[BUCKET_SIZE];
  /* Truncated hash used for quick rejection of "distance cache" candidates.
     Indexed by the low 16 bits of a position; holds the low 8 bits of the
     bucket key computed at that position. */
  uint8_t tiny_hash[65536];
  /* Chain node storage, one bank per value of (key & (NUM_BANKS - 1)). */
  FN(Bank) banks[NUM_BANKS];
  /* Per bank: running allocation counter; FN(Store) masks it with
     (BANK_SIZE - 1) to choose the slot to (over)write next. */
  uint16_t free_slot_idx[NUM_BANKS];
  /* Maximum number of chain nodes FN(FindLongestMatch) visits per call; set by
     FN(Initialize). */
  size_t max_hops;
} HashForgetfulChain;
56
57static BROTLI_INLINE HashForgetfulChain* FN(Self)(HasherHandle handle) {
58 return (HashForgetfulChain*)&(GetHasherCommon(handle)[1]);
59}
60
61static void FN(Initialize)(
62 HasherHandle handle, const BrotliEncoderParams* params) {
63 FN(Self)(handle)->max_hops =
64 (params->quality > 6 ? 7u : 8u) << (params->quality - 4);
65}
66
67static void FN(Prepare)(HasherHandle handle, BROTLI_BOOL one_shot,
68 size_t input_size, const uint8_t* data) {
69 HashForgetfulChain* self = FN(Self)(handle);
70 /* Partial preparation is 100 times slower (per socket). */
71 size_t partial_prepare_threshold = BUCKET_SIZE >> 6;
72 if (one_shot && input_size <= partial_prepare_threshold) {
73 size_t i;
74 for (i = 0; i < input_size; ++i) {
75 size_t bucket = FN(HashBytes)(&data[i]);
76 /* See InitEmpty comment. */
77 self->addr[bucket] = 0xCCCCCCCC;
78 self->head[bucket] = 0xCCCC;
79 }
80 } else {
81 /* Fill |addr| array with 0xCCCCCCCC value. Because of wrapping, position
82 processed by hasher never reaches 3GB + 64M; this makes all new chains
83 to be terminated after the first node. */
84 memset(self->addr, 0xCC, sizeof(self->addr));
85 memset(self->head, 0, sizeof(self->head));
86 }
87 memset(self->tiny_hash, 0, sizeof(self->tiny_hash));
88 memset(self->free_slot_idx, 0, sizeof(self->free_slot_idx));
89}
90
91static BROTLI_INLINE size_t FN(HashMemAllocInBytes)(
92 const BrotliEncoderParams* params, BROTLI_BOOL one_shot,
93 size_t input_size) {
94 BROTLI_UNUSED(params);
95 BROTLI_UNUSED(one_shot);
96 BROTLI_UNUSED(input_size);
97 return sizeof(HashForgetfulChain);
98}
99
100/* Look at 4 bytes at &data[ix & mask]. Compute a hash from these, and prepend
101 node to corresponding chain; also update tiny_hash for current position. */
102static BROTLI_INLINE void FN(Store)(HasherHandle BROTLI_RESTRICT handle,
103 const uint8_t* BROTLI_RESTRICT data, const size_t mask, const size_t ix) {
104 HashForgetfulChain* self = FN(Self)(handle);
105 const size_t key = FN(HashBytes)(&data[ix & mask]);
106 const size_t bank = key & (NUM_BANKS - 1);
107 const size_t idx = self->free_slot_idx[bank]++ & (BANK_SIZE - 1);
108 size_t delta = ix - self->addr[key];
109 self->tiny_hash[(uint16_t)ix] = (uint8_t)key;
110 if (delta > 0xFFFF) delta = CAPPED_CHAINS ? 0 : 0xFFFF;
111 self->banks[bank].slots[idx].delta = (uint16_t)delta;
112 self->banks[bank].slots[idx].next = self->head[key];
113 self->addr[key] = (uint32_t)ix;
114 self->head[key] = (uint16_t)idx;
115}
116
117static BROTLI_INLINE void FN(StoreRange)(HasherHandle handle,
118 const uint8_t* data, const size_t mask, const size_t ix_start,
119 const size_t ix_end) {
120 size_t i;
121 for (i = ix_start; i < ix_end; ++i) {
122 FN(Store)(handle, data, mask, i);
123 }
124}
125
126static BROTLI_INLINE void FN(StitchToPreviousBlock)(HasherHandle handle,
127 size_t num_bytes, size_t position, const uint8_t* ringbuffer,
128 size_t ring_buffer_mask) {
129 if (num_bytes >= FN(HashTypeLength)() - 1 && position >= 3) {
130 /* Prepare the hashes for three last bytes of the last write.
131 These could not be calculated before, since they require knowledge
132 of both the previous and the current block. */
133 FN(Store)(handle, ringbuffer, ring_buffer_mask, position - 3);
134 FN(Store)(handle, ringbuffer, ring_buffer_mask, position - 2);
135 FN(Store)(handle, ringbuffer, ring_buffer_mask, position - 1);
136 }
137}
138
139static BROTLI_INLINE void FN(PrepareDistanceCache)(
140 HasherHandle handle, int* BROTLI_RESTRICT distance_cache) {
141 BROTLI_UNUSED(handle);
142 PrepareDistanceCache(distance_cache, NUM_LAST_DISTANCES_TO_CHECK);
143}
144
145/* Find a longest backward match of &data[cur_ix] up to the length of
146 max_length and stores the position cur_ix in the hash table.
147
148 REQUIRES: FN(PrepareDistanceCache) must be invoked for current distance cache
149 values; if this method is invoked repeatedly with the same distance
150 cache values, it is enough to invoke FN(PrepareDistanceCache) once.
151
152 Does not look for matches longer than max_length.
153 Does not look for matches further away than max_backward.
154 Writes the best match into |out|.
155 |out|->score is updated only if a better match is found. */
156static BROTLI_INLINE void FN(FindLongestMatch)(HasherHandle handle,
157 const BrotliEncoderDictionary* dictionary,
158 const uint8_t* BROTLI_RESTRICT data, const size_t ring_buffer_mask,
159 const int* BROTLI_RESTRICT distance_cache,
160 const size_t cur_ix, const size_t max_length, const size_t max_backward,
161 const size_t gap, const size_t max_distance,
162 HasherSearchResult* BROTLI_RESTRICT out) {
163 HashForgetfulChain* self = FN(Self)(handle);
164 const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
165 /* Don't accept a short copy from far away. */
166 score_t min_score = out->score;
167 score_t best_score = out->score;
168 size_t best_len = out->len;
169 size_t i;
170 const size_t key = FN(HashBytes)(&data[cur_ix_masked]);
171 const uint8_t tiny_hash = (uint8_t)(key);
172 out->len = 0;
173 out->len_code_delta = 0;
174 /* Try last distance first. */
175 for (i = 0; i < NUM_LAST_DISTANCES_TO_CHECK; ++i) {
176 const size_t backward = (size_t)distance_cache[i];
177 size_t prev_ix = (cur_ix - backward);
178 /* For distance code 0 we want to consider 2-byte matches. */
179 if (i > 0 && self->tiny_hash[(uint16_t)prev_ix] != tiny_hash) continue;
180 if (prev_ix >= cur_ix || backward > max_backward) {
181 continue;
182 }
183 prev_ix &= ring_buffer_mask;
184 {
185 const size_t len = FindMatchLengthWithLimit(&data[prev_ix],
186 &data[cur_ix_masked],
187 max_length);
188 if (len >= 2) {
189 score_t score = BackwardReferenceScoreUsingLastDistance(len);
190 if (best_score < score) {
191 if (i != 0) score -= BackwardReferencePenaltyUsingLastDistance(i);
192 if (best_score < score) {
193 best_score = score;
194 best_len = len;
195 out->len = best_len;
196 out->distance = backward;
197 out->score = best_score;
198 }
199 }
200 }
201 }
202 }
203 {
204 const size_t bank = key & (NUM_BANKS - 1);
205 size_t backward = 0;
206 size_t hops = self->max_hops;
207 size_t delta = cur_ix - self->addr[key];
208 size_t slot = self->head[key];
209 while (hops--) {
210 size_t prev_ix;
211 size_t last = slot;
212 backward += delta;
213 if (backward > max_backward || (CAPPED_CHAINS && !delta)) break;
214 prev_ix = (cur_ix - backward) & ring_buffer_mask;
215 slot = self->banks[bank].slots[last].next;
216 delta = self->banks[bank].slots[last].delta;
217 if (cur_ix_masked + best_len > ring_buffer_mask ||
218 prev_ix + best_len > ring_buffer_mask ||
219 data[cur_ix_masked + best_len] != data[prev_ix + best_len]) {
220 continue;
221 }
222 {
223 const size_t len = FindMatchLengthWithLimit(&data[prev_ix],
224 &data[cur_ix_masked],
225 max_length);
226 if (len >= 4) {
227 /* Comparing for >= 3 does not change the semantics, but just saves
228 for a few unnecessary binary logarithms in backward reference
229 score, since we are not interested in such short matches. */
230 score_t score = BackwardReferenceScore(len, backward);
231 if (best_score < score) {
232 best_score = score;
233 best_len = len;
234 out->len = best_len;
235 out->distance = backward;
236 out->score = best_score;
237 }
238 }
239 }
240 }
241 FN(Store)(handle, data, ring_buffer_mask, cur_ix);
242 }
243 if (out->score == min_score) {
244 SearchInStaticDictionary(dictionary,
245 handle, &data[cur_ix_masked], max_length, max_backward + gap,
246 max_distance, out, BROTLI_FALSE);
247 }
248}
249
/* Remove the helper macros defined at the top of this file.
   NOTE(review): presumably this allows the template to be included again with
   different parameters - confirm against the including file. */
#undef BANK_SIZE
#undef BUCKET_SIZE
#undef CAPPED_CHAINS

#undef HashForgetfulChain
255