1 | /* NOLINT(build/header_guard) */ |
2 | /* Copyright 2013 Google Inc. All Rights Reserved. |
3 | |
4 | Distributed under MIT license. |
5 | See file LICENSE for detail or copy at https://opensource.org/licenses/MIT |
6 | */ |
7 | |
8 | /* template parameters: FN, DataType */ |
9 | |
10 | #define HistogramType FN(Histogram) |
11 | |
12 | static void FN(InitialEntropyCodes)(const DataType* data, size_t length, |
13 | size_t stride, |
14 | size_t num_histograms, |
15 | HistogramType* histograms) { |
16 | uint32_t seed = 7; |
17 | size_t block_length = length / num_histograms; |
18 | size_t i; |
19 | FN(ClearHistograms)(histograms, num_histograms); |
20 | for (i = 0; i < num_histograms; ++i) { |
21 | size_t pos = length * i / num_histograms; |
22 | if (i != 0) { |
23 | pos += MyRand(&seed) % block_length; |
24 | } |
25 | if (pos + stride >= length) { |
26 | pos = length - stride - 1; |
27 | } |
28 | FN(HistogramAddVector)(&histograms[i], data + pos, stride); |
29 | } |
30 | } |
31 | |
32 | static void FN(RandomSample)(uint32_t* seed, |
33 | const DataType* data, |
34 | size_t length, |
35 | size_t stride, |
36 | HistogramType* sample) { |
37 | size_t pos = 0; |
38 | if (stride >= length) { |
39 | stride = length; |
40 | } else { |
41 | pos = MyRand(seed) % (length - stride + 1); |
42 | } |
43 | FN(HistogramAddVector)(sample, data + pos, stride); |
44 | } |
45 | |
46 | static void FN(RefineEntropyCodes)(const DataType* data, size_t length, |
47 | size_t stride, |
48 | size_t num_histograms, |
49 | HistogramType* histograms) { |
50 | size_t iters = |
51 | kIterMulForRefining * length / stride + kMinItersForRefining; |
52 | uint32_t seed = 7; |
53 | size_t iter; |
54 | iters = ((iters + num_histograms - 1) / num_histograms) * num_histograms; |
55 | for (iter = 0; iter < iters; ++iter) { |
56 | HistogramType sample; |
57 | FN(HistogramClear)(&sample); |
58 | FN(RandomSample)(&seed, data, length, stride, &sample); |
59 | FN(HistogramAddHistogram)(&histograms[iter % num_histograms], &sample); |
60 | } |
61 | } |
62 | |
63 | /* Assigns a block id from the range [0, num_histograms) to each data element |
64 | in data[0..length) and fills in block_id[0..length) with the assigned values. |
65 | Returns the number of blocks, i.e. one plus the number of block switches. */ |
66 | static size_t FN(FindBlocks)(const DataType* data, const size_t length, |
67 | const double block_switch_bitcost, |
68 | const size_t num_histograms, |
69 | const HistogramType* histograms, |
70 | double* insert_cost, |
71 | double* cost, |
72 | uint8_t* switch_signal, |
73 | uint8_t* block_id) { |
74 | const size_t data_size = FN(HistogramDataSize)(); |
75 | const size_t bitmaplen = (num_histograms + 7) >> 3; |
76 | size_t num_blocks = 1; |
77 | size_t i; |
78 | size_t j; |
79 | BROTLI_DCHECK(num_histograms <= 256); |
80 | if (num_histograms <= 1) { |
81 | for (i = 0; i < length; ++i) { |
82 | block_id[i] = 0; |
83 | } |
84 | return 1; |
85 | } |
86 | memset(insert_cost, 0, sizeof(insert_cost[0]) * data_size * num_histograms); |
87 | for (i = 0; i < num_histograms; ++i) { |
88 | insert_cost[i] = FastLog2((uint32_t)histograms[i].total_count_); |
89 | } |
90 | for (i = data_size; i != 0;) { |
91 | --i; |
92 | for (j = 0; j < num_histograms; ++j) { |
93 | insert_cost[i * num_histograms + j] = |
94 | insert_cost[j] - BitCost(histograms[j].data_[i]); |
95 | } |
96 | } |
97 | memset(cost, 0, sizeof(cost[0]) * num_histograms); |
98 | memset(switch_signal, 0, sizeof(switch_signal[0]) * length * bitmaplen); |
99 | /* After each iteration of this loop, cost[k] will contain the difference |
100 | between the minimum cost of arriving at the current byte position using |
101 | entropy code k, and the minimum cost of arriving at the current byte |
102 | position. This difference is capped at the block switch cost, and if it |
103 | reaches block switch cost, it means that when we trace back from the last |
104 | position, we need to switch here. */ |
105 | for (i = 0; i < length; ++i) { |
106 | const size_t byte_ix = i; |
107 | size_t ix = byte_ix * bitmaplen; |
108 | size_t insert_cost_ix = data[byte_ix] * num_histograms; |
109 | double min_cost = 1e99; |
110 | double block_switch_cost = block_switch_bitcost; |
111 | size_t k; |
112 | for (k = 0; k < num_histograms; ++k) { |
113 | /* We are coding the symbol in data[byte_ix] with entropy code k. */ |
114 | cost[k] += insert_cost[insert_cost_ix + k]; |
115 | if (cost[k] < min_cost) { |
116 | min_cost = cost[k]; |
117 | block_id[byte_ix] = (uint8_t)k; |
118 | } |
119 | } |
120 | /* More blocks for the beginning. */ |
121 | if (byte_ix < 2000) { |
122 | block_switch_cost *= 0.77 + 0.07 * (double)byte_ix / 2000; |
123 | } |
124 | for (k = 0; k < num_histograms; ++k) { |
125 | cost[k] -= min_cost; |
126 | if (cost[k] >= block_switch_cost) { |
127 | const uint8_t mask = (uint8_t)(1u << (k & 7)); |
128 | cost[k] = block_switch_cost; |
129 | BROTLI_DCHECK((k >> 3) < bitmaplen); |
130 | switch_signal[ix + (k >> 3)] |= mask; |
131 | } |
132 | } |
133 | } |
134 | { /* Trace back from the last position and switch at the marked places. */ |
135 | size_t byte_ix = length - 1; |
136 | size_t ix = byte_ix * bitmaplen; |
137 | uint8_t cur_id = block_id[byte_ix]; |
138 | while (byte_ix > 0) { |
139 | const uint8_t mask = (uint8_t)(1u << (cur_id & 7)); |
140 | BROTLI_DCHECK(((size_t)cur_id >> 3) < bitmaplen); |
141 | --byte_ix; |
142 | ix -= bitmaplen; |
143 | if (switch_signal[ix + (cur_id >> 3)] & mask) { |
144 | if (cur_id != block_id[byte_ix]) { |
145 | cur_id = block_id[byte_ix]; |
146 | ++num_blocks; |
147 | } |
148 | } |
149 | block_id[byte_ix] = cur_id; |
150 | } |
151 | } |
152 | return num_blocks; |
153 | } |
154 | |
155 | static size_t FN(RemapBlockIds)(uint8_t* block_ids, const size_t length, |
156 | uint16_t* new_id, const size_t num_histograms) { |
157 | static const uint16_t kInvalidId = 256; |
158 | uint16_t next_id = 0; |
159 | size_t i; |
160 | for (i = 0; i < num_histograms; ++i) { |
161 | new_id[i] = kInvalidId; |
162 | } |
163 | for (i = 0; i < length; ++i) { |
164 | BROTLI_DCHECK(block_ids[i] < num_histograms); |
165 | if (new_id[block_ids[i]] == kInvalidId) { |
166 | new_id[block_ids[i]] = next_id++; |
167 | } |
168 | } |
169 | for (i = 0; i < length; ++i) { |
170 | block_ids[i] = (uint8_t)new_id[block_ids[i]]; |
171 | BROTLI_DCHECK(block_ids[i] < num_histograms); |
172 | } |
173 | BROTLI_DCHECK(next_id <= num_histograms); |
174 | return next_id; |
175 | } |
176 | |
177 | static void FN(BuildBlockHistograms)(const DataType* data, const size_t length, |
178 | const uint8_t* block_ids, |
179 | const size_t num_histograms, |
180 | HistogramType* histograms) { |
181 | size_t i; |
182 | FN(ClearHistograms)(histograms, num_histograms); |
183 | for (i = 0; i < length; ++i) { |
184 | FN(HistogramAdd)(&histograms[block_ids[i]], data[i]); |
185 | } |
186 | } |
187 | |
188 | static void FN(ClusterBlocks)(MemoryManager* m, |
189 | const DataType* data, const size_t length, |
190 | const size_t num_blocks, |
191 | uint8_t* block_ids, |
192 | BlockSplit* split) { |
193 | uint32_t* histogram_symbols = BROTLI_ALLOC(m, uint32_t, num_blocks); |
194 | uint32_t* block_lengths = BROTLI_ALLOC(m, uint32_t, num_blocks); |
195 | const size_t expected_num_clusters = CLUSTERS_PER_BATCH * |
196 | (num_blocks + HISTOGRAMS_PER_BATCH - 1) / HISTOGRAMS_PER_BATCH; |
197 | size_t all_histograms_size = 0; |
198 | size_t all_histograms_capacity = expected_num_clusters; |
199 | HistogramType* all_histograms = |
200 | BROTLI_ALLOC(m, HistogramType, all_histograms_capacity); |
201 | size_t cluster_size_size = 0; |
202 | size_t cluster_size_capacity = expected_num_clusters; |
203 | uint32_t* cluster_size = BROTLI_ALLOC(m, uint32_t, cluster_size_capacity); |
204 | size_t num_clusters = 0; |
205 | HistogramType* histograms = BROTLI_ALLOC(m, HistogramType, |
206 | BROTLI_MIN(size_t, num_blocks, HISTOGRAMS_PER_BATCH)); |
207 | size_t max_num_pairs = |
208 | HISTOGRAMS_PER_BATCH * HISTOGRAMS_PER_BATCH / 2; |
209 | size_t pairs_capacity = max_num_pairs + 1; |
210 | HistogramPair* pairs = BROTLI_ALLOC(m, HistogramPair, pairs_capacity); |
211 | size_t pos = 0; |
212 | uint32_t* clusters; |
213 | size_t num_final_clusters; |
214 | static const uint32_t kInvalidIndex = BROTLI_UINT32_MAX; |
215 | uint32_t* new_index; |
216 | size_t i; |
217 | uint32_t sizes[HISTOGRAMS_PER_BATCH] = { 0 }; |
218 | uint32_t new_clusters[HISTOGRAMS_PER_BATCH] = { 0 }; |
219 | uint32_t symbols[HISTOGRAMS_PER_BATCH] = { 0 }; |
220 | uint32_t remap[HISTOGRAMS_PER_BATCH] = { 0 }; |
221 | |
222 | if (BROTLI_IS_OOM(m)) return; |
223 | |
224 | memset(block_lengths, 0, num_blocks * sizeof(uint32_t)); |
225 | |
226 | { |
227 | size_t block_idx = 0; |
228 | for (i = 0; i < length; ++i) { |
229 | BROTLI_DCHECK(block_idx < num_blocks); |
230 | ++block_lengths[block_idx]; |
231 | if (i + 1 == length || block_ids[i] != block_ids[i + 1]) { |
232 | ++block_idx; |
233 | } |
234 | } |
235 | BROTLI_DCHECK(block_idx == num_blocks); |
236 | } |
237 | |
238 | for (i = 0; i < num_blocks; i += HISTOGRAMS_PER_BATCH) { |
239 | const size_t num_to_combine = |
240 | BROTLI_MIN(size_t, num_blocks - i, HISTOGRAMS_PER_BATCH); |
241 | size_t num_new_clusters; |
242 | size_t j; |
243 | for (j = 0; j < num_to_combine; ++j) { |
244 | size_t k; |
245 | FN(HistogramClear)(&histograms[j]); |
246 | for (k = 0; k < block_lengths[i + j]; ++k) { |
247 | FN(HistogramAdd)(&histograms[j], data[pos++]); |
248 | } |
249 | histograms[j].bit_cost_ = FN(BrotliPopulationCost)(&histograms[j]); |
250 | new_clusters[j] = (uint32_t)j; |
251 | symbols[j] = (uint32_t)j; |
252 | sizes[j] = 1; |
253 | } |
254 | num_new_clusters = FN(BrotliHistogramCombine)( |
255 | histograms, sizes, symbols, new_clusters, pairs, num_to_combine, |
256 | num_to_combine, HISTOGRAMS_PER_BATCH, max_num_pairs); |
257 | BROTLI_ENSURE_CAPACITY(m, HistogramType, all_histograms, |
258 | all_histograms_capacity, all_histograms_size + num_new_clusters); |
259 | BROTLI_ENSURE_CAPACITY(m, uint32_t, cluster_size, |
260 | cluster_size_capacity, cluster_size_size + num_new_clusters); |
261 | if (BROTLI_IS_OOM(m)) return; |
262 | for (j = 0; j < num_new_clusters; ++j) { |
263 | all_histograms[all_histograms_size++] = histograms[new_clusters[j]]; |
264 | cluster_size[cluster_size_size++] = sizes[new_clusters[j]]; |
265 | remap[new_clusters[j]] = (uint32_t)j; |
266 | } |
267 | for (j = 0; j < num_to_combine; ++j) { |
268 | histogram_symbols[i + j] = (uint32_t)num_clusters + remap[symbols[j]]; |
269 | } |
270 | num_clusters += num_new_clusters; |
271 | BROTLI_DCHECK(num_clusters == cluster_size_size); |
272 | BROTLI_DCHECK(num_clusters == all_histograms_size); |
273 | } |
274 | BROTLI_FREE(m, histograms); |
275 | |
276 | max_num_pairs = |
277 | BROTLI_MIN(size_t, 64 * num_clusters, (num_clusters / 2) * num_clusters); |
278 | if (pairs_capacity < max_num_pairs + 1) { |
279 | BROTLI_FREE(m, pairs); |
280 | pairs = BROTLI_ALLOC(m, HistogramPair, max_num_pairs + 1); |
281 | if (BROTLI_IS_OOM(m)) return; |
282 | } |
283 | |
284 | clusters = BROTLI_ALLOC(m, uint32_t, num_clusters); |
285 | if (BROTLI_IS_OOM(m)) return; |
286 | for (i = 0; i < num_clusters; ++i) { |
287 | clusters[i] = (uint32_t)i; |
288 | } |
289 | num_final_clusters = FN(BrotliHistogramCombine)( |
290 | all_histograms, cluster_size, histogram_symbols, clusters, pairs, |
291 | num_clusters, num_blocks, BROTLI_MAX_NUMBER_OF_BLOCK_TYPES, |
292 | max_num_pairs); |
293 | BROTLI_FREE(m, pairs); |
294 | BROTLI_FREE(m, cluster_size); |
295 | |
296 | new_index = BROTLI_ALLOC(m, uint32_t, num_clusters); |
297 | if (BROTLI_IS_OOM(m)) return; |
298 | for (i = 0; i < num_clusters; ++i) new_index[i] = kInvalidIndex; |
299 | pos = 0; |
300 | { |
301 | uint32_t next_index = 0; |
302 | for (i = 0; i < num_blocks; ++i) { |
303 | HistogramType histo; |
304 | size_t j; |
305 | uint32_t best_out; |
306 | double best_bits; |
307 | FN(HistogramClear)(&histo); |
308 | for (j = 0; j < block_lengths[i]; ++j) { |
309 | FN(HistogramAdd)(&histo, data[pos++]); |
310 | } |
311 | best_out = (i == 0) ? histogram_symbols[0] : histogram_symbols[i - 1]; |
312 | best_bits = |
313 | FN(BrotliHistogramBitCostDistance)(&histo, &all_histograms[best_out]); |
314 | for (j = 0; j < num_final_clusters; ++j) { |
315 | const double cur_bits = FN(BrotliHistogramBitCostDistance)( |
316 | &histo, &all_histograms[clusters[j]]); |
317 | if (cur_bits < best_bits) { |
318 | best_bits = cur_bits; |
319 | best_out = clusters[j]; |
320 | } |
321 | } |
322 | histogram_symbols[i] = best_out; |
323 | if (new_index[best_out] == kInvalidIndex) { |
324 | new_index[best_out] = next_index++; |
325 | } |
326 | } |
327 | } |
328 | BROTLI_FREE(m, clusters); |
329 | BROTLI_FREE(m, all_histograms); |
330 | BROTLI_ENSURE_CAPACITY( |
331 | m, uint8_t, split->types, split->types_alloc_size, num_blocks); |
332 | BROTLI_ENSURE_CAPACITY( |
333 | m, uint32_t, split->lengths, split->lengths_alloc_size, num_blocks); |
334 | if (BROTLI_IS_OOM(m)) return; |
335 | { |
336 | uint32_t cur_length = 0; |
337 | size_t block_idx = 0; |
338 | uint8_t max_type = 0; |
339 | for (i = 0; i < num_blocks; ++i) { |
340 | cur_length += block_lengths[i]; |
341 | if (i + 1 == num_blocks || |
342 | histogram_symbols[i] != histogram_symbols[i + 1]) { |
343 | const uint8_t id = (uint8_t)new_index[histogram_symbols[i]]; |
344 | split->types[block_idx] = id; |
345 | split->lengths[block_idx] = cur_length; |
346 | max_type = BROTLI_MAX(uint8_t, max_type, id); |
347 | cur_length = 0; |
348 | ++block_idx; |
349 | } |
350 | } |
351 | split->num_blocks = block_idx; |
352 | split->num_types = (size_t)max_type + 1; |
353 | } |
354 | BROTLI_FREE(m, new_index); |
355 | BROTLI_FREE(m, block_lengths); |
356 | BROTLI_FREE(m, histogram_symbols); |
357 | } |
358 | |
359 | static void FN(SplitByteVector)(MemoryManager* m, |
360 | const DataType* data, const size_t length, |
361 | const size_t literals_per_histogram, |
362 | const size_t max_histograms, |
363 | const size_t sampling_stride_length, |
364 | const double block_switch_cost, |
365 | const BrotliEncoderParams* params, |
366 | BlockSplit* split) { |
367 | const size_t data_size = FN(HistogramDataSize)(); |
368 | size_t num_histograms = length / literals_per_histogram + 1; |
369 | HistogramType* histograms; |
370 | if (num_histograms > max_histograms) { |
371 | num_histograms = max_histograms; |
372 | } |
373 | if (length == 0) { |
374 | split->num_types = 1; |
375 | return; |
376 | } else if (length < kMinLengthForBlockSplitting) { |
377 | BROTLI_ENSURE_CAPACITY(m, uint8_t, |
378 | split->types, split->types_alloc_size, split->num_blocks + 1); |
379 | BROTLI_ENSURE_CAPACITY(m, uint32_t, |
380 | split->lengths, split->lengths_alloc_size, split->num_blocks + 1); |
381 | if (BROTLI_IS_OOM(m)) return; |
382 | split->num_types = 1; |
383 | split->types[split->num_blocks] = 0; |
384 | split->lengths[split->num_blocks] = (uint32_t)length; |
385 | split->num_blocks++; |
386 | return; |
387 | } |
388 | histograms = BROTLI_ALLOC(m, HistogramType, num_histograms); |
389 | if (BROTLI_IS_OOM(m)) return; |
390 | /* Find good entropy codes. */ |
391 | FN(InitialEntropyCodes)(data, length, |
392 | sampling_stride_length, |
393 | num_histograms, histograms); |
394 | FN(RefineEntropyCodes)(data, length, |
395 | sampling_stride_length, |
396 | num_histograms, histograms); |
397 | { |
398 | /* Find a good path through literals with the good entropy codes. */ |
399 | uint8_t* block_ids = BROTLI_ALLOC(m, uint8_t, length); |
400 | size_t num_blocks = 0; |
401 | const size_t bitmaplen = (num_histograms + 7) >> 3; |
402 | double* insert_cost = BROTLI_ALLOC(m, double, data_size * num_histograms); |
403 | double* cost = BROTLI_ALLOC(m, double, num_histograms); |
404 | uint8_t* switch_signal = BROTLI_ALLOC(m, uint8_t, length * bitmaplen); |
405 | uint16_t* new_id = BROTLI_ALLOC(m, uint16_t, num_histograms); |
406 | const size_t iters = params->quality < HQ_ZOPFLIFICATION_QUALITY ? 3 : 10; |
407 | size_t i; |
408 | if (BROTLI_IS_OOM(m)) return; |
409 | for (i = 0; i < iters; ++i) { |
410 | num_blocks = FN(FindBlocks)(data, length, |
411 | block_switch_cost, |
412 | num_histograms, histograms, |
413 | insert_cost, cost, switch_signal, |
414 | block_ids); |
415 | num_histograms = FN(RemapBlockIds)(block_ids, length, |
416 | new_id, num_histograms); |
417 | FN(BuildBlockHistograms)(data, length, block_ids, |
418 | num_histograms, histograms); |
419 | } |
420 | BROTLI_FREE(m, insert_cost); |
421 | BROTLI_FREE(m, cost); |
422 | BROTLI_FREE(m, switch_signal); |
423 | BROTLI_FREE(m, new_id); |
424 | BROTLI_FREE(m, histograms); |
425 | FN(ClusterBlocks)(m, data, length, num_blocks, block_ids, split); |
426 | if (BROTLI_IS_OOM(m)) return; |
427 | BROTLI_FREE(m, block_ids); |
428 | } |
429 | } |
430 | |
431 | #undef HistogramType |
432 | |