histogram_enc.c source code [engine/third_party/libwebp/src/enc/histogram_enc.c]

1	// Copyright 2012 Google Inc. All Rights Reserved.
2	//
3	// Use of this source code is governed by a BSD-style license
4	// that can be found in the COPYING file in the root of the source
5	// tree. An additional intellectual property rights grant can be found
6	// in the file PATENTS. All contributing project authors may
7	// be found in the AUTHORS file in the root of the source tree.
8	// -----------------------------------------------------------------------------
9	//
10	// Author: Jyrki Alakuijala (jyrki@google.com)
11	//
12	#ifdef HAVE_CONFIG_H
13	#include "../webp/config.h"
14	#endif
15
16	#include <math.h>
17
18	#include "./backward_references_enc.h"
19	#include "./histogram_enc.h"
20	#include "../dsp/lossless.h"
21	#include "../dsp/lossless_common.h"
22	#include "../utils/utils.h"
23
24	#define MAX_COST 1.e38
25
26	// Number of partitions for the three dominant (literal, red and blue) symbol
27	// costs.
28	#define NUM_PARTITIONS 4
29	// The size of the bin-hash corresponding to the three dominant costs.
30	#define BIN_SIZE (NUM_PARTITIONS * NUM_PARTITIONS * NUM_PARTITIONS)
31	// Maximum number of histograms allowed in greedy combining algorithm.
32	#define MAX_HISTO_GREEDY 100
33
34	static void HistogramClear(VP8LHistogram* const p) {
35	uint32_t* const literal = p->literal_;
36	const int cache_bits = p->palette_code_bits_;
37	const int histo_size = VP8LGetHistogramSize(cache_bits);
38	memset(p, `0`, histo_size);
39	p->palette_code_bits_ = cache_bits;
40	p->literal_ = literal;
41	}
42
43	// Swap two histogram pointers.
44	static void HistogramSwap(VP8LHistogram** const A, VP8LHistogram** const B) {
45	VP8LHistogram* const tmp = *A;
46	A = B;
47	*B = tmp;
48	}
49
50	static void HistogramCopy(const VP8LHistogram* const src,
51	VP8LHistogram* const dst) {
52	uint32_t* const dst_literal = dst->literal_;
53	const int dst_cache_bits = dst->palette_code_bits_;
54	const int histo_size = VP8LGetHistogramSize(dst_cache_bits);
55	assert(src->palette_code_bits_ == dst_cache_bits);
56	memcpy(dst, src, histo_size);
57	dst->literal_ = dst_literal;
58	}
59
60	int VP8LGetHistogramSize(int cache_bits) {
61	const int literal_size = VP8LHistogramNumCodes(cache_bits);
62	const size_t total_size = sizeof(VP8LHistogram) + sizeof(int) * literal_size;
63	assert(total_size <= (size_t)`0x7fffffff`);
64	return (int)total_size;
65	}
66
67	void VP8LFreeHistogram(VP8LHistogram* const histo) {
68	WebPSafeFree(histo);
69	}
70
71	void VP8LFreeHistogramSet(VP8LHistogramSet* const histo) {
72	WebPSafeFree(histo);
73	}
74
75	void VP8LHistogramStoreRefs(const VP8LBackwardRefs* const refs,
76	VP8LHistogram* const histo) {
77	VP8LRefsCursor c = VP8LRefsCursorInit(refs);
78	while (VP8LRefsCursorOk(&c)) {
79	VP8LHistogramAddSinglePixOrCopy(histo, c.cur_pos);
80	VP8LRefsCursorNext(&c);
81	}
82	}
83
84	void VP8LHistogramCreate(VP8LHistogram* const p,
85	const VP8LBackwardRefs* const refs,
86	int palette_code_bits) {
87	if (palette_code_bits >= `0`) {
88	p->palette_code_bits_ = palette_code_bits;
89	}
90	HistogramClear(p);
91	VP8LHistogramStoreRefs(refs, p);
92	}
93
94	void VP8LHistogramInit(VP8LHistogram* const p, int palette_code_bits) {
95	p->palette_code_bits_ = palette_code_bits;
96	HistogramClear(p);
97	}
98
99	VP8LHistogram* VP8LAllocateHistogram(int cache_bits) {
100	VP8LHistogram* histo = NULL;
101	const int total_size = VP8LGetHistogramSize(cache_bits);
102	uint8_t* const memory = (uint8_t)WebPSafeMalloc(total_size, sizeof(memory));
103	if (memory == NULL) return NULL;
104	histo = (VP8LHistogram*)memory;
105	// literal_ won't necessary be aligned.
106	histo->literal_ = (uint32_t)(memory + sizeof*(VP8LHistogram));
107	VP8LHistogramInit(histo, cache_bits);
108	return histo;
109	}
110
111	VP8LHistogramSet* VP8LAllocateHistogramSet(int size, int cache_bits) {
112	int i;
113	VP8LHistogramSet* set;
114	const int histo_size = VP8LGetHistogramSize(cache_bits);
115	const size_t total_size =
116	sizeof(set) + size (sizeof(*set->histograms) +
117	histo_size + WEBP_ALIGN_CST);
118	uint8_t* memory = (uint8_t)WebPSafeMalloc(total_size, sizeof(memory));
119	if (memory == NULL) return NULL;
120
121	set = (VP8LHistogramSet*)memory;
122	memory += sizeof(*set);
123	set->histograms = (VP8LHistogram**)memory;
124	memory += size * sizeof(*set->histograms);
125	set->max_size = size;
126	set->size = size;
127	for (i = `0`; i < size; ++i) {
128	memory = (uint8_t*)WEBP_ALIGN(memory);
129	set->histograms[i] = (VP8LHistogram*)memory;
130	// literal_ won't necessary be aligned.
131	set->histograms[i]->literal_ = (uint32_t)(memory + sizeof*(VP8LHistogram));
132	VP8LHistogramInit(set->histograms[i], cache_bits);
133	memory += histo_size;
134	}
135	return set;
136	}
137
138	// -----------------------------------------------------------------------------
139
140	void VP8LHistogramAddSinglePixOrCopy(VP8LHistogram* const histo,
141	const PixOrCopy* const v) {
142	if (PixOrCopyIsLiteral(v)) {
143	++histo->alpha_[PixOrCopyLiteral(v, `3`)];
144	++histo->red_[PixOrCopyLiteral(v, `2`)];
145	++histo->literal_[PixOrCopyLiteral(v, `1`)];
146	++histo->blue_[PixOrCopyLiteral(v, `0`)];
147	} else if (PixOrCopyIsCacheIdx(v)) {
148	const int literal_ix =
149	NUM_LITERAL_CODES + NUM_LENGTH_CODES + PixOrCopyCacheIdx(v);
150	++histo->literal_[literal_ix];
151	} else {
152	int code, extra_bits;
153	VP8LPrefixEncodeBits(PixOrCopyLength(v), &code, &extra_bits);
154	++histo->literal_[NUM_LITERAL_CODES + code];
155	VP8LPrefixEncodeBits(PixOrCopyDistance(v), &code, &extra_bits);
156	++histo->distance_[code];
157	}
158	}
159
160	// -----------------------------------------------------------------------------
161	// Entropy-related functions.
162
163	static WEBP_INLINE double BitsEntropyRefine(const VP8LBitEntropy* entropy) {
164	double mix;
165	if (entropy->nonzeros < `5`) {
166	if (entropy->nonzeros <= `1`) {
167	return `0`;
168	}
169	// Two symbols, they will be 0 and 1 in a Huffman code.
170	// Let's mix in a bit of entropy to favor good clustering when
171	// distributions of these are combined.
172	if (entropy->nonzeros == `2`) {
173	return `0.99` * entropy->sum + `0.01` * entropy->entropy;
174	}
175	// No matter what the entropy says, we cannot be better than min_limit
176	// with Huffman coding. I am mixing a bit of entropy into the
177	// min_limit since it produces much better (~0.5 %) compression results
178	// perhaps because of better entropy clustering.
179	if (entropy->nonzeros == `3`) {
180	mix = `0.95`;
181	} else {
182	mix = `0.7`; // nonzeros == 4.
183	}
184	} else {
185	mix = `0.627`;
186	}
187
188	{
189	double min_limit = `2` * entropy->sum - entropy->max_val;
190	min_limit = mix * min_limit + (`1.0` - mix) * entropy->entropy;
191	return (entropy->entropy < min_limit) ? min_limit : entropy->entropy;
192	}
193	}
194
195	double VP8LBitsEntropy(const uint32_t* const array, int n,
196	uint32_t* const trivial_symbol) {
197	VP8LBitEntropy entropy;
198	VP8LBitsEntropyUnrefined(array, n, &entropy);
199	if (trivial_symbol != NULL) {
200	*trivial_symbol =
201	(entropy.nonzeros == `1`) ? entropy.nonzero_code : VP8L_NON_TRIVIAL_SYM;
202	}
203
204	return BitsEntropyRefine(&entropy);
205	}
206
207	static double InitialHuffmanCost(void) {
208	// Small bias because Huffman code length is typically not stored in
209	// full length.
210	static const int kHuffmanCodeOfHuffmanCodeSize = CODE_LENGTH_CODES * `3`;
211	static const double kSmallBias = `9.1`;
212	return kHuffmanCodeOfHuffmanCodeSize - kSmallBias;
213	}
214
215	// Finalize the Huffman cost based on streak numbers and length type (<3 or >=3)
216	static double FinalHuffmanCost(const VP8LStreaks* const stats) {
217	// The constants in this function are experimental and got rounded from
218	// their original values in 1/8 when switched to 1/1024.
219	double retval = InitialHuffmanCost();
220	// Second coefficient: Many zeros in the histogram are covered efficiently
221	// by a run-length encode. Originally 2/8.
222	retval += stats->counts[`0`] * `1.5625` + `0.234375` * stats->streaks[`0`][`1`];
223	// Second coefficient: Constant values are encoded less efficiently, but still
224	// RLE'ed. Originally 6/8.
225	retval += stats->counts[`1`] * `2.578125` + `0.703125` * stats->streaks[`1`][`1`];
226	// 0s are usually encoded more efficiently than non-0s.
227	// Originally 15/8.
228	retval += `1.796875` * stats->streaks[`0`][`0`];
229	// Originally 26/8.
230	retval += `3.28125` * stats->streaks[`1`][`0`];
231	return retval;
232	}
233
234	// Get the symbol entropy for the distribution 'population'.
235	// Set 'trivial_sym', if there's only one symbol present in the distribution.
236	static double PopulationCost(const uint32_t* const population, int length,
237	uint32_t* const trivial_sym) {
238	VP8LBitEntropy bit_entropy;
239	VP8LStreaks stats;
240	VP8LGetEntropyUnrefined(population, length, &bit_entropy, &stats);
241	if (trivial_sym != NULL) {
242	*trivial_sym = (bit_entropy.nonzeros == `1`) ? bit_entropy.nonzero_code
243	: VP8L_NON_TRIVIAL_SYM;
244	}
245
246	return BitsEntropyRefine(&bit_entropy) + FinalHuffmanCost(&stats);
247	}
248
249	// trivial_at_end is 1 if the two histograms only have one element that is
250	// non-zero: both the zero-th one, or both the last one.
251	static WEBP_INLINE double GetCombinedEntropy(const uint32_t* const X,
252	const uint32_t* const Y,
253	int length, int trivial_at_end) {
254	VP8LStreaks stats;
255	if (trivial_at_end) {
256	// This configuration is due to palettization that transforms an indexed
257	// pixel into 0xff000000 \| (pixel << 8) in VP8LBundleColorMap.
258	// BitsEntropyRefine is 0 for histograms with only one non-zero value.
259	// Only FinalHuffmanCost needs to be evaluated.
260	memset(&stats, `0`, sizeof(stats));
261	// Deal with the non-zero value at index 0 or length-1.
262	stats.streaks[`1`][`0`] += `1`;
263	// Deal with the following/previous zero streak.
264	stats.counts[`0`] += `1`;
265	stats.streaks[`0`][`1`] += length - `1`;
266	return FinalHuffmanCost(&stats);
267	} else {
268	VP8LBitEntropy bit_entropy;
269	VP8LGetCombinedEntropyUnrefined(X, Y, length, &bit_entropy, &stats);
270
271	return BitsEntropyRefine(&bit_entropy) + FinalHuffmanCost(&stats);
272	}
273	}
274
275	// Estimates the Entropy + Huffman + other block overhead size cost.
276	double VP8LHistogramEstimateBits(const VP8LHistogram* const p) {
277	return
278	PopulationCost(
279	p->literal_, VP8LHistogramNumCodes(p->palette_code_bits_), NULL)
280	+ PopulationCost(p->red_, NUM_LITERAL_CODES, NULL)
281	+ PopulationCost(p->blue_, NUM_LITERAL_CODES, NULL)
282	+ PopulationCost(p->alpha_, NUM_LITERAL_CODES, NULL)
283	+ PopulationCost(p->distance_, NUM_DISTANCE_CODES, NULL)
284	+ VP8LExtraCost(p->literal_ + NUM_LITERAL_CODES, NUM_LENGTH_CODES)
285	+ VP8LExtraCost(p->distance_, NUM_DISTANCE_CODES);
286	}
287
288	// -----------------------------------------------------------------------------
289	// Various histogram combine/cost-eval functions
290
291	static int GetCombinedHistogramEntropy(const VP8LHistogram* const a,
292	const VP8LHistogram* const b,
293	double cost_threshold,
294	double* cost) {
295	const int palette_code_bits = a->palette_code_bits_;
296	int trivial_at_end = `0`;
297	assert(a->palette_code_bits_ == b->palette_code_bits_);
298	*cost += GetCombinedEntropy(a->literal_, b->literal_,
299	VP8LHistogramNumCodes(palette_code_bits), `0`);
300	*cost += VP8LExtraCostCombined(a->literal_ + NUM_LITERAL_CODES,
301	b->literal_ + NUM_LITERAL_CODES,
302	NUM_LENGTH_CODES);
303	if (cost > cost_threshold) return* `0`;
304
305	if (a->trivial_symbol_ != VP8L_NON_TRIVIAL_SYM &&
306	a->trivial_symbol_ == b->trivial_symbol_) {
307	// A, R and B are all 0 or 0xff.
308	const uint32_t color_a = (a->trivial_symbol_ >> `24`) & `0xff`;
309	const uint32_t color_r = (a->trivial_symbol_ >> `16`) & `0xff`;
310	const uint32_t color_b = (a->trivial_symbol_ >> `0`) & `0xff`;
311	if ((color_a == `0` \|\| color_a == `0xff`) &&
312	(color_r == `0` \|\| color_r == `0xff`) &&
313	(color_b == `0` \|\| color_b == `0xff`)) {
314	trivial_at_end = `1`;
315	}
316	}
317
318	*cost +=
319	GetCombinedEntropy(a->red_, b->red_, NUM_LITERAL_CODES, trivial_at_end);
320	if (cost > cost_threshold) return* `0`;
321
322	*cost +=
323	GetCombinedEntropy(a->blue_, b->blue_, NUM_LITERAL_CODES, trivial_at_end);
324	if (cost > cost_threshold) return* `0`;
325
326	*cost += GetCombinedEntropy(a->alpha_, b->alpha_, NUM_LITERAL_CODES,
327	trivial_at_end);
328	if (cost > cost_threshold) return* `0`;
329
330	*cost +=
331	GetCombinedEntropy(a->distance_, b->distance_, NUM_DISTANCE_CODES, `0`);
332	*cost +=
333	VP8LExtraCostCombined(a->distance_, b->distance_, NUM_DISTANCE_CODES);
334	if (cost > cost_threshold) return* `0`;
335
336	return `1`;
337	}
338
339	static WEBP_INLINE void HistogramAdd(const VP8LHistogram* const a,
340	const VP8LHistogram* const b,
341	VP8LHistogram* const out) {
342	VP8LHistogramAdd(a, b, out);
343	out->trivial_symbol_ = (a->trivial_symbol_ == b->trivial_symbol_)
344	? a->trivial_symbol_
345	: VP8L_NON_TRIVIAL_SYM;
346	}
347
348	// Performs out = a + b, computing the cost C(a+b) - C(a) - C(b) while comparing
349	// to the threshold value 'cost_threshold'. The score returned is
350	// Score = C(a+b) - C(a) - C(b), where C(a) + C(b) is known and fixed.
351	// Since the previous score passed is 'cost_threshold', we only need to compare
352	// the partial cost against 'cost_threshold + C(a) + C(b)' to possibly bail-out
353	// early.
354	static double HistogramAddEval(const VP8LHistogram* const a,
355	const VP8LHistogram* const b,
356	VP8LHistogram* const out,
357	double cost_threshold) {
358	double cost = `0`;
359	const double sum_cost = a->bit_cost_ + b->bit_cost_;
360	cost_threshold += sum_cost;
361
362	if (GetCombinedHistogramEntropy(a, b, cost_threshold, &cost)) {
363	HistogramAdd(a, b, out);
364	out->bit_cost_ = cost;
365	out->palette_code_bits_ = a->palette_code_bits_;
366	}
367
368	return cost - sum_cost;
369	}
370
371	// Same as HistogramAddEval(), except that the resulting histogram
372	// is not stored. Only the cost C(a+b) - C(a) is evaluated. We omit
373	// the term C(b) which is constant over all the evaluations.
374	static double HistogramAddThresh(const VP8LHistogram* const a,
375	const VP8LHistogram* const b,
376	double cost_threshold) {
377	double cost = -a->bit_cost_;
378	GetCombinedHistogramEntropy(a, b, cost_threshold, &cost);
379	return cost;
380	}
381
382	// -----------------------------------------------------------------------------
383
// The structure to keep track of cost range for the three dominant entropy
// symbols (literal, red and blue): one [min, max] pair per symbol.
// TODO(skal): Evaluate if float can be used here instead of double for
// representing the entropy costs.
typedef struct {
  double literal_max_;   // largest / smallest literal cost seen
  double literal_min_;
  double red_max_;       // largest / smallest red cost seen
  double red_min_;
  double blue_max_;      // largest / smallest blue cost seen
  double blue_min_;
} DominantCostRange;
396
397	static void DominantCostRangeInit(DominantCostRange* const c) {
398	c->literal_max_ = `0.`;
399	c->literal_min_ = MAX_COST;
400	c->red_max_ = `0.`;
401	c->red_min_ = MAX_COST;
402	c->blue_max_ = `0.`;
403	c->blue_min_ = MAX_COST;
404	}
405
406	static void UpdateDominantCostRange(
407	const VP8LHistogram* const h, DominantCostRange* const c) {
408	if (c->literal_max_ < h->literal_cost_) c->literal_max_ = h->literal_cost_;
409	if (c->literal_min_ > h->literal_cost_) c->literal_min_ = h->literal_cost_;
410	if (c->red_max_ < h->red_cost_) c->red_max_ = h->red_cost_;
411	if (c->red_min_ > h->red_cost_) c->red_min_ = h->red_cost_;
412	if (c->blue_max_ < h->blue_cost_) c->blue_max_ = h->blue_cost_;
413	if (c->blue_min_ > h->blue_cost_) c->blue_min_ = h->blue_cost_;
414	}
415
416	static void UpdateHistogramCost(VP8LHistogram* const h) {
417	uint32_t alpha_sym, red_sym, blue_sym;
418	const double alpha_cost =
419	PopulationCost(h->alpha_, NUM_LITERAL_CODES, &alpha_sym);
420	const double distance_cost =
421	PopulationCost(h->distance_, NUM_DISTANCE_CODES, NULL) +
422	VP8LExtraCost(h->distance_, NUM_DISTANCE_CODES);
423	const int num_codes = VP8LHistogramNumCodes(h->palette_code_bits_);
424	h->literal_cost_ = PopulationCost(h->literal_, num_codes, NULL) +
425	VP8LExtraCost(h->literal_ + NUM_LITERAL_CODES,
426	NUM_LENGTH_CODES);
427	h->red_cost_ = PopulationCost(h->red_, NUM_LITERAL_CODES, &red_sym);
428	h->blue_cost_ = PopulationCost(h->blue_, NUM_LITERAL_CODES, &blue_sym);
429	h->bit_cost_ = h->literal_cost_ + h->red_cost_ + h->blue_cost_ +
430	alpha_cost + distance_cost;
431	if ((alpha_sym \| red_sym \| blue_sym) == VP8L_NON_TRIVIAL_SYM) {
432	h->trivial_symbol_ = VP8L_NON_TRIVIAL_SYM;
433	} else {
434	h->trivial_symbol_ =
435	((uint32_t)alpha_sym << `24`) \| (red_sym << `16`) \| (blue_sym << `0`);
436	}
437	}
438
439	static int GetBinIdForEntropy(double min, double max, double val) {
440	const double range = max - min;
441	if (range > `0.`) {
442	const double delta = val - min;
443	return (int)((NUM_PARTITIONS - `1e-6`) * delta / range);
444	} else {
445	return `0`;
446	}
447	}
448
449	static int GetHistoBinIndex(const VP8LHistogram* const h,
450	const DominantCostRange* const c, int low_effort) {
451	int bin_id = GetBinIdForEntropy(c->literal_min_, c->literal_max_,
452	h->literal_cost_);
453	assert(bin_id < NUM_PARTITIONS);
454	if (!low_effort) {
455	bin_id = bin_id * NUM_PARTITIONS
456	+ GetBinIdForEntropy(c->red_min_, c->red_max_, h->red_cost_);
457	bin_id = bin_id * NUM_PARTITIONS
458	+ GetBinIdForEntropy(c->blue_min_, c->blue_max_, h->blue_cost_);
459	assert(bin_id < BIN_SIZE);
460	}
461	return bin_id;
462	}
463
464	// Construct the histograms from backward references.
465	static void HistogramBuild(
466	int xsize, int histo_bits, const VP8LBackwardRefs* const backward_refs,
467	VP8LHistogramSet* const image_histo) {
468	int x = `0`, y = `0`;
469	const int histo_xsize = VP8LSubSampleSize(xsize, histo_bits);
470	VP8LHistogram** const histograms = image_histo->histograms;
471	VP8LRefsCursor c = VP8LRefsCursorInit(backward_refs);
472	assert(histo_bits > `0`);
473	while (VP8LRefsCursorOk(&c)) {
474	const PixOrCopy* const v = c.cur_pos;
475	const int ix = (y >> histo_bits) * histo_xsize + (x >> histo_bits);
476	VP8LHistogramAddSinglePixOrCopy(histograms[ix], v);
477	x += PixOrCopyLength(v);
478	while (x >= xsize) {
479	x -= xsize;
480	++y;
481	}
482	VP8LRefsCursorNext(&c);
483	}
484	}
485
486	// Copies the histograms and computes its bit_cost.
487	static void HistogramCopyAndAnalyze(
488	VP8LHistogramSet* const orig_histo, VP8LHistogramSet* const image_histo) {
489	int i;
490	const int histo_size = orig_histo->size;
491	VP8LHistogram** const orig_histograms = orig_histo->histograms;
492	VP8LHistogram** const histograms = image_histo->histograms;
493	for (i = `0`; i < histo_size; ++i) {
494	VP8LHistogram* const histo = orig_histograms[i];
495	UpdateHistogramCost(histo);
496	// Copy histograms from orig_histo[] to image_histo[].
497	HistogramCopy(histo, histograms[i]);
498	}
499	}
500
501	// Partition histograms to different entropy bins for three dominant (literal,
502	// red and blue) symbol costs and compute the histogram aggregate bit_cost.
503	static void HistogramAnalyzeEntropyBin(VP8LHistogramSet* const image_histo,
504	uint16_t* const bin_map,
505	int low_effort) {
506	int i;
507	VP8LHistogram** const histograms = image_histo->histograms;
508	const int histo_size = image_histo->size;
509	DominantCostRange cost_range;
510	DominantCostRangeInit(&cost_range);
511
512	// Analyze the dominant (literal, red and blue) entropy costs.
513	for (i = `0`; i < histo_size; ++i) {
514	UpdateDominantCostRange(histograms[i], &cost_range);
515	}
516
517	// bin-hash histograms on three of the dominant (literal, red and blue)
518	// symbol costs and store the resulting bin_id for each histogram.
519	for (i = `0`; i < histo_size; ++i) {
520	bin_map[i] = GetHistoBinIndex(histograms[i], &cost_range, low_effort);
521	}
522	}
523
524	// Compact image_histo[] by merging some histograms with same bin_id together if
525	// it's advantageous.
526	static VP8LHistogram* HistogramCombineEntropyBin(
527	VP8LHistogramSet* const image_histo,
528	VP8LHistogram* cur_combo,
529	const uint16_t* const bin_map, int bin_map_size, int num_bins,
530	double combine_cost_factor, int low_effort) {
531	VP8LHistogram** const histograms = image_histo->histograms;
532	int idx;
533	// Work in-place: processed histograms are put at the beginning of
534	// image_histo[]. At the end, we just have to truncate the array.
535	int size = `0`;
536	struct {
537	int16_t first; // position of the histogram that accumulates all
538	// histograms with the same bin_id
539	uint16_t num_combine_failures; // number of combine failures per bin_id
540	} bin_info[BIN_SIZE];
541
542	assert(num_bins <= BIN_SIZE);
543	for (idx = `0`; idx < num_bins; ++idx) {
544	bin_info[idx].first = -`1`;
545	bin_info[idx].num_combine_failures = `0`;
546	}
547
548	for (idx = `0`; idx < bin_map_size; ++idx) {
549	const int bin_id = bin_map[idx];
550	const int first = bin_info[bin_id].first;
551	assert(size <= idx);
552	if (first == -`1`) {
553	// just move histogram #idx to its final position
554	histograms[size] = histograms[idx];
555	bin_info[bin_id].first = size++;
556	} else if (low_effort) {
557	HistogramAdd(histograms[idx], histograms[first], histograms[first]);
558	} else {
559	// try to merge #idx into #first (both share the same bin_id)
560	const double bit_cost = histograms[idx]->bit_cost_;
561	const double bit_cost_thresh = -bit_cost * combine_cost_factor;
562	const double curr_cost_diff =
563	HistogramAddEval(histograms[first], histograms[idx],
564	cur_combo, bit_cost_thresh);
565	if (curr_cost_diff < bit_cost_thresh) {
566	// Try to merge two histograms only if the combo is a trivial one or
567	// the two candidate histograms are already non-trivial.
568	// For some images, 'try_combine' turns out to be false for a lot of
569	// histogram pairs. In that case, we fallback to combining
570	// histograms as usual to avoid increasing the header size.
571	const int try_combine =
572	(cur_combo->trivial_symbol_ != VP8L_NON_TRIVIAL_SYM) \|\|
573	((histograms[idx]->trivial_symbol_ == VP8L_NON_TRIVIAL_SYM) &&
574	(histograms[first]->trivial_symbol_ == VP8L_NON_TRIVIAL_SYM));
575	const int max_combine_failures = `32`;
576	if (try_combine \|\|
577	bin_info[bin_id].num_combine_failures >= max_combine_failures) {
578	// move the (better) merged histogram to its final slot
579	HistogramSwap(&cur_combo, &histograms[first]);
580	} else {
581	histograms[size++] = histograms[idx];
582	++bin_info[bin_id].num_combine_failures;
583	}
584	} else {
585	histograms[size++] = histograms[idx];
586	}
587	}
588	}
589	image_histo->size = size;
590	if (low_effort) {
591	// for low_effort case, update the final cost when everything is merged
592	for (idx = `0`; idx < size; ++idx) {
593	UpdateHistogramCost(histograms[idx]);
594	}
595	}
596	return cur_combo;
597	}
598
// Advances the pseudo-random state '*seed' and returns the new state.
// The state is multiplied by 16807 and truncated to 32 bits; a result of 0
// is replaced by 1 so that the sequence does not stay stuck at zero.
static uint32_t MyRand(uint32_t* const seed) {
  // The 'ull' constant makes the product 64-bit before it is masked.
  *seed = (*seed * 16807ull) & 0xffffffffu;
  if (*seed == 0) {
    *seed = 1;
  }
  return *seed;
}
606
607	// -----------------------------------------------------------------------------
608	// Histogram pairs priority queue
609
// Pair of histograms. Negative idx1 value means that pair is out-of-date.
typedef struct {
  int idx1;            // index of the first histogram (PreparePair() stores
                       // the smaller index here)
  int idx2;            // index of the second histogram
  double cost_diff;    // cost_combo minus the summed cost of both histograms
  double cost_combo;   // estimated cost of the merged histogram
} HistogramPair;
617
618	typedef struct {
619	HistogramPair* queue;
620	int size;
621	int max_size;
622	} HistoQueue;
623
624	static int HistoQueueInit(HistoQueue* const histo_queue, const int max_index) {
625	histo_queue->size = `0`;
626	// max_index^2 for the queue size is safe. If you look at
627	// HistogramCombineGreedy, and imagine that UpdateQueueFront always pushes
628	// data to the queue, you insert at most:
629	// - max_index(max_index-1)/2 (the first two for loops)*
630	// - max_index - 1 in the last for loop at the first iteration of the while
631	// loop, max_index - 2 at the second iteration ... therefore
632	// max_index(max_index-1)/2 overall too*
633	histo_queue->max_size = max_index * max_index;
634	// We allocate max_size + 1 because the last element at index "size" is
635	// used as temporary data (and it could be up to max_size).
636	histo_queue->queue = (HistogramPair*)WebPSafeMalloc(
637	histo_queue->max_size + `1`, sizeof(*histo_queue->queue));
638	return histo_queue->queue != NULL;
639	}
640
641	static void HistoQueueClear(HistoQueue* const histo_queue) {
642	assert(histo_queue != NULL);
643	WebPSafeFree(histo_queue->queue);
644	}
645
646	static void SwapHistogramPairs(HistogramPair *p1,
647	HistogramPair *p2) {
648	const HistogramPair tmp = *p1;
649	p1 = p2;
650	*p2 = tmp;
651	}
652
653	// Given a valid priority queue in range [0, queue_size) this function checks
654	// whether histo_queue[queue_size] should be accepted and swaps it with the
655	// front if it is smaller. Otherwise, it leaves it as is.
656	static void UpdateQueueFront(HistoQueue* const histo_queue) {
657	if (histo_queue->queue[histo_queue->size].cost_diff >= `0`) return;
658
659	if (histo_queue->queue[histo_queue->size].cost_diff <
660	histo_queue->queue[`0`].cost_diff) {
661	SwapHistogramPairs(histo_queue->queue,
662	histo_queue->queue + histo_queue->size);
663	}
664	++histo_queue->size;
665
666	// We cannot add more elements than the capacity.
667	// The allocation adds an extra element to the official capacity so that
668	// histo_queue->queue[histo_queue->max_size] is read/written within bound.
669	assert(histo_queue->size <= histo_queue->max_size);
670	}
671
672	// -----------------------------------------------------------------------------
673
674	static void PreparePair(VP8LHistogram** histograms, int idx1, int idx2,
675	HistogramPair* const pair) {
676	VP8LHistogram* h1;
677	VP8LHistogram* h2;
678	double sum_cost;
679
680	if (idx1 > idx2) {
681	const int tmp = idx2;
682	idx2 = idx1;
683	idx1 = tmp;
684	}
685	pair->idx1 = idx1;
686	pair->idx2 = idx2;
687	h1 = histograms[idx1];
688	h2 = histograms[idx2];
689	sum_cost = h1->bit_cost_ + h2->bit_cost_;
690	pair->cost_combo = `0.`;
691	GetCombinedHistogramEntropy(h1, h2, sum_cost, &pair->cost_combo);
692	pair->cost_diff = pair->cost_combo - sum_cost;
693	}
694
695	// Combines histograms by continuously choosing the one with the highest cost
696	// reduction.
697	static int HistogramCombineGreedy(VP8LHistogramSet* const image_histo) {
698	int ok = `0`;
699	int image_histo_size = image_histo->size;
700	int i, j;
701	VP8LHistogram** const histograms = image_histo->histograms;
702	// Indexes of remaining histograms.
703	int* const clusters =
704	(int)WebPSafeMalloc(image_histo_size, sizeof(clusters));
705	// Priority queue of histogram pairs.
706	HistoQueue histo_queue;
707
708	if (!HistoQueueInit(&histo_queue, image_histo_size) \|\| clusters == NULL) {
709	goto End;
710	}
711
712	for (i = `0`; i < image_histo_size; ++i) {
713	// Initialize clusters indexes.
714	clusters[i] = i;
715	for (j = i + `1`; j < image_histo_size; ++j) {
716	// Initialize positions array.
717	PreparePair(histograms, i, j, &histo_queue.queue[histo_queue.size]);
718	UpdateQueueFront(&histo_queue);
719	}
720	}
721
722	while (image_histo_size > `1` && histo_queue.size > `0`) {
723	HistogramPair* copy_to;
724	const int idx1 = histo_queue.queue[`0`].idx1;
725	const int idx2 = histo_queue.queue[`0`].idx2;
726	HistogramAdd(histograms[idx2], histograms[idx1], histograms[idx1]);
727	histograms[idx1]->bit_cost_ = histo_queue.queue[`0`].cost_combo;
728	// Remove merged histogram.
729	for (i = `0`; i + `1` < image_histo_size; ++i) {
730	if (clusters[i] >= idx2) {
731	clusters[i] = clusters[i + `1`];
732	}
733	}
734	--image_histo_size;
735
736	// Remove pairs intersecting the just combined best pair. This will
737	// therefore pop the head of the queue.
738	copy_to = histo_queue.queue;
739	for (i = `0`; i < histo_queue.size; ++i) {
740	HistogramPair* const p = histo_queue.queue + i;
741	if (p->idx1 == idx1 \|\| p->idx2 == idx1 \|\|
742	p->idx1 == idx2 \|\| p->idx2 == idx2) {
743	// Do not copy the invalid pair.
744	continue;
745	}
746	if (p->cost_diff < histo_queue.queue[`0`].cost_diff) {
747	// Replace the top of the queue if we found better.
748	SwapHistogramPairs(histo_queue.queue, p);
749	}
750	SwapHistogramPairs(copy_to, p);
751	++copy_to;
752	}
753	histo_queue.size = (int)(copy_to - histo_queue.queue);
754
755	// Push new pairs formed with combined histogram to the queue.
756	for (i = `0`; i < image_histo_size; ++i) {
757	if (clusters[i] != idx1) {
758	PreparePair(histograms, idx1, clusters[i],
759	&histo_queue.queue[histo_queue.size]);
760	UpdateQueueFront(&histo_queue);
761	}
762	}
763	}
764	// Move remaining histograms to the beginning of the array.
765	for (i = `0`; i < image_histo_size; ++i) {
766	if (i != clusters[i]) { // swap the two histograms
767	HistogramSwap(&histograms[i], &histograms[clusters[i]]);
768	}
769	}
770
771	image_histo->size = image_histo_size;
772	ok = `1`;
773
774	End:
775	WebPSafeFree(clusters);
776	HistoQueueClear(&histo_queue);
777	return ok;
778	}
779
780	static void HistogramCombineStochastic(VP8LHistogramSet* const image_histo,
781	VP8LHistogram* tmp_histo,
782	VP8LHistogram* best_combo,
783	int quality, int min_cluster_size) {
784	int iter;
785	uint32_t seed = `0`;
786	int tries_with_no_success = `0`;
787	int image_histo_size = image_histo->size;
788	const int iter_mult = (quality < `25`) ? `2` : `2` + (quality - `25`) / `8`;
789	const int outer_iters = image_histo_size * iter_mult;
790	const int num_pairs = image_histo_size / `2`;
791	const int num_tries_no_success = outer_iters / `2`;
792	int idx2_max = image_histo_size - `1`;
793	int do_brute_dorce = `0`;
794	VP8LHistogram** const histograms = image_histo->histograms;
795
796	// Collapse similar histograms in 'image_histo'.
797	++min_cluster_size;
798	for (iter = `0`;
799	iter < outer_iters && image_histo_size >= min_cluster_size;
800	++iter) {
801	double best_cost_diff = `0.`;
802	int best_idx1 = -`1`, best_idx2 = `1`;
803	int j;
804	int num_tries =
805	(num_pairs < image_histo_size) ? num_pairs : image_histo_size;
806	// Use a brute force approach if:
807	// - stochastic has not worked for a while and
808	// - if the number of iterations for brute force is less than the number of
809	// iterations if we never find a match ever again stochastically (hence
810	// num_tries times the number of remaining outer iterations).
811	do_brute_dorce =
812	(tries_with_no_success > `10`) &&
813	(idx2_max * (idx2_max + `1`) < `2` * num_tries * (outer_iters - iter));
814	if (do_brute_dorce) num_tries = idx2_max;
815
816	seed += iter;
817	for (j = `0`; j < num_tries; ++j) {
818	double curr_cost_diff;
819	// Choose two histograms at random and try to combine them.
820	uint32_t idx1, idx2;
821	if (do_brute_dorce) {
822	// Use a brute force approach.
823	idx1 = (uint32_t)j;
824	idx2 = (uint32_t)idx2_max;
825	} else {
826	const uint32_t tmp = (j & `7`) + `1`;
827	const uint32_t diff =
828	(tmp < `3`) ? tmp : MyRand(&seed) % (image_histo_size - `1`);
829	idx1 = MyRand(&seed) % image_histo_size;
830	idx2 = (idx1 + diff + `1`) % image_histo_size;
831	if (idx1 == idx2) {
832	continue;
833	}
834	}
835
836	// Calculate cost reduction on combining.
837	curr_cost_diff = HistogramAddEval(histograms[idx1], histograms[idx2],
838	tmp_histo, best_cost_diff);
839	if (curr_cost_diff < best_cost_diff) { // found a better pair?
840	HistogramSwap(&best_combo, &tmp_histo);
841	best_cost_diff = curr_cost_diff;
842	best_idx1 = idx1;
843	best_idx2 = idx2;
844	}
845	}
846	if (do_brute_dorce) --idx2_max;
847
848	if (best_idx1 >= `0`) {
849	HistogramSwap(&best_combo, &histograms[best_idx1]);
850	// swap best_idx2 slot with last one (which is now unused)
851	--image_histo_size;
852	if (idx2_max >= image_histo_size) idx2_max = image_histo_size - `1`;
853	if (best_idx2 != image_histo_size) {
854	HistogramSwap(&histograms[image_histo_size], &histograms[best_idx2]);
855	histograms[image_histo_size] = NULL;
856	}
857	tries_with_no_success = `0`;
858	}
859	if (++tries_with_no_success >= num_tries_no_success \|\| idx2_max == `0`) {
860	break;
861	}
862	}
863	image_histo->size = image_histo_size;
864	}
865
866	// -----------------------------------------------------------------------------
867	// Histogram refinement
868
869	// Find the best 'out' histogram for each of the 'in' histograms.
870	// Note: we assume that out[]->bit_cost_ is already up-to-date.
871	static void HistogramRemap(const VP8LHistogramSet* const in,
872	const VP8LHistogramSet* const out,
873	uint16_t* const symbols) {
874	int i;
875	VP8LHistogram** const in_histo = in->histograms;
876	VP8LHistogram** const out_histo = out->histograms;
877	const int in_size = in->size;
878	const int out_size = out->size;
879	if (out_size > `1`) {
880	for (i = `0`; i < in_size; ++i) {
881	int best_out = `0`;
882	double best_bits = MAX_COST;
883	int k;
884	for (k = `0`; k < out_size; ++k) {
885	const double cur_bits =
886	HistogramAddThresh(out_histo[k], in_histo[i], best_bits);
887	if (k == `0` \|\| cur_bits < best_bits) {
888	best_bits = cur_bits;
889	best_out = k;
890	}
891	}
892	symbols[i] = best_out;
893	}
894	} else {
895	assert(out_size == `1`);
896	for (i = `0`; i < in_size; ++i) {
897	symbols[i] = `0`;
898	}
899	}
900
901	// Recompute each out based on raw and symbols.
902	for (i = `0`; i < out_size; ++i) {
903	HistogramClear(out_histo[i]);
904	}
905
906	for (i = `0`; i < in_size; ++i) {
907	const int idx = symbols[i];
908	HistogramAdd(in_histo[i], out_histo[idx], out_histo[idx]);
909	}
910	}
911
// Returns the cost factor for the given number of histograms and quality.
// The base value is 0.16. For quality < 90 it is halved once for each of the
// histo_size thresholds exceeded (256, 512, 1024), and once more when
// quality <= 50. For quality >= 90 the base value is returned unchanged.
static double GetCombineCostFactor(int histo_size, int quality) {
  double factor = 0.16;
  int halvings = 0;

  if (quality >= 90) return factor;

  // Count how many times the factor must be divided by two.
  halvings += (histo_size > 256);
  halvings += (histo_size > 512);
  halvings += (histo_size > 1024);
  halvings += (quality <= 50);

  while (halvings-- > 0) {
    factor /= 2.;
  }
  return factor;
}
922
923	int VP8LGetHistoImageSymbols(int xsize, int ysize,
924	const VP8LBackwardRefs* const refs,
925	int quality, int low_effort,
926	int histo_bits, int cache_bits,
927	VP8LHistogramSet* const image_histo,
928	VP8LHistogramSet* const tmp_histos,
929	uint16_t* const histogram_symbols) {
930	int ok = `0`;
931	const int histo_xsize = histo_bits ? VP8LSubSampleSize(xsize, histo_bits) : `1`;
932	const int histo_ysize = histo_bits ? VP8LSubSampleSize(ysize, histo_bits) : `1`;
933	const int image_histo_raw_size = histo_xsize * histo_ysize;
934	VP8LHistogramSet* const orig_histo =
935	VP8LAllocateHistogramSet(image_histo_raw_size, cache_bits);
936	VP8LHistogram* cur_combo;
937	// Don't attempt linear bin-partition heuristic for
938	// histograms of small sizes (as bin_map will be very sparse) and
939	// maximum quality q==100 (to preserve the compression gains at that level).
940	const int entropy_combine_num_bins = low_effort ? NUM_PARTITIONS : BIN_SIZE;
941	const int entropy_combine =
942	(orig_histo->size > entropy_combine_num_bins * `2`) && (quality < `100`);
943
944	if (orig_histo == NULL) goto Error;
945
946	// Construct the histograms from backward references.
947	HistogramBuild(xsize, histo_bits, refs, orig_histo);
948	// Copies the histograms and computes its bit_cost.
949	HistogramCopyAndAnalyze(orig_histo, image_histo);
950
951	cur_combo = tmp_histos->histograms[`1`]; // pick up working slot
952	if (entropy_combine) {
953	const int bin_map_size = orig_histo->size;
954	// Reuse histogram_symbols storage. By definition, it's guaranteed to be ok.
955	uint16_t* const bin_map = histogram_symbols;
956	const double combine_cost_factor =
957	GetCombineCostFactor(image_histo_raw_size, quality);
958
959	HistogramAnalyzeEntropyBin(orig_histo, bin_map, low_effort);
960	// Collapse histograms with similar entropy.
961	cur_combo = HistogramCombineEntropyBin(image_histo, cur_combo,
962	bin_map, bin_map_size,
963	entropy_combine_num_bins,
964	combine_cost_factor, low_effort);
965	}
966
967	// Don't combine the histograms using stochastic and greedy heuristics for
968	// low-effort compression mode.
969	if (!low_effort \|\| !entropy_combine) {
970	const float x = quality / `100.f`;
971	// cubic ramp between 1 and MAX_HISTO_GREEDY:
972	const int threshold_size = (int)(`1` + (x * x * x) * (MAX_HISTO_GREEDY - `1`));
973	HistogramCombineStochastic(image_histo, tmp_histos->histograms[`0`],
974	cur_combo, quality, threshold_size);
975	if ((image_histo->size <= threshold_size) &&
976	!HistogramCombineGreedy(image_histo)) {
977	goto Error;
978	}
979	}
980
981	// TODO(vikasa): Optimize HistogramRemap for low-effort compression mode also.
982	// Find the optimal map from original histograms to the final ones.
983	HistogramRemap(orig_histo, image_histo, histogram_symbols);
984
985	ok = `1`;
986
987	Error:
988	VP8LFreeHistogramSet(orig_histo);
989	return ok;
990	}
991

Browse the source code of engine/third_party/libwebp/src/enc/histogram_enc.c