enc.c source code [engine/third_party/libwebp/src/dsp/enc.c]

1	// Copyright 2011 Google Inc. All Rights Reserved.
2	//
3	// Use of this source code is governed by a BSD-style license
4	// that can be found in the COPYING file in the root of the source
5	// tree. An additional intellectual property rights grant can be found
6	// in the file PATENTS. All contributing project authors may
7	// be found in the AUTHORS file in the root of the source tree.
8	// -----------------------------------------------------------------------------
9	//
10	// Speed-critical encoding functions.
11	//
12	// Author: Skal (pascal.massimino@gmail.com)
13
14	#include <assert.h>
15	#include <stdlib.h> // for abs()
16
17	#include "./dsp.h"
18	#include "../enc/vp8i_enc.h"
19
20	static WEBP_INLINE uint8_t clip_8b(int v) {
21	return (!(v & ~`0xff`)) ? v : (v < `0`) ? `0` : `255`;
22	}
23
24	static WEBP_INLINE int clip_max(int v, int max) {
25	return (v > max) ? max : v;
26	}
27
28	//------------------------------------------------------------------------------
29	// Compute susceptibility based on DCT-coeff histograms:
30	// the higher, the "easier" the macroblock is to compress.
31
32	const int VP8DspScan[`16` + `4` + `4`] = {
33	// Luma
34	`0` + `0` * BPS, `4` + `0` * BPS, `8` + `0` * BPS, `12` + `0` * BPS,
35	`0` + `4` * BPS, `4` + `4` * BPS, `8` + `4` * BPS, `12` + `4` * BPS,
36	`0` + `8` * BPS, `4` + `8` * BPS, `8` + `8` * BPS, `12` + `8` * BPS,
37	`0` + `12` * BPS, `4` + `12` * BPS, `8` + `12` * BPS, `12` + `12` * BPS,
38
39	`0` + `0` * BPS, `4` + `0` * BPS, `0` + `4` * BPS, `4` + `4` * BPS, // U
40	`8` + `0` * BPS, `12` + `0` * BPS, `8` + `4` * BPS, `12` + `4` * BPS // V
41	};
42
43	// general-purpose util function
44	void VP8SetHistogramData(const int distribution[MAX_COEFF_THRESH + `1`],
45	VP8Histogram* const histo) {
46	int max_value = `0`, last_non_zero = `1`;
47	int k;
48	for (k = `0`; k <= MAX_COEFF_THRESH; ++k) {
49	const int value = distribution[k];
50	if (value > `0`) {
51	if (value > max_value) max_value = value;
52	last_non_zero = k;
53	}
54	}
55	histo->max_value = max_value;
56	histo->last_non_zero = last_non_zero;
57	}
58
59	static void CollectHistogram(const uint8_t* ref, const uint8_t* pred,
60	int start_block, int end_block,
61	VP8Histogram* const histo) {
62	int j;
63	int distribution[MAX_COEFF_THRESH + `1`] = { `0` };
64	for (j = start_block; j < end_block; ++j) {
65	int k;
66	int16_t out[`16`];
67
68	VP8FTransform(ref + VP8DspScan[j], pred + VP8DspScan[j], out);
69
70	// Convert coefficients to bin.
71	for (k = `0`; k < `16`; ++k) {
72	const int v = abs(out[k]) >> `3`;
73	const int clipped_value = clip_max(v, MAX_COEFF_THRESH);
74	++distribution[clipped_value];
75	}
76	}
77	VP8SetHistogramData(distribution, histo);
78	}
79
80	//------------------------------------------------------------------------------
81	// run-time tables (~4k)
82
83	static uint8_t clip1[`255` + `510` + `1`]; // clips [-255,510] to [0,255]
84
85	// We declare this variable 'volatile' to prevent instruction reordering
86	// and make sure it's set to true _last_ (so as to be thread-safe)
87	static volatile int tables_ok = `0`;
88
89	static WEBP_TSAN_IGNORE_FUNCTION void InitTables(void) {
90	if (!tables_ok) {
91	int i;
92	for (i = -`255`; i <= `255` + `255`; ++i) {
93	clip1[`255` + i] = clip_8b(i);
94	}
95	tables_ok = `1`;
96	}
97	}
98
99
100	//------------------------------------------------------------------------------
101	// Transforms (Paragraph 14.4)
102
103	#define STORE(x, y, v) \
104	dst[(x) + (y) * BPS] = clip_8b(ref[(x) + (y) * BPS] + ((v) >> 3))
105
106	static const int kC1 = `20091` + (`1` << `16`);
107	static const int kC2 = `35468`;
108	#define MUL(a, b) (((a) * (b)) >> 16)
109
110	static WEBP_INLINE void ITransformOne(const uint8_t* ref, const int16_t* in,
111	uint8_t* dst) {
112	int C[`4` * `4`], *tmp;
113	int i;
114	tmp = C;
115	for (i = `0`; i < `4`; ++i) { // vertical pass
116	const int a = in[`0`] + in[`8`];
117	const int b = in[`0`] - in[`8`];
118	const int c = MUL(in[`4`], kC2) - MUL(in[`12`], kC1);
119	const int d = MUL(in[`4`], kC1) + MUL(in[`12`], kC2);
120	tmp[`0`] = a + d;
121	tmp[`1`] = b + c;
122	tmp[`2`] = b - c;
123	tmp[`3`] = a - d;
124	tmp += `4`;
125	in++;
126	}
127
128	tmp = C;
129	for (i = `0`; i < `4`; ++i) { // horizontal pass
130	const int dc = tmp[`0`] + `4`;
131	const int a = dc + tmp[`8`];
132	const int b = dc - tmp[`8`];
133	const int c = MUL(tmp[`4`], kC2) - MUL(tmp[`12`], kC1);
134	const int d = MUL(tmp[`4`], kC1) + MUL(tmp[`12`], kC2);
135	STORE(`0`, i, a + d);
136	STORE(`1`, i, b + c);
137	STORE(`2`, i, b - c);
138	STORE(`3`, i, a - d);
139	tmp++;
140	}
141	}
142
// Inverse-transforms one 4x4 block, or two horizontally adjacent ones when
// 'do_two' is non-zero. The second block uses ref + 4, in + 16 and dst + 4.
static void ITransform(const uint8_t* ref, const int16_t* in, uint8_t* dst,
                       int do_two) {
  const int num_blocks = do_two ? 2 : 1;
  int n;
  for (n = 0; n < num_blocks; ++n) {
    ITransformOne(ref + 4 * n, in + 16 * n, dst + 4 * n);
  }
}
150
151	static void FTransform(const uint8_t* src, const uint8_t* ref, int16_t* out) {
152	int i;
153	int tmp[`16`];
154	for (i = `0`; i < `4`; ++i, src += BPS, ref += BPS) {
155	const int d0 = src[`0`] - ref[`0`]; // 9bit dynamic range ([-255,255])
156	const int d1 = src[`1`] - ref[`1`];
157	const int d2 = src[`2`] - ref[`2`];
158	const int d3 = src[`3`] - ref[`3`];
159	const int a0 = (d0 + d3); // 10b [-510,510]
160	const int a1 = (d1 + d2);
161	const int a2 = (d1 - d2);
162	const int a3 = (d0 - d3);
163	tmp[`0` + i * `4`] = (a0 + a1) * `8`; // 14b [-8160,8160]
164	tmp[`1` + i * `4`] = (a2 * `2217` + a3 * `5352` + `1812`) >> `9`; // [-7536,7542]
165	tmp[`2` + i * `4`] = (a0 - a1) * `8`;
166	tmp[`3` + i * `4`] = (a3 * `2217` - a2 * `5352` + `937`) >> `9`;
167	}
168	for (i = `0`; i < `4`; ++i) {
169	const int a0 = (tmp[`0` + i] + tmp[`12` + i]); // 15b
170	const int a1 = (tmp[`4` + i] + tmp[ `8` + i]);
171	const int a2 = (tmp[`4` + i] - tmp[ `8` + i]);
172	const int a3 = (tmp[`0` + i] - tmp[`12` + i]);
173	out[`0` + i] = (a0 + a1 + `7`) >> `4`; // 12b
174	out[`4` + i] = ((a2 * `2217` + a3 * `5352` + `12000`) >> `16`) + (a3 != `0`);
175	out[`8` + i] = (a0 - a1 + `7`) >> `4`;
176	out[`12`+ i] = ((a3 * `2217` - a2 * `5352` + `51000`) >> `16`);
177	}
178	}
179
// Applies VP8FTransform to two horizontally adjacent 4x4 blocks; the second
// one reads src + 4 / ref + 4 and writes its coefficients at out + 16.
static void FTransform2(const uint8_t* src, const uint8_t* ref, int16_t* out) {
  int n;
  for (n = 0; n < 2; ++n) {
    VP8FTransform(src + 4 * n, ref + 4 * n, out + 16 * n);
  }
}
184
// Forward Walsh-Hadamard-style transform over 16 values gathered from 'in'
// with a stride of 16 (in[0], in[16], ..., in[240]). Writes 16 outputs.
static void FTransformWHT(const int16_t* in, int16_t* out) {
  // input is 12b signed
  int32_t tmp[16];
  int i;
  // First pass: each iteration consumes four inputs, starting at in + 64 * i.
  for (i = 0; i < 4; ++i) {
    const int16_t* const src = in + 64 * i;
    int32_t* const row = tmp + 4 * i;
    const int a0 = src[0 * 16] + src[2 * 16];   // 13b
    const int a1 = src[1 * 16] + src[3 * 16];
    const int a2 = src[1 * 16] - src[3 * 16];
    const int a3 = src[0 * 16] - src[2 * 16];
    row[0] = a0 + a1;                           // 14b
    row[1] = a3 + a2;
    row[2] = a3 - a2;
    row[3] = a0 - a1;
  }
  // Second pass, with the final results halved.
  for (i = 0; i < 4; ++i) {
    const int a0 = tmp[0 + i] + tmp[8 + i];     // 15b
    const int a1 = tmp[4 + i] + tmp[12 + i];
    const int a2 = tmp[4 + i] - tmp[12 + i];
    const int a3 = tmp[0 + i] - tmp[8 + i];
    out[0 + i] = (a0 + a1) >> 1;                // 16b sum -> 15b
    out[4 + i] = (a3 + a2) >> 1;
    out[8 + i] = (a3 - a2) >> 1;
    out[12 + i] = (a0 - a1) >> 1;
  }
}
214
215	#undef MUL
216	#undef STORE
217
218	//------------------------------------------------------------------------------
219	// Intra predictions
220
221	static WEBP_INLINE void Fill(uint8_t* dst, int value, int size) {
222	int j;
223	for (j = `0`; j < size; ++j) {
224	memset(dst + j * BPS, value, size);
225	}
226	}
227
228	static WEBP_INLINE void VerticalPred(uint8_t* dst,
229	const uint8_t* top, int size) {
230	int j;
231	if (top != NULL) {
232	for (j = `0`; j < size; ++j) memcpy(dst + j * BPS, top, size);
233	} else {
234	Fill(dst, `127`, size);
235	}
236	}
237
238	static WEBP_INLINE void HorizontalPred(uint8_t* dst,
239	const uint8_t* left, int size) {
240	if (left != NULL) {
241	int j;
242	for (j = `0`; j < size; ++j) {
243	memset(dst + j * BPS, left[j], size);
244	}
245	} else {
246	Fill(dst, `129`, size);
247	}
248	}
249
250	static WEBP_INLINE void TrueMotion(uint8_t* dst, const uint8_t* left,
251	const uint8_t* top, int size) {
252	int y;
253	if (left != NULL) {
254	if (top != NULL) {
255	const uint8_t* const clip = clip1 + `255` - left[-`1`];
256	for (y = `0`; y < size; ++y) {
257	const uint8_t* const clip_table = clip + left[y];
258	int x;
259	for (x = `0`; x < size; ++x) {
260	dst[x] = clip_table[top[x]];
261	}
262	dst += BPS;
263	}
264	} else {
265	HorizontalPred(dst, left, size);
266	}
267	} else {
268	// true motion without left samples (hence: with default 129 value)
269	// is equivalent to VE prediction where you just copy the top samples.
270	// Note that if top samples are not available, the default value is
271	// then 129, and not 127 as in the VerticalPred case.
272	if (top != NULL) {
273	VerticalPred(dst, top, size);
274	} else {
275	Fill(dst, `129`, size);
276	}
277	}
278	}
279
280	static WEBP_INLINE void DCMode(uint8_t* dst, const uint8_t* left,
281	const uint8_t* top,
282	int size, int round, int shift) {
283	int DC = `0`;
284	int j;
285	if (top != NULL) {
286	for (j = `0`; j < size; ++j) DC += top[j];
287	if (left != NULL) { // top and left present
288	for (j = `0`; j < size; ++j) DC += left[j];
289	} else { // top, but no left
290	DC += DC;
291	}
292	DC = (DC + round) >> shift;
293	} else if (left != NULL) { // left but no top
294	for (j = `0`; j < size; ++j) DC += left[j];
295	DC += DC;
296	DC = (DC + round) >> shift;
297	} else { // no top, no left, nothing.
298	DC = `0x80`;
299	}
300	Fill(dst, DC, size);
301	}
302
303	//------------------------------------------------------------------------------
304	// Chroma 8x8 prediction (paragraph 12.2)
305
306	static void IntraChromaPreds(uint8_t* dst, const uint8_t* left,
307	const uint8_t* top) {
308	// U block
309	DCMode(C8DC8 + dst, left, top, `8`, `8`, `4`);
310	VerticalPred(C8VE8 + dst, top, `8`);
311	HorizontalPred(C8HE8 + dst, left, `8`);
312	TrueMotion(C8TM8 + dst, left, top, `8`);
313	// V block
314	dst += `8`;
315	if (top != NULL) top += `8`;
316	if (left != NULL) left += `16`;
317	DCMode(C8DC8 + dst, left, top, `8`, `8`, `4`);
318	VerticalPred(C8VE8 + dst, top, `8`);
319	HorizontalPred(C8HE8 + dst, left, `8`);
320	TrueMotion(C8TM8 + dst, left, top, `8`);
321	}
322
323	//------------------------------------------------------------------------------
324	// luma 16x16 prediction (paragraph 12.3)
325
326	static void Intra16Preds(uint8_t* dst,
327	const uint8_t* left, const uint8_t* top) {
328	DCMode(I16DC16 + dst, left, top, `16`, `16`, `5`);
329	VerticalPred(I16VE16 + dst, top, `16`);
330	HorizontalPred(I16HE16 + dst, left, `16`);
331	TrueMotion(I16TM16 + dst, left, top, `16`);
332	}
333
334	//------------------------------------------------------------------------------
335	// luma 4x4 prediction
336
// Sample at column x, row y of the block pointed to by the local 'dst'.
#define DST(x, y) dst[(y) * BPS + (x)]
// Rounded 3-tap average with weights (1, 2, 1), narrowed to uint8_t.
#define AVG3(a, b, c) ((uint8_t)(((a) + (b) * 2 + (c) + 2) >> 2))
// Rounded average of two samples.
#define AVG2(a, b) (((a) + (b) + 1) >> 1)
340
341	static void VE4(uint8_t* dst, const uint8_t* top) { // vertical
342	const uint8_t vals[`4`] = {
343	AVG3(top[-`1`], top[`0`], top[`1`]),
344	AVG3(top[ `0`], top[`1`], top[`2`]),
345	AVG3(top[ `1`], top[`2`], top[`3`]),
346	AVG3(top[ `2`], top[`3`], top[`4`])
347	};
348	int i;
349	for (i = `0`; i < `4`; ++i) {
350	memcpy(dst + i * BPS, vals, `4`);
351	}
352	}
353
354	static void HE4(uint8_t* dst, const uint8_t* top) { // horizontal
355	const int X = top[-`1`];
356	const int I = top[-`2`];
357	const int J = top[-`3`];
358	const int K = top[-`4`];
359	const int L = top[-`5`];
360	WebPUint32ToMem(dst + `0` * BPS, `0x01010101U` * AVG3(X, I, J));
361	WebPUint32ToMem(dst + `1` * BPS, `0x01010101U` * AVG3(I, J, K));
362	WebPUint32ToMem(dst + `2` * BPS, `0x01010101U` * AVG3(J, K, L));
363	WebPUint32ToMem(dst + `3` * BPS, `0x01010101U` * AVG3(K, L, L));
364	}
365
// DC 4x4 prediction: fills the block with the rounded mean of the eight
// samples top[0 .. 3] and top[-5 .. -2].
static void DC4(uint8_t* dst, const uint8_t* top) {
  const uint32_t sum_top = top[0] + top[1] + top[2] + top[3];
  const uint32_t sum_left = top[-5] + top[-4] + top[-3] + top[-2];
  const uint32_t dc = sum_top + sum_left + 4;   // +4: rounding for '>> 3'
  Fill(dst, dc >> 3, 4);
}
372
373	static void RD4(uint8_t* dst, const uint8_t* top) {
374	const int X = top[-`1`];
375	const int I = top[-`2`];
376	const int J = top[-`3`];
377	const int K = top[-`4`];
378	const int L = top[-`5`];
379	const int A = top[`0`];
380	const int B = top[`1`];
381	const int C = top[`2`];
382	const int D = top[`3`];
383	DST(`0`, `3`) = AVG3(J, K, L);
384	DST(`0`, `2`) = DST(`1`, `3`) = AVG3(I, J, K);
385	DST(`0`, `1`) = DST(`1`, `2`) = DST(`2`, `3`) = AVG3(X, I, J);
386	DST(`0`, `0`) = DST(`1`, `1`) = DST(`2`, `2`) = DST(`3`, `3`) = AVG3(A, X, I);
387	DST(`1`, `0`) = DST(`2`, `1`) = DST(`3`, `2`) = AVG3(B, A, X);
388	DST(`2`, `0`) = DST(`3`, `1`) = AVG3(C, B, A);
389	DST(`3`, `0`) = AVG3(D, C, B);
390	}
391
392	static void LD4(uint8_t* dst, const uint8_t* top) {
393	const int A = top[`0`];
394	const int B = top[`1`];
395	const int C = top[`2`];
396	const int D = top[`3`];
397	const int E = top[`4`];
398	const int F = top[`5`];
399	const int G = top[`6`];
400	const int H = top[`7`];
401	DST(`0`, `0`) = AVG3(A, B, C);
402	DST(`1`, `0`) = DST(`0`, `1`) = AVG3(B, C, D);
403	DST(`2`, `0`) = DST(`1`, `1`) = DST(`0`, `2`) = AVG3(C, D, E);
404	DST(`3`, `0`) = DST(`2`, `1`) = DST(`1`, `2`) = DST(`0`, `3`) = AVG3(D, E, F);
405	DST(`3`, `1`) = DST(`2`, `2`) = DST(`1`, `3`) = AVG3(E, F, G);
406	DST(`3`, `2`) = DST(`2`, `3`) = AVG3(F, G, H);
407	DST(`3`, `3`) = AVG3(G, H, H);
408	}
409
410	static void VR4(uint8_t* dst, const uint8_t* top) {
411	const int X = top[-`1`];
412	const int I = top[-`2`];
413	const int J = top[-`3`];
414	const int K = top[-`4`];
415	const int A = top[`0`];
416	const int B = top[`1`];
417	const int C = top[`2`];
418	const int D = top[`3`];
419	DST(`0`, `0`) = DST(`1`, `2`) = AVG2(X, A);
420	DST(`1`, `0`) = DST(`2`, `2`) = AVG2(A, B);
421	DST(`2`, `0`) = DST(`3`, `2`) = AVG2(B, C);
422	DST(`3`, `0`) = AVG2(C, D);
423
424	DST(`0`, `3`) = AVG3(K, J, I);
425	DST(`0`, `2`) = AVG3(J, I, X);
426	DST(`0`, `1`) = DST(`1`, `3`) = AVG3(I, X, A);
427	DST(`1`, `1`) = DST(`2`, `3`) = AVG3(X, A, B);
428	DST(`2`, `1`) = DST(`3`, `3`) = AVG3(A, B, C);
429	DST(`3`, `1`) = AVG3(B, C, D);
430	}
431
432	static void VL4(uint8_t* dst, const uint8_t* top) {
433	const int A = top[`0`];
434	const int B = top[`1`];
435	const int C = top[`2`];
436	const int D = top[`3`];
437	const int E = top[`4`];
438	const int F = top[`5`];
439	const int G = top[`6`];
440	const int H = top[`7`];
441	DST(`0`, `0`) = AVG2(A, B);
442	DST(`1`, `0`) = DST(`0`, `2`) = AVG2(B, C);
443	DST(`2`, `0`) = DST(`1`, `2`) = AVG2(C, D);
444	DST(`3`, `0`) = DST(`2`, `2`) = AVG2(D, E);
445
446	DST(`0`, `1`) = AVG3(A, B, C);
447	DST(`1`, `1`) = DST(`0`, `3`) = AVG3(B, C, D);
448	DST(`2`, `1`) = DST(`1`, `3`) = AVG3(C, D, E);
449	DST(`3`, `1`) = DST(`2`, `3`) = AVG3(D, E, F);
450	DST(`3`, `2`) = AVG3(E, F, G);
451	DST(`3`, `3`) = AVG3(F, G, H);
452	}
453
454	static void HU4(uint8_t* dst, const uint8_t* top) {
455	const int I = top[-`2`];
456	const int J = top[-`3`];
457	const int K = top[-`4`];
458	const int L = top[-`5`];
459	DST(`0`, `0`) = AVG2(I, J);
460	DST(`2`, `0`) = DST(`0`, `1`) = AVG2(J, K);
461	DST(`2`, `1`) = DST(`0`, `2`) = AVG2(K, L);
462	DST(`1`, `0`) = AVG3(I, J, K);
463	DST(`3`, `0`) = DST(`1`, `1`) = AVG3(J, K, L);
464	DST(`3`, `1`) = DST(`1`, `2`) = AVG3(K, L, L);
465	DST(`3`, `2`) = DST(`2`, `2`) =
466	DST(`0`, `3`) = DST(`1`, `3`) = DST(`2`, `3`) = DST(`3`, `3`) = L;
467	}
468
469	static void HD4(uint8_t* dst, const uint8_t* top) {
470	const int X = top[-`1`];
471	const int I = top[-`2`];
472	const int J = top[-`3`];
473	const int K = top[-`4`];
474	const int L = top[-`5`];
475	const int A = top[`0`];
476	const int B = top[`1`];
477	const int C = top[`2`];
478
479	DST(`0`, `0`) = DST(`2`, `1`) = AVG2(I, X);
480	DST(`0`, `1`) = DST(`2`, `2`) = AVG2(J, I);
481	DST(`0`, `2`) = DST(`2`, `3`) = AVG2(K, J);
482	DST(`0`, `3`) = AVG2(L, K);
483
484	DST(`3`, `0`) = AVG3(A, B, C);
485	DST(`2`, `0`) = AVG3(X, A, B);
486	DST(`1`, `0`) = DST(`3`, `1`) = AVG3(I, X, A);
487	DST(`1`, `1`) = DST(`3`, `2`) = AVG3(J, I, X);
488	DST(`1`, `2`) = DST(`3`, `3`) = AVG3(K, J, I);
489	DST(`1`, `3`) = AVG3(L, K, J);
490	}
491
492	static void TM4(uint8_t* dst, const uint8_t* top) {
493	int x, y;
494	const uint8_t* const clip = clip1 + `255` - top[-`1`];
495	for (y = `0`; y < `4`; ++y) {
496	const uint8_t* const clip_table = clip + top[-`2` - y];
497	for (x = `0`; x < `4`; ++x) {
498	dst[x] = clip_table[top[x]];
499	}
500	dst += BPS;
501	}
502	}
503
504	#undef DST
505	#undef AVG3
506	#undef AVG2
507
508	// Left samples are top[-5 .. -2], top_left is top[-1], top are
509	// located at top[0..3], and top right is top[4..7]
510	static void Intra4Preds(uint8_t* dst, const uint8_t* top) {
511	DC4(I4DC4 + dst, top);
512	TM4(I4TM4 + dst, top);
513	VE4(I4VE4 + dst, top);
514	HE4(I4HE4 + dst, top);
515	RD4(I4RD4 + dst, top);
516	VR4(I4VR4 + dst, top);
517	LD4(I4LD4 + dst, top);
518	VL4(I4VL4 + dst, top);
519	HD4(I4HD4 + dst, top);
520	HU4(I4HU4 + dst, top);
521	}
522
523	//------------------------------------------------------------------------------
524	// Metric
525
526	static WEBP_INLINE int GetSSE(const uint8_t* a, const uint8_t* b,
527	int w, int h) {
528	int count = `0`;
529	int y, x;
530	for (y = `0`; y < h; ++y) {
531	for (x = `0`; x < w; ++x) {
532	const int diff = (int)a[x] - b[x];
533	count += diff * diff;
534	}
535	a += BPS;
536	b += BPS;
537	}
538	return count;
539	}
540
// Sum of squared differences over a 16-wide, 16-high area.
static int SSE16x16(const uint8_t* a, const uint8_t* b) {
  const int width = 16, height = 16;
  return GetSSE(a, b, width, height);
}
// Sum of squared differences over a 16-wide, 8-high area.
static int SSE16x8(const uint8_t* a, const uint8_t* b) {
  const int width = 16, height = 8;
  return GetSSE(a, b, width, height);
}
// Sum of squared differences over an 8-wide, 8-high area.
static int SSE8x8(const uint8_t* a, const uint8_t* b) {
  const int width = 8, height = 8;
  return GetSSE(a, b, width, height);
}
// Sum of squared differences over a 4-wide, 4-high area.
static int SSE4x4(const uint8_t* a, const uint8_t* b) {
  const int width = 4, height = 4;
  return GetSSE(a, b, width, height);
}
553
554	static void Mean16x4(const uint8_t* ref, uint32_t dc[`4`]) {
555	int k, x, y;
556	for (k = `0`; k < `4`; ++k) {
557	uint32_t avg = `0`;
558	for (y = `0`; y < `4`; ++y) {
559	for (x = `0`; x < `4`; ++x) {
560	avg += ref[x + y * BPS];
561	}
562	}
563	dc[k] = avg;
564	ref += `4`; // go to next 4x4 block.
565	}
566	}
567
568	//------------------------------------------------------------------------------
569	// Texture distortion
570	//
571	// We try to match the spectral content (weighted) between source and
572	// reconstructed samples.
573
574	// Hadamard transform
575	// Returns the weighted sum of the absolute value of transformed coefficients.
576	// w[] contains a row-major 4 by 4 symmetric matrix.
577	static int TTransform(const uint8_t* in, const uint16_t* w) {
578	int sum = `0`;
579	int tmp[`16`];
580	int i;
581	// horizontal pass
582	for (i = `0`; i < `4`; ++i, in += BPS) {
583	const int a0 = in[`0`] + in[`2`];
584	const int a1 = in[`1`] + in[`3`];
585	const int a2 = in[`1`] - in[`3`];
586	const int a3 = in[`0`] - in[`2`];
587	tmp[`0` + i * `4`] = a0 + a1;
588	tmp[`1` + i * `4`] = a3 + a2;
589	tmp[`2` + i * `4`] = a3 - a2;
590	tmp[`3` + i * `4`] = a0 - a1;
591	}
592	// vertical pass
593	for (i = `0`; i < `4`; ++i, ++w) {
594	const int a0 = tmp[`0` + i] + tmp[`8` + i];
595	const int a1 = tmp[`4` + i] + tmp[`12`+ i];
596	const int a2 = tmp[`4` + i] - tmp[`12`+ i];
597	const int a3 = tmp[`0` + i] - tmp[`8` + i];
598	const int b0 = a0 + a1;
599	const int b1 = a3 + a2;
600	const int b2 = a3 - a2;
601	const int b3 = a0 - a1;
602
603	sum += w[ `0`] * abs(b0);
604	sum += w[ `4`] * abs(b1);
605	sum += w[ `8`] * abs(b2);
606	sum += w[`12`] * abs(b3);
607	}
608	return sum;
609	}
610
// Texture distortion between two 4x4 blocks: the absolute difference of their
// weighted transform sums, scaled down by '>> 5'.
static int Disto4x4(const uint8_t* const a, const uint8_t* const b,
                    const uint16_t* const w) {
  const int weighted_a = TTransform(a, w);
  const int weighted_b = TTransform(b, w);
  const int delta = weighted_b - weighted_a;
  return abs(delta) >> 5;
}
617
618	static int Disto16x16(const uint8_t* const a, const uint8_t* const b,
619	const uint16_t* const w) {
620	int D = `0`;
621	int x, y;
622	for (y = `0`; y < `16` * BPS; y += `4` * BPS) {
623	for (x = `0`; x < `16`; x += `4`) {
624	D += Disto4x4(a + x + y, b + x + y, w);
625	}
626	}
627	return D;
628	}
629
630	//------------------------------------------------------------------------------
631	// Quantization
632	//
633
// kZigzag[n] is the raster position (within a 4x4 block) of the n-th
// coefficient in zigzag order.
static const uint8_t kZigzag[16] = {
  0,  1,  4,  8,
  5,  2,  3,  6,
  9,  12, 13, 10,
  7,  11, 14, 15
};
637
638	// Simple quantization
639	static int QuantizeBlock(int16_t in[`16`], int16_t out[`16`],
640	const VP8Matrix* const mtx) {
641	int last = -`1`;
642	int n;
643	for (n = `0`; n < `16`; ++n) {
644	const int j = kZigzag[n];
645	const int sign = (in[j] < `0`);
646	const uint32_t coeff = (sign ? -in[j] : in[j]) + mtx->sharpen_[j];
647	if (coeff > mtx->zthresh_[j]) {
648	const uint32_t Q = mtx->q_[j];
649	const uint32_t iQ = mtx->iq_[j];
650	const uint32_t B = mtx->bias_[j];
651	int level = QUANTDIV(coeff, iQ, B);
652	if (level > MAX_LEVEL) level = MAX_LEVEL;
653	if (sign) level = -level;
654	in[j] = level * (int)Q;
655	out[n] = level;
656	if (level) last = n;
657	} else {
658	out[n] = `0`;
659	in[j] = `0`;
660	}
661	}
662	return (last >= `0`);
663	}
664
665	static int Quantize2Blocks(int16_t in[`32`], int16_t out[`32`],
666	const VP8Matrix* const mtx) {
667	int nz;
668	nz = VP8EncQuantizeBlock(in + `0` * `16`, out + `0` * `16`, mtx) << `0`;
669	nz \|= VP8EncQuantizeBlock(in + `1` * `16`, out + `1` * `16`, mtx) << `1`;
670	return nz;
671	}
672
673	//------------------------------------------------------------------------------
674	// Block copy
675
676	static WEBP_INLINE void Copy(const uint8_t* src, uint8_t* dst, int w, int h) {
677	int y;
678	for (y = `0`; y < h; ++y) {
679	memcpy(dst, src, w);
680	src += BPS;
681	dst += BPS;
682	}
683	}
684
// Copies a 4-wide, 4-high area.
static void Copy4x4(const uint8_t* src, uint8_t* dst) {
  const int width = 4, height = 4;
  Copy(src, dst, width, height);
}
688
// Copies a 16-wide, 8-high area.
static void Copy16x8(const uint8_t* src, uint8_t* dst) {
  const int width = 16, height = 8;
  Copy(src, dst, width, height);
}
692
693	//------------------------------------------------------------------------------
694	// SSIM / PSNR
695
696	// hat-shaped filter. Sum of coefficients is equal to 16.
697	static const uint32_t kWeight[`2` * VP8_SSIM_KERNEL + `1`] = {
698	`1`, `2`, `3`, `4`, `3`, `2`, `1`
699	};
700	static const uint32_t kWeightSum = `16` * `16`; // sum{kWeight}^2
701
702	static WEBP_INLINE double SSIMCalculation(
703	const VP8DistoStats* const stats, uint32_t N /num samples/) {
704	const uint32_t w2 = N * N;
705	const uint32_t C1 = `20` * w2;
706	const uint32_t C2 = `60` * w2;
707	const uint32_t C3 = `8` * `8` * w2; // 'dark' limit ~= 6
708	const uint64_t xmxm = (uint64_t)stats->xm * stats->xm;
709	const uint64_t ymym = (uint64_t)stats->ym * stats->ym;
710	if (xmxm + ymym >= C3) {
711	const int64_t xmym = (int64_t)stats->xm * stats->ym;
712	const int64_t sxy = (int64_t)stats->xym * N - xmym; // can be negative
713	const uint64_t sxx = (uint64_t)stats->xxm * N - xmxm;
714	const uint64_t syy = (uint64_t)stats->yym * N - ymym;
715	// we descale by 8 to prevent overflow during the fnum/fden multiply.
716	const uint64_t num_S = (`2` * (uint64_t)(sxy < `0` ? `0` : sxy) + C2) >> `8`;
717	const uint64_t den_S = (sxx + syy + C2) >> `8`;
718	const uint64_t fnum = (`2` * xmym + C1) * num_S;
719	const uint64_t fden = (xmxm + ymym + C1) * den_S;
720	const double r = (double)fnum / fden;
721	assert(r >= `0.` && r <= `1.0`);
722	return r;
723	}
724	return `1.`; // area is too dark to contribute meaningfully
725	}
726
727	double VP8SSIMFromStats(const VP8DistoStats* const stats) {
728	return SSIMCalculation(stats, kWeightSum);
729	}
730
731	double VP8SSIMFromStatsClipped(const VP8DistoStats* const stats) {
732	return SSIMCalculation(stats, stats->w);
733	}
734
735	static double SSIMGetClipped_C(const uint8_t* src1, int stride1,
736	const uint8_t* src2, int stride2,
737	int xo, int yo, int W, int H) {
738	VP8DistoStats stats = { `0`, `0`, `0`, `0`, `0`, `0` };
739	const int ymin = (yo - VP8_SSIM_KERNEL < `0`) ? `0` : yo - VP8_SSIM_KERNEL;
740	const int ymax = (yo + VP8_SSIM_KERNEL > H - `1`) ? H - `1`
741	: yo + VP8_SSIM_KERNEL;
742	const int xmin = (xo - VP8_SSIM_KERNEL < `0`) ? `0` : xo - VP8_SSIM_KERNEL;
743	const int xmax = (xo + VP8_SSIM_KERNEL > W - `1`) ? W - `1`
744	: xo + VP8_SSIM_KERNEL;
745	int x, y;
746	src1 += ymin * stride1;
747	src2 += ymin * stride2;
748	for (y = ymin; y <= ymax; ++y, src1 += stride1, src2 += stride2) {
749	for (x = xmin; x <= xmax; ++x) {
750	const uint32_t w = kWeight[VP8_SSIM_KERNEL + x - xo]
751	* kWeight[VP8_SSIM_KERNEL + y - yo];
752	const uint32_t s1 = src1[x];
753	const uint32_t s2 = src2[x];
754	stats.w += w;
755	stats.xm += w * s1;
756	stats.ym += w * s2;
757	stats.xxm += w * s1 * s1;
758	stats.xym += w * s1 * s2;
759	stats.yym += w * s2 * s2;
760	}
761	}
762	return VP8SSIMFromStatsClipped(&stats);
763	}
764
765	static double SSIMGet_C(const uint8_t* src1, int stride1,
766	const uint8_t* src2, int stride2) {
767	VP8DistoStats stats = { `0`, `0`, `0`, `0`, `0`, `0` };
768	int x, y;
769	for (y = `0`; y <= `2` * VP8_SSIM_KERNEL; ++y, src1 += stride1, src2 += stride2) {
770	for (x = `0`; x <= `2` * VP8_SSIM_KERNEL; ++x) {
771	const uint32_t w = kWeight[x] * kWeight[y];
772	const uint32_t s1 = src1[x];
773	const uint32_t s2 = src2[x];
774	stats.xm += w * s1;
775	stats.ym += w * s2;
776	stats.xxm += w * s1 * s1;
777	stats.xym += w * s1 * s2;
778	stats.yym += w * s2 * s2;
779	}
780	}
781	return VP8SSIMFromStats(&stats);
782	}
783
784	//------------------------------------------------------------------------------
785
// Returns the sum of squared differences between the first 'len' bytes of
// 'src1' and 'src2'.
static uint32_t AccumulateSSE(const uint8_t* src1,
                              const uint8_t* src2, int len) {
  uint32_t sse2 = 0;
  int remaining;
  assert(len <= 65535);  // to ensure that accumulation fits within uint32_t
  for (remaining = len; remaining > 0; --remaining) {
    const int32_t diff = *src1 - *src2;
    sse2 += diff * diff;
    ++src1;
    ++src2;
  }
  return sse2;
}
797
798	//------------------------------------------------------------------------------
799
800	VP8SSIMGetFunc VP8SSIMGet;
801	VP8SSIMGetClippedFunc VP8SSIMGetClipped;
802	VP8AccumulateSSEFunc VP8AccumulateSSE;
803
804	extern void VP8SSIMDspInitSSE2(void);
805
806	static volatile VP8CPUInfo ssim_last_cpuinfo_used =
807	(VP8CPUInfo)&ssim_last_cpuinfo_used;
808
809	WEBP_TSAN_IGNORE_FUNCTION void VP8SSIMDspInit(void) {
810	if (ssim_last_cpuinfo_used == VP8GetCPUInfo) return;
811
812	VP8SSIMGetClipped = SSIMGetClipped_C;
813	VP8SSIMGet = SSIMGet_C;
814
815	VP8AccumulateSSE = AccumulateSSE;
816	if (VP8GetCPUInfo != NULL) {
817	#if defined(WEBP_USE_SSE2)
818	if (VP8GetCPUInfo(kSSE2)) {
819	VP8SSIMDspInitSSE2();
820	}
821	#endif
822	}
823
824	ssim_last_cpuinfo_used = VP8GetCPUInfo;
825	}
826
827	//------------------------------------------------------------------------------
828	// Initialization
829
830	// Speed-critical function pointers. We have to initialize them to the default
831	// implementations within VP8EncDspInit().
832	VP8CHisto VP8CollectHistogram;
833	VP8Idct VP8ITransform;
834	VP8Fdct VP8FTransform;
835	VP8Fdct VP8FTransform2;
836	VP8WHT VP8FTransformWHT;
837	VP8Intra4Preds VP8EncPredLuma4;
838	VP8IntraPreds VP8EncPredLuma16;
839	VP8IntraPreds VP8EncPredChroma8;
840	VP8Metric VP8SSE16x16;
841	VP8Metric VP8SSE8x8;
842	VP8Metric VP8SSE16x8;
843	VP8Metric VP8SSE4x4;
844	VP8WMetric VP8TDisto4x4;
845	VP8WMetric VP8TDisto16x16;
846	VP8MeanMetric VP8Mean16x4;
847	VP8QuantizeBlock VP8EncQuantizeBlock;
848	VP8Quantize2Blocks VP8EncQuantize2Blocks;
849	VP8QuantizeBlockWHT VP8EncQuantizeBlockWHT;
850	VP8BlockCopy VP8Copy4x4;
851	VP8BlockCopy VP8Copy16x8;
852
853	extern void VP8EncDspInitSSE2(void);
854	extern void VP8EncDspInitSSE41(void);
855	extern void VP8EncDspInitAVX2(void);
856	extern void VP8EncDspInitNEON(void);
857	extern void VP8EncDspInitMIPS32(void);
858	extern void VP8EncDspInitMIPSdspR2(void);
859	extern void VP8EncDspInitMSA(void);
860
861	static volatile VP8CPUInfo enc_last_cpuinfo_used =
862	(VP8CPUInfo)&enc_last_cpuinfo_used;
863
864	WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspInit(void) {
865	if (enc_last_cpuinfo_used == VP8GetCPUInfo) return;
866
867	VP8DspInit(); // common inverse transforms
868	InitTables();
869
870	// default C implementations
871	VP8CollectHistogram = CollectHistogram;
872	VP8ITransform = ITransform;
873	VP8FTransform = FTransform;
874	VP8FTransform2 = FTransform2;
875	VP8FTransformWHT = FTransformWHT;
876	VP8EncPredLuma4 = Intra4Preds;
877	VP8EncPredLuma16 = Intra16Preds;
878	VP8EncPredChroma8 = IntraChromaPreds;
879	VP8SSE16x16 = SSE16x16;
880	VP8SSE8x8 = SSE8x8;
881	VP8SSE16x8 = SSE16x8;
882	VP8SSE4x4 = SSE4x4;
883	VP8TDisto4x4 = Disto4x4;
884	VP8TDisto16x16 = Disto16x16;
885	VP8Mean16x4 = Mean16x4;
886	VP8EncQuantizeBlock = QuantizeBlock;
887	VP8EncQuantize2Blocks = Quantize2Blocks;
888	VP8EncQuantizeBlockWHT = QuantizeBlock;
889	VP8Copy4x4 = Copy4x4;
890	VP8Copy16x8 = Copy16x8;
891
892	// If defined, use CPUInfo() to overwrite some pointers with faster versions.
893	if (VP8GetCPUInfo != NULL) {
894	#if defined(WEBP_USE_SSE2)
895	if (VP8GetCPUInfo(kSSE2)) {
896	VP8EncDspInitSSE2();
897	#if defined(WEBP_USE_SSE41)
898	if (VP8GetCPUInfo(kSSE4_1)) {
899	VP8EncDspInitSSE41();
900	}
901	#endif
902	}
903	#endif
904	#if defined(WEBP_USE_AVX2)
905	if (VP8GetCPUInfo(kAVX2)) {
906	VP8EncDspInitAVX2();
907	}
908	#endif
909	#if defined(WEBP_USE_NEON)
910	if (VP8GetCPUInfo(kNEON)) {
911	VP8EncDspInitNEON();
912	}
913	#endif
914	#if defined(WEBP_USE_MIPS32)
915	if (VP8GetCPUInfo(kMIPS32)) {
916	VP8EncDspInitMIPS32();
917	}
918	#endif
919	#if defined(WEBP_USE_MIPS_DSP_R2)
920	if (VP8GetCPUInfo(kMIPSdspR2)) {
921	VP8EncDspInitMIPSdspR2();
922	}
923	#endif
924	#if defined(WEBP_USE_MSA)
925	if (VP8GetCPUInfo(kMSA)) {
926	VP8EncDspInitMSA();
927	}
928	#endif
929	}
930	enc_last_cpuinfo_used = VP8GetCPUInfo;
931	}
932

Browse the source code of engine/third_party/libwebp/src/dsp/enc.c