enc.c source code [Godot/thirdparty/libwebp/src/dsp/enc.c]

// Copyright 2011 Google Inc. All Rights Reserved.
//
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// Speed-critical encoding functions.
//
// Author: Skal (pascal.massimino@gmail.com)
13
#include <assert.h>
#include <stdlib.h>  // for abs()
#include <string.h>  // for memcpy() and memset()

#include "src/dsp/dsp.h"
#include "src/enc/vp8i_enc.h"
19
20	static WEBP_INLINE uint8_t clip_8b(int v) {
21	return (!(v & ~`0xff`)) ? v : (v < `0`) ? `0` : `255`;
22	}
23
24	#if !WEBP_NEON_OMIT_C_CODE
25	static WEBP_INLINE int clip_max(int v, int max) {
26	return (v > max) ? max : v;
27	}
28	#endif // !WEBP_NEON_OMIT_C_CODE
29
30	//------------------------------------------------------------------------------
31	// Compute susceptibility based on DCT-coeff histograms:
32	// the higher, the "easier" the macroblock is to compress.
33
34	const int VP8DspScan[`16` + `4` + `4`] = {
35	// Luma
36	`0` + `0` * BPS, `4` + `0` * BPS, `8` + `0` * BPS, `12` + `0` * BPS,
37	`0` + `4` * BPS, `4` + `4` * BPS, `8` + `4` * BPS, `12` + `4` * BPS,
38	`0` + `8` * BPS, `4` + `8` * BPS, `8` + `8` * BPS, `12` + `8` * BPS,
39	`0` + `12` * BPS, `4` + `12` * BPS, `8` + `12` * BPS, `12` + `12` * BPS,
40
41	`0` + `0` * BPS, `4` + `0` * BPS, `0` + `4` * BPS, `4` + `4` * BPS, // U
42	`8` + `0` * BPS, `12` + `0` * BPS, `8` + `4` * BPS, `12` + `4` * BPS // V
43	};
44
45	// general-purpose util function
46	void VP8SetHistogramData(const int distribution[MAX_COEFF_THRESH + `1`],
47	VP8Histogram* const histo) {
48	int max_value = `0`, last_non_zero = `1`;
49	int k;
50	for (k = `0`; k <= MAX_COEFF_THRESH; ++k) {
51	const int value = distribution[k];
52	if (value > `0`) {
53	if (value > max_value) max_value = value;
54	last_non_zero = k;
55	}
56	}
57	histo->max_value = max_value;
58	histo->last_non_zero = last_non_zero;
59	}
60
61	#if !WEBP_NEON_OMIT_C_CODE
62	static void CollectHistogram_C(const uint8_t* ref, const uint8_t* pred,
63	int start_block, int end_block,
64	VP8Histogram* const histo) {
65	int j;
66	int distribution[MAX_COEFF_THRESH + `1`] = { `0` };
67	for (j = start_block; j < end_block; ++j) {
68	int k;
69	int16_t out[`16`];
70
71	VP8FTransform(ref + VP8DspScan[j], pred + VP8DspScan[j], out);
72
73	// Convert coefficients to bin.
74	for (k = `0`; k < `16`; ++k) {
75	const int v = abs(out[k]) >> `3`;
76	const int clipped_value = clip_max(v, MAX_COEFF_THRESH);
77	++distribution[clipped_value];
78	}
79	}
80	VP8SetHistogramData(distribution, histo);
81	}
82	#endif // !WEBP_NEON_OMIT_C_CODE
83
//------------------------------------------------------------------------------
// run-time tables (766 bytes)

// Lookup table filled by InitTables(): entry [255 + i] holds i clipped to
// [0,255], for i in [-255,510].
static uint8_t clip1[255 + 510 + 1];    // clips [-255,510] to [0,255]

// We declare this variable 'volatile' to prevent instruction reordering
// and make sure it's set to true _last_ (so as to be thread-safe)
static volatile int tables_ok = 0;
92
93	static WEBP_TSAN_IGNORE_FUNCTION void InitTables(void) {
94	if (!tables_ok) {
95	int i;
96	for (i = -`255`; i <= `255` + `255`; ++i) {
97	clip1[`255` + i] = clip_8b(i);
98	}
99	tables_ok = `1`;
100	}
101	}
102
103
104	//------------------------------------------------------------------------------
105	// Transforms (Paragraph 14.4)
106
107	#if !WEBP_NEON_OMIT_C_CODE
108
// Writes to dst[] at column x, row y the sample ref[] at the same position
// plus (v >> 3), clipped to [0,255]. Expects 'dst' and 'ref' in scope.
#define STORE(x, y, v) \
  dst[(x) + (y) * BPS] = clip_8b(ref[(x) + (y) * BPS] + ((v) >> 3))

// Fixed-point multipliers with 16 fractional bits (see MUL below).
static const int kC1 = 20091 + (1 << 16);
static const int kC2 = 35468;
// Multiplies 'a' by 'b' and drops the 16 low bits of the product.
#define MUL(a, b) (((a) * (b)) >> 16)
115
116	static WEBP_INLINE void ITransformOne(const uint8_t* ref, const int16_t* in,
117	uint8_t* dst) {
118	int C[`4` * `4`], *tmp;
119	int i;
120	tmp = C;
121	for (i = `0`; i < `4`; ++i) { // vertical pass
122	const int a = in[`0`] + in[`8`];
123	const int b = in[`0`] - in[`8`];
124	const int c = MUL(in[`4`], kC2) - MUL(in[`12`], kC1);
125	const int d = MUL(in[`4`], kC1) + MUL(in[`12`], kC2);
126	tmp[`0`] = a + d;
127	tmp[`1`] = b + c;
128	tmp[`2`] = b - c;
129	tmp[`3`] = a - d;
130	tmp += `4`;
131	in++;
132	}
133
134	tmp = C;
135	for (i = `0`; i < `4`; ++i) { // horizontal pass
136	const int dc = tmp[`0`] + `4`;
137	const int a = dc + tmp[`8`];
138	const int b = dc - tmp[`8`];
139	const int c = MUL(tmp[`4`], kC2) - MUL(tmp[`12`], kC1);
140	const int d = MUL(tmp[`4`], kC1) + MUL(tmp[`12`], kC2);
141	STORE(`0`, i, a + d);
142	STORE(`1`, i, b + c);
143	STORE(`2`, i, b - c);
144	STORE(`3`, i, a - d);
145	tmp++;
146	}
147	}
148
// Inverse-transforms one 4x4 block, or two when 'do_two' is non-zero.
// The second block uses coefficients in[16..31] and is located 4 samples to
// the right in both 'ref' and 'dst'.
static void ITransform_C(const uint8_t* ref, const int16_t* in, uint8_t* dst,
                         int do_two) {
  ITransformOne(ref, in, dst);
  if (do_two) {
    ITransformOne(ref + 4, in + 16, dst + 4);
  }
}
156
157	static void FTransform_C(const uint8_t* src, const uint8_t* ref, int16_t* out) {
158	int i;
159	int tmp[`16`];
160	for (i = `0`; i < `4`; ++i, src += BPS, ref += BPS) {
161	const int d0 = src[`0`] - ref[`0`]; // 9bit dynamic range ([-255,255])
162	const int d1 = src[`1`] - ref[`1`];
163	const int d2 = src[`2`] - ref[`2`];
164	const int d3 = src[`3`] - ref[`3`];
165	const int a0 = (d0 + d3); // 10b [-510,510]
166	const int a1 = (d1 + d2);
167	const int a2 = (d1 - d2);
168	const int a3 = (d0 - d3);
169	tmp[`0` + i * `4`] = (a0 + a1) * `8`; // 14b [-8160,8160]
170	tmp[`1` + i * `4`] = (a2 * `2217` + a3 * `5352` + `1812`) >> `9`; // [-7536,7542]
171	tmp[`2` + i * `4`] = (a0 - a1) * `8`;
172	tmp[`3` + i * `4`] = (a3 * `2217` - a2 * `5352` + `937`) >> `9`;
173	}
174	for (i = `0`; i < `4`; ++i) {
175	const int a0 = (tmp[`0` + i] + tmp[`12` + i]); // 15b
176	const int a1 = (tmp[`4` + i] + tmp[ `8` + i]);
177	const int a2 = (tmp[`4` + i] - tmp[ `8` + i]);
178	const int a3 = (tmp[`0` + i] - tmp[`12` + i]);
179	out[`0` + i] = (a0 + a1 + `7`) >> `4`; // 12b
180	out[`4` + i] = ((a2 * `2217` + a3 * `5352` + `12000`) >> `16`) + (a3 != `0`);
181	out[`8` + i] = (a0 - a1 + `7`) >> `4`;
182	out[`12`+ i] = ((a3 * `2217` - a2 * `5352` + `51000`) >> `16`);
183	}
184	}
185	#endif // !WEBP_NEON_OMIT_C_CODE
186
// Forward-transforms two horizontally adjacent 4x4 blocks through the
// VP8FTransform function pointer. The second block starts 4 samples to the
// right and its coefficients are written to out[16..31].
static void FTransform2_C(const uint8_t* src, const uint8_t* ref,
                          int16_t* out) {
  VP8FTransform(src, ref, out);
  VP8FTransform(src + 4, ref + 4, out + 16);
}
192
#if !WEBP_NEON_OMIT_C_CODE
// Applies a 4x4 Walsh-Hadamard-style transform to 16 input values and writes
// 16 outputs, each halved by a final '>> 1'.
// The inputs are not contiguous: value k (k = 0..15) is read from in[16 * k],
// so 'in' must provide at least 16 * 15 + 1 readable elements.
static void FTransformWHT_C(const int16_t* in, int16_t* out) {
  // input is 12b signed
  int32_t tmp[16];
  int i;
  // First pass: each iteration consumes four inputs spaced 16 apart, then
  // advances by 64 elements to the next group of four.
  for (i = 0; i < 4; ++i, in += 64) {
    const int a0 = (in[0 * 16] + in[2 * 16]);  // 13b
    const int a1 = (in[1 * 16] + in[3 * 16]);
    const int a2 = (in[1 * 16] - in[3 * 16]);
    const int a3 = (in[0 * 16] - in[2 * 16]);
    tmp[0 + i * 4] = a0 + a1;   // 14b
    tmp[1 + i * 4] = a3 + a2;
    tmp[2 + i * 4] = a3 - a2;
    tmp[3 + i * 4] = a0 - a1;
  }
  // Second pass: combine across the four groups.
  for (i = 0; i < 4; ++i) {
    const int a0 = (tmp[0 + i] + tmp[8 + i]);  // 15b
    const int a1 = (tmp[4 + i] + tmp[12+ i]);
    const int a2 = (tmp[4 + i] - tmp[12+ i]);
    const int a3 = (tmp[0 + i] - tmp[8 + i]);
    const int b0 = a0 + a1;    // 16b
    const int b1 = a3 + a2;
    const int b2 = a3 - a2;
    const int b3 = a0 - a1;
    out[ 0 + i] = b0 >> 1;     // 15b
    out[ 4 + i] = b1 >> 1;
    out[ 8 + i] = b2 >> 1;
    out[12 + i] = b3 >> 1;
  }
}
#endif  // !WEBP_NEON_OMIT_C_CODE
224
225	#undef MUL
226	#undef STORE
227
228	//------------------------------------------------------------------------------
229	// Intra predictions
230
231	static WEBP_INLINE void Fill(uint8_t* dst, int value, int size) {
232	int j;
233	for (j = `0`; j < size; ++j) {
234	memset(dst + j * BPS, value, size);
235	}
236	}
237
238	static WEBP_INLINE void VerticalPred(uint8_t* dst,
239	const uint8_t* top, int size) {
240	int j;
241	if (top != NULL) {
242	for (j = `0`; j < size; ++j) memcpy(dst + j * BPS, top, size);
243	} else {
244	Fill(dst, `127`, size);
245	}
246	}
247
248	static WEBP_INLINE void HorizontalPred(uint8_t* dst,
249	const uint8_t* left, int size) {
250	if (left != NULL) {
251	int j;
252	for (j = `0`; j < size; ++j) {
253	memset(dst + j * BPS, left[j], size);
254	}
255	} else {
256	Fill(dst, `129`, size);
257	}
258	}
259
260	static WEBP_INLINE void TrueMotion(uint8_t* dst, const uint8_t* left,
261	const uint8_t* top, int size) {
262	int y;
263	if (left != NULL) {
264	if (top != NULL) {
265	const uint8_t* const clip = clip1 + `255` - left[-`1`];
266	for (y = `0`; y < size; ++y) {
267	const uint8_t* const clip_table = clip + left[y];
268	int x;
269	for (x = `0`; x < size; ++x) {
270	dst[x] = clip_table[top[x]];
271	}
272	dst += BPS;
273	}
274	} else {
275	HorizontalPred(dst, left, size);
276	}
277	} else {
278	// true motion without left samples (hence: with default 129 value)
279	// is equivalent to VE prediction where you just copy the top samples.
280	// Note that if top samples are not available, the default value is
281	// then 129, and not 127 as in the VerticalPred case.
282	if (top != NULL) {
283	VerticalPred(dst, top, size);
284	} else {
285	Fill(dst, `129`, size);
286	}
287	}
288	}
289
290	static WEBP_INLINE void DCMode(uint8_t* dst, const uint8_t* left,
291	const uint8_t* top,
292	int size, int round, int shift) {
293	int DC = `0`;
294	int j;
295	if (top != NULL) {
296	for (j = `0`; j < size; ++j) DC += top[j];
297	if (left != NULL) { // top and left present
298	for (j = `0`; j < size; ++j) DC += left[j];
299	} else { // top, but no left
300	DC += DC;
301	}
302	DC = (DC + round) >> shift;
303	} else if (left != NULL) { // left but no top
304	for (j = `0`; j < size; ++j) DC += left[j];
305	DC += DC;
306	DC = (DC + round) >> shift;
307	} else { // no top, no left, nothing.
308	DC = `0x80`;
309	}
310	Fill(dst, DC, size);
311	}
312
313	//------------------------------------------------------------------------------
314	// Chroma 8x8 prediction (paragraph 12.2)
315
316	static void IntraChromaPreds_C(uint8_t* dst, const uint8_t* left,
317	const uint8_t* top) {
318	// U block
319	DCMode(C8DC8 + dst, left, top, `8`, `8`, `4`);
320	VerticalPred(C8VE8 + dst, top, `8`);
321	HorizontalPred(C8HE8 + dst, left, `8`);
322	TrueMotion(C8TM8 + dst, left, top, `8`);
323	// V block
324	dst += `8`;
325	if (top != NULL) top += `8`;
326	if (left != NULL) left += `16`;
327	DCMode(C8DC8 + dst, left, top, `8`, `8`, `4`);
328	VerticalPred(C8VE8 + dst, top, `8`);
329	HorizontalPred(C8HE8 + dst, left, `8`);
330	TrueMotion(C8TM8 + dst, left, top, `8`);
331	}
332
333	//------------------------------------------------------------------------------
334	// luma 16x16 prediction (paragraph 12.3)
335
336	static void Intra16Preds_C(uint8_t* dst,
337	const uint8_t* left, const uint8_t* top) {
338	DCMode(I16DC16 + dst, left, top, `16`, `16`, `5`);
339	VerticalPred(I16VE16 + dst, top, `16`);
340	HorizontalPred(I16HE16 + dst, left, `16`);
341	TrueMotion(I16TM16 + dst, left, top, `16`);
342	}
343
//------------------------------------------------------------------------------
// luma 4x4 prediction

// DST(x, y): sample at column x, row y of 'dst' (row stride BPS).
// AVG3(a, b, c): rounded weighted average (a + 2 * b + c) / 4, as uint8_t.
// AVG2(a, b): rounded average (a + b) / 2.
#define DST(x, y) dst[(x) + (y) * BPS]
#define AVG3(a, b, c) ((uint8_t)(((a) + 2 * (b) + (c) + 2) >> 2))
#define AVG2(a, b) (((a) + (b) + 1) >> 1)
350
351	static void VE4(uint8_t* dst, const uint8_t* top) { // vertical
352	const uint8_t vals[`4`] = {
353	AVG3(top[-`1`], top[`0`], top[`1`]),
354	AVG3(top[ `0`], top[`1`], top[`2`]),
355	AVG3(top[ `1`], top[`2`], top[`3`]),
356	AVG3(top[ `2`], top[`3`], top[`4`])
357	};
358	int i;
359	for (i = `0`; i < `4`; ++i) {
360	memcpy(dst + i * BPS, vals, `4`);
361	}
362	}
363
364	static void HE4(uint8_t* dst, const uint8_t* top) { // horizontal
365	const int X = top[-`1`];
366	const int I = top[-`2`];
367	const int J = top[-`3`];
368	const int K = top[-`4`];
369	const int L = top[-`5`];
370	WebPUint32ToMem(dst + `0` * BPS, `0x01010101U` * AVG3(X, I, J));
371	WebPUint32ToMem(dst + `1` * BPS, `0x01010101U` * AVG3(I, J, K));
372	WebPUint32ToMem(dst + `2` * BPS, `0x01010101U` * AVG3(J, K, L));
373	WebPUint32ToMem(dst + `3` * BPS, `0x01010101U` * AVG3(K, L, L));
374	}
375
// DC 4x4 prediction: fills the block with the rounded mean of the eight
// samples top[0..3] and top[-5..-2].
static void DC4(uint8_t* dst, const uint8_t* top) {
  uint32_t dc = 4;   // rounding term for the final '>> 3'
  int i;
  for (i = 0; i < 4; ++i) dc += top[i] + top[-5 + i];
  Fill(dst, dc >> 3, 4);
}
382
383	static void RD4(uint8_t* dst, const uint8_t* top) {
384	const int X = top[-`1`];
385	const int I = top[-`2`];
386	const int J = top[-`3`];
387	const int K = top[-`4`];
388	const int L = top[-`5`];
389	const int A = top[`0`];
390	const int B = top[`1`];
391	const int C = top[`2`];
392	const int D = top[`3`];
393	DST(`0`, `3`) = AVG3(J, K, L);
394	DST(`0`, `2`) = DST(`1`, `3`) = AVG3(I, J, K);
395	DST(`0`, `1`) = DST(`1`, `2`) = DST(`2`, `3`) = AVG3(X, I, J);
396	DST(`0`, `0`) = DST(`1`, `1`) = DST(`2`, `2`) = DST(`3`, `3`) = AVG3(A, X, I);
397	DST(`1`, `0`) = DST(`2`, `1`) = DST(`3`, `2`) = AVG3(B, A, X);
398	DST(`2`, `0`) = DST(`3`, `1`) = AVG3(C, B, A);
399	DST(`3`, `0`) = AVG3(D, C, B);
400	}
401
402	static void LD4(uint8_t* dst, const uint8_t* top) {
403	const int A = top[`0`];
404	const int B = top[`1`];
405	const int C = top[`2`];
406	const int D = top[`3`];
407	const int E = top[`4`];
408	const int F = top[`5`];
409	const int G = top[`6`];
410	const int H = top[`7`];
411	DST(`0`, `0`) = AVG3(A, B, C);
412	DST(`1`, `0`) = DST(`0`, `1`) = AVG3(B, C, D);
413	DST(`2`, `0`) = DST(`1`, `1`) = DST(`0`, `2`) = AVG3(C, D, E);
414	DST(`3`, `0`) = DST(`2`, `1`) = DST(`1`, `2`) = DST(`0`, `3`) = AVG3(D, E, F);
415	DST(`3`, `1`) = DST(`2`, `2`) = DST(`1`, `3`) = AVG3(E, F, G);
416	DST(`3`, `2`) = DST(`2`, `3`) = AVG3(F, G, H);
417	DST(`3`, `3`) = AVG3(G, H, H);
418	}
419
420	static void VR4(uint8_t* dst, const uint8_t* top) {
421	const int X = top[-`1`];
422	const int I = top[-`2`];
423	const int J = top[-`3`];
424	const int K = top[-`4`];
425	const int A = top[`0`];
426	const int B = top[`1`];
427	const int C = top[`2`];
428	const int D = top[`3`];
429	DST(`0`, `0`) = DST(`1`, `2`) = AVG2(X, A);
430	DST(`1`, `0`) = DST(`2`, `2`) = AVG2(A, B);
431	DST(`2`, `0`) = DST(`3`, `2`) = AVG2(B, C);
432	DST(`3`, `0`) = AVG2(C, D);
433
434	DST(`0`, `3`) = AVG3(K, J, I);
435	DST(`0`, `2`) = AVG3(J, I, X);
436	DST(`0`, `1`) = DST(`1`, `3`) = AVG3(I, X, A);
437	DST(`1`, `1`) = DST(`2`, `3`) = AVG3(X, A, B);
438	DST(`2`, `1`) = DST(`3`, `3`) = AVG3(A, B, C);
439	DST(`3`, `1`) = AVG3(B, C, D);
440	}
441
442	static void VL4(uint8_t* dst, const uint8_t* top) {
443	const int A = top[`0`];
444	const int B = top[`1`];
445	const int C = top[`2`];
446	const int D = top[`3`];
447	const int E = top[`4`];
448	const int F = top[`5`];
449	const int G = top[`6`];
450	const int H = top[`7`];
451	DST(`0`, `0`) = AVG2(A, B);
452	DST(`1`, `0`) = DST(`0`, `2`) = AVG2(B, C);
453	DST(`2`, `0`) = DST(`1`, `2`) = AVG2(C, D);
454	DST(`3`, `0`) = DST(`2`, `2`) = AVG2(D, E);
455
456	DST(`0`, `1`) = AVG3(A, B, C);
457	DST(`1`, `1`) = DST(`0`, `3`) = AVG3(B, C, D);
458	DST(`2`, `1`) = DST(`1`, `3`) = AVG3(C, D, E);
459	DST(`3`, `1`) = DST(`2`, `3`) = AVG3(D, E, F);
460	DST(`3`, `2`) = AVG3(E, F, G);
461	DST(`3`, `3`) = AVG3(F, G, H);
462	}
463
464	static void HU4(uint8_t* dst, const uint8_t* top) {
465	const int I = top[-`2`];
466	const int J = top[-`3`];
467	const int K = top[-`4`];
468	const int L = top[-`5`];
469	DST(`0`, `0`) = AVG2(I, J);
470	DST(`2`, `0`) = DST(`0`, `1`) = AVG2(J, K);
471	DST(`2`, `1`) = DST(`0`, `2`) = AVG2(K, L);
472	DST(`1`, `0`) = AVG3(I, J, K);
473	DST(`3`, `0`) = DST(`1`, `1`) = AVG3(J, K, L);
474	DST(`3`, `1`) = DST(`1`, `2`) = AVG3(K, L, L);
475	DST(`3`, `2`) = DST(`2`, `2`) =
476	DST(`0`, `3`) = DST(`1`, `3`) = DST(`2`, `3`) = DST(`3`, `3`) = L;
477	}
478
479	static void HD4(uint8_t* dst, const uint8_t* top) {
480	const int X = top[-`1`];
481	const int I = top[-`2`];
482	const int J = top[-`3`];
483	const int K = top[-`4`];
484	const int L = top[-`5`];
485	const int A = top[`0`];
486	const int B = top[`1`];
487	const int C = top[`2`];
488
489	DST(`0`, `0`) = DST(`2`, `1`) = AVG2(I, X);
490	DST(`0`, `1`) = DST(`2`, `2`) = AVG2(J, I);
491	DST(`0`, `2`) = DST(`2`, `3`) = AVG2(K, J);
492	DST(`0`, `3`) = AVG2(L, K);
493
494	DST(`3`, `0`) = AVG3(A, B, C);
495	DST(`2`, `0`) = AVG3(X, A, B);
496	DST(`1`, `0`) = DST(`3`, `1`) = AVG3(I, X, A);
497	DST(`1`, `1`) = DST(`3`, `2`) = AVG3(J, I, X);
498	DST(`1`, `2`) = DST(`3`, `3`) = AVG3(K, J, I);
499	DST(`1`, `3`) = AVG3(L, K, J);
500	}
501
502	static void TM4(uint8_t* dst, const uint8_t* top) {
503	int x, y;
504	const uint8_t* const clip = clip1 + `255` - top[-`1`];
505	for (y = `0`; y < `4`; ++y) {
506	const uint8_t* const clip_table = clip + top[-`2` - y];
507	for (x = `0`; x < `4`; ++x) {
508	dst[x] = clip_table[top[x]];
509	}
510	dst += BPS;
511	}
512	}
513
514	#undef DST
515	#undef AVG3
516	#undef AVG2
517
518	// Left samples are top[-5 .. -2], top_left is top[-1], top are
519	// located at top[0..3], and top right is top[4..7]
520	static void Intra4Preds_C(uint8_t* dst, const uint8_t* top) {
521	DC4(I4DC4 + dst, top);
522	TM4(I4TM4 + dst, top);
523	VE4(I4VE4 + dst, top);
524	HE4(I4HE4 + dst, top);
525	RD4(I4RD4 + dst, top);
526	VR4(I4VR4 + dst, top);
527	LD4(I4LD4 + dst, top);
528	VL4(I4VL4 + dst, top);
529	HD4(I4HD4 + dst, top);
530	HU4(I4HU4 + dst, top);
531	}
532
533	//------------------------------------------------------------------------------
534	// Metric
535
536	#if !WEBP_NEON_OMIT_C_CODE
537	static WEBP_INLINE int GetSSE(const uint8_t* a, const uint8_t* b,
538	int w, int h) {
539	int count = `0`;
540	int y, x;
541	for (y = `0`; y < h; ++y) {
542	for (x = `0`; x < w; ++x) {
543	const int diff = (int)a[x] - b[x];
544	count += diff * diff;
545	}
546	a += BPS;
547	b += BPS;
548	}
549	return count;
550	}
551
// Sum of squared differences over a 16 (wide) x 16 (high) block.
static int SSE16x16_C(const uint8_t* a, const uint8_t* b) {
  return GetSSE(a, b, 16, 16);
}
// Sum of squared differences over a 16 (wide) x 8 (high) block.
static int SSE16x8_C(const uint8_t* a, const uint8_t* b) {
  return GetSSE(a, b, 16, 8);
}
// Sum of squared differences over an 8 (wide) x 8 (high) block.
static int SSE8x8_C(const uint8_t* a, const uint8_t* b) {
  return GetSSE(a, b, 8, 8);
}
// Sum of squared differences over a 4 (wide) x 4 (high) block.
static int SSE4x4_C(const uint8_t* a, const uint8_t* b) {
  return GetSSE(a, b, 4, 4);
}
564	#endif // !WEBP_NEON_OMIT_C_CODE
565
566	static void Mean16x4_C(const uint8_t* ref, uint32_t dc[`4`]) {
567	int k, x, y;
568	for (k = `0`; k < `4`; ++k) {
569	uint32_t avg = `0`;
570	for (y = `0`; y < `4`; ++y) {
571	for (x = `0`; x < `4`; ++x) {
572	avg += ref[x + y * BPS];
573	}
574	}
575	dc[k] = avg;
576	ref += `4`; // go to next 4x4 block.
577	}
578	}
579
580	//------------------------------------------------------------------------------
581	// Texture distortion
582	//
583	// We try to match the spectral content (weighted) between source and
584	// reconstructed samples.
585
586	#if !WEBP_NEON_OMIT_C_CODE
587	// Hadamard transform
588	// Returns the weighted sum of the absolute value of transformed coefficients.
589	// w[] contains a row-major 4 by 4 symmetric matrix.
590	static int TTransform(const uint8_t* in, const uint16_t* w) {
591	int sum = `0`;
592	int tmp[`16`];
593	int i;
594	// horizontal pass
595	for (i = `0`; i < `4`; ++i, in += BPS) {
596	const int a0 = in[`0`] + in[`2`];
597	const int a1 = in[`1`] + in[`3`];
598	const int a2 = in[`1`] - in[`3`];
599	const int a3 = in[`0`] - in[`2`];
600	tmp[`0` + i * `4`] = a0 + a1;
601	tmp[`1` + i * `4`] = a3 + a2;
602	tmp[`2` + i * `4`] = a3 - a2;
603	tmp[`3` + i * `4`] = a0 - a1;
604	}
605	// vertical pass
606	for (i = `0`; i < `4`; ++i, ++w) {
607	const int a0 = tmp[`0` + i] + tmp[`8` + i];
608	const int a1 = tmp[`4` + i] + tmp[`12`+ i];
609	const int a2 = tmp[`4` + i] - tmp[`12`+ i];
610	const int a3 = tmp[`0` + i] - tmp[`8` + i];
611	const int b0 = a0 + a1;
612	const int b1 = a3 + a2;
613	const int b2 = a3 - a2;
614	const int b3 = a0 - a1;
615
616	sum += w[ `0`] * abs(b0);
617	sum += w[ `4`] * abs(b1);
618	sum += w[ `8`] * abs(b2);
619	sum += w[`12`] * abs(b3);
620	}
621	return sum;
622	}
623
// Texture distortion between two 4x4 blocks: the absolute difference of
// their weighted transform sums (see TTransform), divided by 32.
static int Disto4x4_C(const uint8_t* const a, const uint8_t* const b,
                      const uint16_t* const w) {
  const int sum1 = TTransform(a, w);
  const int sum2 = TTransform(b, w);
  return abs(sum2 - sum1) >> 5;
}
630
631	static int Disto16x16_C(const uint8_t* const a, const uint8_t* const b,
632	const uint16_t* const w) {
633	int D = `0`;
634	int x, y;
635	for (y = `0`; y < `16` * BPS; y += `4` * BPS) {
636	for (x = `0`; x < `16`; x += `4`) {
637	D += Disto4x4_C(a + x + y, b + x + y, w);
638	}
639	}
640	return D;
641	}
642	#endif // !WEBP_NEON_OMIT_C_CODE
643
//------------------------------------------------------------------------------
// Quantization
//

// kZigzag[n] is the raster index, within a 4x4 block, of the n-th
// coefficient in zigzag order. It is a permutation of 0..15.
static const uint8_t kZigzag[16] = {
  0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15
};
651
652	// Simple quantization
653	static int QuantizeBlock_C(int16_t in[`16`], int16_t out[`16`],
654	const VP8Matrix* const mtx) {
655	int last = -`1`;
656	int n;
657	for (n = `0`; n < `16`; ++n) {
658	const int j = kZigzag[n];
659	const int sign = (in[j] < `0`);
660	const uint32_t coeff = (sign ? -in[j] : in[j]) + mtx->sharpen_[j];
661	if (coeff > mtx->zthresh_[j]) {
662	const uint32_t Q = mtx->q_[j];
663	const uint32_t iQ = mtx->iq_[j];
664	const uint32_t B = mtx->bias_[j];
665	int level = QUANTDIV(coeff, iQ, B);
666	if (level > MAX_LEVEL) level = MAX_LEVEL;
667	if (sign) level = -level;
668	in[j] = level * (int)Q;
669	out[n] = level;
670	if (level) last = n;
671	} else {
672	out[n] = `0`;
673	in[j] = `0`;
674	}
675	}
676	return (last >= `0`);
677	}
678
679	#if !WEBP_NEON_OMIT_C_CODE \|\| WEBP_NEON_WORK_AROUND_GCC
680	static int Quantize2Blocks_C(int16_t in[`32`], int16_t out[`32`],
681	const VP8Matrix* const mtx) {
682	int nz;
683	nz = VP8EncQuantizeBlock(in + `0` * `16`, out + `0` * `16`, mtx) << `0`;
684	nz \|= VP8EncQuantizeBlock(in + `1` * `16`, out + `1` * `16`, mtx) << `1`;
685	return nz;
686	}
687	#endif // !WEBP_NEON_OMIT_C_CODE \|\| WEBP_NEON_WORK_AROUND_GCC
688
689	//------------------------------------------------------------------------------
690	// Block copy
691
692	static WEBP_INLINE void Copy(const uint8_t* src, uint8_t* dst, int w, int h) {
693	int y;
694	for (y = `0`; y < h; ++y) {
695	memcpy(dst, src, w);
696	src += BPS;
697	dst += BPS;
698	}
699	}
700
// Copies a 4x4 block from 'src' to 'dst'.
static void Copy4x4_C(const uint8_t* src, uint8_t* dst) {
  Copy(src, dst, 4, 4);
}
704
// Copies a 16 (wide) x 8 (high) block from 'src' to 'dst'.
static void Copy16x8_C(const uint8_t* src, uint8_t* dst) {
  Copy(src, dst, 16, 8);
}
708
709	//------------------------------------------------------------------------------
710	// Initialization
711
712	// Speed-critical function pointers. We have to initialize them to the default
713	// implementations within VP8EncDspInit().
714	VP8CHisto VP8CollectHistogram;
715	VP8Idct VP8ITransform;
716	VP8Fdct VP8FTransform;
717	VP8Fdct VP8FTransform2;
718	VP8WHT VP8FTransformWHT;
719	VP8Intra4Preds VP8EncPredLuma4;
720	VP8IntraPreds VP8EncPredLuma16;
721	VP8IntraPreds VP8EncPredChroma8;
722	VP8Metric VP8SSE16x16;
723	VP8Metric VP8SSE8x8;
724	VP8Metric VP8SSE16x8;
725	VP8Metric VP8SSE4x4;
726	VP8WMetric VP8TDisto4x4;
727	VP8WMetric VP8TDisto16x16;
728	VP8MeanMetric VP8Mean16x4;
729	VP8QuantizeBlock VP8EncQuantizeBlock;
730	VP8Quantize2Blocks VP8EncQuantize2Blocks;
731	VP8QuantizeBlockWHT VP8EncQuantizeBlockWHT;
732	VP8BlockCopy VP8Copy4x4;
733	VP8BlockCopy VP8Copy16x8;
734
735	extern VP8CPUInfo VP8GetCPUInfo;
736	extern void VP8EncDspInitSSE2(void);
737	extern void VP8EncDspInitSSE41(void);
738	extern void VP8EncDspInitNEON(void);
739	extern void VP8EncDspInitMIPS32(void);
740	extern void VP8EncDspInitMIPSdspR2(void);
741	extern void VP8EncDspInitMSA(void);
742
743	WEBP_DSP_INIT_FUNC(VP8EncDspInit) {
744	VP8DspInit(); // common inverse transforms
745	InitTables();
746
747	// default C implementations
748	#if !WEBP_NEON_OMIT_C_CODE
749	VP8ITransform = ITransform_C;
750	VP8FTransform = FTransform_C;
751	VP8FTransformWHT = FTransformWHT_C;
752	VP8TDisto4x4 = Disto4x4_C;
753	VP8TDisto16x16 = Disto16x16_C;
754	VP8CollectHistogram = CollectHistogram_C;
755	VP8SSE16x16 = SSE16x16_C;
756	VP8SSE16x8 = SSE16x8_C;
757	VP8SSE8x8 = SSE8x8_C;
758	VP8SSE4x4 = SSE4x4_C;
759	#endif
760
761	#if !WEBP_NEON_OMIT_C_CODE \|\| WEBP_NEON_WORK_AROUND_GCC
762	VP8EncQuantizeBlock = QuantizeBlock_C;
763	VP8EncQuantize2Blocks = Quantize2Blocks_C;
764	#endif
765
766	VP8FTransform2 = FTransform2_C;
767	VP8EncPredLuma4 = Intra4Preds_C;
768	VP8EncPredLuma16 = Intra16Preds_C;
769	VP8EncPredChroma8 = IntraChromaPreds_C;
770	VP8Mean16x4 = Mean16x4_C;
771	VP8EncQuantizeBlockWHT = QuantizeBlock_C;
772	VP8Copy4x4 = Copy4x4_C;
773	VP8Copy16x8 = Copy16x8_C;
774
775	// If defined, use CPUInfo() to overwrite some pointers with faster versions.
776	if (VP8GetCPUInfo != NULL) {
777	#if defined(WEBP_HAVE_SSE2)
778	if (VP8GetCPUInfo(kSSE2)) {
779	VP8EncDspInitSSE2();
780	#if defined(WEBP_HAVE_SSE41)
781	if (VP8GetCPUInfo(kSSE4_1)) {
782	VP8EncDspInitSSE41();
783	}
784	#endif
785	}
786	#endif
787	#if defined(WEBP_USE_MIPS32)
788	if (VP8GetCPUInfo(kMIPS32)) {
789	VP8EncDspInitMIPS32();
790	}
791	#endif
792	#if defined(WEBP_USE_MIPS_DSP_R2)
793	if (VP8GetCPUInfo(kMIPSdspR2)) {
794	VP8EncDspInitMIPSdspR2();
795	}
796	#endif
797	#if defined(WEBP_USE_MSA)
798	if (VP8GetCPUInfo(kMSA)) {
799	VP8EncDspInitMSA();
800	}
801	#endif
802	}
803
804	#if defined(WEBP_HAVE_NEON)
805	if (WEBP_NEON_OMIT_C_CODE \|\|
806	(VP8GetCPUInfo != NULL && VP8GetCPUInfo(kNEON))) {
807	VP8EncDspInitNEON();
808	}
809	#endif
810
811	assert(VP8ITransform != NULL);
812	assert(VP8FTransform != NULL);
813	assert(VP8FTransformWHT != NULL);
814	assert(VP8TDisto4x4 != NULL);
815	assert(VP8TDisto16x16 != NULL);
816	assert(VP8CollectHistogram != NULL);
817	assert(VP8SSE16x16 != NULL);
818	assert(VP8SSE16x8 != NULL);
819	assert(VP8SSE8x8 != NULL);
820	assert(VP8SSE4x4 != NULL);
821	assert(VP8EncQuantizeBlock != NULL);
822	assert(VP8EncQuantize2Blocks != NULL);
823	assert(VP8FTransform2 != NULL);
824	assert(VP8EncPredLuma4 != NULL);
825	assert(VP8EncPredLuma16 != NULL);
826	assert(VP8EncPredChroma8 != NULL);
827	assert(VP8Mean16x4 != NULL);
828	assert(VP8EncQuantizeBlockWHT != NULL);
829	assert(VP8Copy4x4 != NULL);
830	assert(VP8Copy16x8 != NULL);
831	}
832

Browse the source code of Godot/thirdparty/libwebp/src/dsp/enc.c