// dec.c source code [Skia/third_party/externals/libwebp/src/dsp/dec.c]

1	// Copyright 2010 Google Inc. All Rights Reserved.
2	//
3	// Use of this source code is governed by a BSD-style license
4	// that can be found in the COPYING file in the root of the source
5	// tree. An additional intellectual property rights grant can be found
6	// in the file PATENTS. All contributing project authors may
7	// be found in the AUTHORS file in the root of the source tree.
8	// -----------------------------------------------------------------------------
9	//
10	// Speed-critical decoding functions, default plain-C implementations.
11	//
12	// Author: Skal (pascal.massimino@gmail.com)
13
14	#include <assert.h>
15
16	#include "src/dsp/dsp.h"
17	#include "src/dec/vp8i_dec.h"
18	#include "src/utils/utils.h"
19
20	//------------------------------------------------------------------------------
21
22	static WEBP_INLINE uint8_t clip_8b(int v) {
23	return (!(v & ~`0xff`)) ? v : (v < `0`) ? `0` : `255`;
24	}
25
26	//------------------------------------------------------------------------------
27	// Transforms (Paragraph 14.4)
28
// Adds the descaled value (v >> 3) to the pixel at column 'x', row 'y' of
// 'dst' (rows are BPS bytes apart) and clamps the sum with clip_8b().
// Expects a pointer named 'dst' to be in scope at the point of use.
#define STORE(x, y, v) \
  dst[(x) + (y) * BPS] = clip_8b(dst[(x) + (y) * BPS] + ((v) >> 3))

// Stores the four pixels of row 'y' using the values DC + d, DC + c, DC - c
// and DC - d (in that column order), where DC is 'dc' evaluated only once.
#define STORE2(y, dc, d, c) do {    \
  const int DC = (dc);              \
  STORE(0, y, DC + (d));            \
  STORE(1, y, DC + (c));            \
  STORE(2, y, DC - (c));            \
  STORE(3, y, DC - (d));            \
} while (0)

// Fixed-point multiplications by constants scaled by 2^16:
// MUL1(a) is a * (1 + 20091 / 65536) and MUL2(a) is a * (35468 / 65536),
// both truncated by the right shift.
#define MUL1(a) ((((a) * 20091) >> 16) + (a))
#define MUL2(a) (((a) * 35468) >> 16)
42
43	#if !WEBP_NEON_OMIT_C_CODE
44	static void TransformOne_C(const int16_t* in, uint8_t* dst) {
45	int C[`4` * `4`], *tmp;
46	int i;
47	tmp = C;
48	for (i = `0`; i < `4`; ++i) { // vertical pass
49	const int a = in[`0`] + in[`8`]; // [-4096, 4094]
50	const int b = in[`0`] - in[`8`]; // [-4095, 4095]
51	const int c = MUL2(in[`4`]) - MUL1(in[`12`]); // [-3783, 3783]
52	const int d = MUL1(in[`4`]) + MUL2(in[`12`]); // [-3785, 3781]
53	tmp[`0`] = a + d; // [-7881, 7875]
54	tmp[`1`] = b + c; // [-7878, 7878]
55	tmp[`2`] = b - c; // [-7878, 7878]
56	tmp[`3`] = a - d; // [-7877, 7879]
57	tmp += `4`;
58	in++;
59	}
60	// Each pass is expanding the dynamic range by ~3.85 (upper bound).
61	// The exact value is (2. + (20091 + 35468) / 65536).
62	// After the second pass, maximum interval is [-3794, 3794], assuming
63	// an input in [-2048, 2047] interval. We then need to add a dst value
64	// in the [0, 255] range.
65	// In the worst case scenario, the input to clip_8b() can be as large as
66	// [-60713, 60968].
67	tmp = C;
68	for (i = `0`; i < `4`; ++i) { // horizontal pass
69	const int dc = tmp[`0`] + `4`;
70	const int a = dc + tmp[`8`];
71	const int b = dc - tmp[`8`];
72	const int c = MUL2(tmp[`4`]) - MUL1(tmp[`12`]);
73	const int d = MUL1(tmp[`4`]) + MUL2(tmp[`12`]);
74	STORE(`0`, `0`, a + d);
75	STORE(`1`, `0`, b + c);
76	STORE(`2`, `0`, b - c);
77	STORE(`3`, `0`, a - d);
78	tmp++;
79	dst += BPS;
80	}
81	}
82
83	// Simplified transform when only in[0], in[1] and in[4] are non-zero
// Simplified transform when only in[0], in[1] and in[4] are non-zero.
// Only those three coefficients are read; the result is added to the 4x4
// block at 'dst' one row at a time through STORE2().
static void TransformAC3_C(const int16_t* in, uint8_t* dst) {
  const int a = in[0] + 4;   // DC term with the rounder for the final >> 3
  const int c4 = MUL2(in[4]);
  const int d4 = MUL1(in[4]);
  const int c1 = MUL2(in[1]);
  const int d1 = MUL1(in[1]);
  STORE2(0, a + d4, d1, c1);
  STORE2(1, a + c4, d1, c1);
  STORE2(2, a - c4, d1, c1);
  STORE2(3, a - d4, d1, c1);
}
95	#undef MUL1
96	#undef MUL2
97	#undef STORE2
98
// Transforms the block at 'in' into 'dst' and, when 'do_two' is non-zero,
// also the next block (16 coefficients further in 'in') into the 4x4 area
// located 4 pixels to the right of 'dst'.
static void TransformTwo_C(const int16_t* in, uint8_t* dst, int do_two) {
  TransformOne_C(in, dst);
  if (do_two) {
    TransformOne_C(in + 16, dst + 4);
  }
}
105	#endif // !WEBP_NEON_OMIT_C_CODE
106
107	static void TransformUV_C(const int16_t* in, uint8_t* dst) {
108	VP8Transform(in + `0` * `16`, dst, `1`);
109	VP8Transform(in + `2` * `16`, dst + `4` * BPS, `1`);
110	}
111
112	#if !WEBP_NEON_OMIT_C_CODE
// DC-only transform: adds (in[0] + 4) >> 3 to every pixel of the 4x4 block
// at 'dst', clamping each result to [0, 255]. Only in[0] is read.
static void TransformDC_C(const int16_t* in, uint8_t* dst) {
  const int DC = in[0] + 4;
  int i, j;
  for (j = 0; j < 4; ++j) {
    for (i = 0; i < 4; ++i) {
      STORE(i, j, DC);
    }
  }
}
122	#endif // !WEBP_NEON_OMIT_C_CODE
123
124	static void TransformDCUV_C(const int16_t* in, uint8_t* dst) {
125	if (in[`0` * `16`]) VP8TransformDC(in + `0` * `16`, dst);
126	if (in[`1` * `16`]) VP8TransformDC(in + `1` * `16`, dst + `4`);
127	if (in[`2` * `16`]) VP8TransformDC(in + `2` * `16`, dst + `4` * BPS);
128	if (in[`3` * `16`]) VP8TransformDC(in + `3` * `16`, dst + `4` * BPS + `4`);
129	}
130
131	#undef STORE
132
133	//------------------------------------------------------------------------------
134	// Paragraph 14.3
135
136	#if !WEBP_NEON_OMIT_C_CODE
// Two-pass 4x4 transform built from additions and subtractions only.
// Reads the 16 values in[0..15] and writes 16 results, one every 16 entries
// of 'out' (out[0], out[16], ..., out[240]), so 'out' must have room for at
// least 241 entries. Each result is descaled by >> 3 with a rounder of 3.
static void TransformWHT_C(const int16_t* in, int16_t* out) {
  int tmp[16];
  int i;
  // First pass: combine the four rows, column by column.
  for (i = 0; i < 4; ++i) {
    const int a0 = in[0 + i] + in[12 + i];
    const int a1 = in[4 + i] + in[ 8 + i];
    const int a2 = in[4 + i] - in[ 8 + i];
    const int a3 = in[0 + i] - in[12 + i];
    tmp[0  + i] = a0 + a1;
    tmp[8  + i] = a0 - a1;
    tmp[4  + i] = a3 + a2;
    tmp[12 + i] = a3 - a2;
  }
  // Second pass: combine the four values of each intermediate row.
  for (i = 0; i < 4; ++i) {
    const int dc = tmp[0 + i * 4] + 3;    // w/ rounder
    const int a0 = dc             + tmp[3 + i * 4];
    const int a1 = tmp[1 + i * 4] + tmp[2 + i * 4];
    const int a2 = tmp[1 + i * 4] - tmp[2 + i * 4];
    const int a3 = dc             - tmp[3 + i * 4];
    out[ 0] = (a0 + a1) >> 3;
    out[16] = (a3 + a2) >> 3;
    out[32] = (a0 - a1) >> 3;
    out[48] = (a3 - a2) >> 3;
    out += 64;   // four outputs per iteration, each 16 entries apart
  }
}
163	#endif // !WEBP_NEON_OMIT_C_CODE
164
// Function pointer for the transform of section 14.3; VP8DspInit() sets it
// to TransformWHT_C when the plain-C code is compiled in.
void (*VP8TransformWHT)(const int16_t* in, int16_t* out);
166
167	//------------------------------------------------------------------------------
168	// Intra predictions
169
// Pixel at column 'x', row 'y' relative to 'dst' (rows are BPS bytes apart).
// Expects a pointer named 'dst' to be in scope at the point of use.
#define DST(x, y) dst[(x) + (y) * BPS]
171
172	#if !WEBP_NEON_OMIT_C_CODE
173	static WEBP_INLINE void TrueMotion(uint8_t* dst, int size) {
174	const uint8_t* top = dst - BPS;
175	const uint8_t* const clip0 = VP8kclip1 - top[-`1`];
176	int y;
177	for (y = `0`; y < size; ++y) {
178	const uint8_t* const clip = clip0 + dst[-`1`];
179	int x;
180	for (x = `0`; x < size; ++x) {
181	dst[x] = clip[top[x]];
182	}
183	dst += BPS;
184	}
185	}
// Fixed-size entry points for TrueMotion(): 4x4, 8x8 and 16x16 blocks.
static void TM4_C(uint8_t* dst)   { TrueMotion(dst, 4); }
static void TM8uv_C(uint8_t* dst) { TrueMotion(dst, 8); }
static void TM16_C(uint8_t* dst)  { TrueMotion(dst, 16); }
189
190	//------------------------------------------------------------------------------
191	// 16x16
192
193	static void VE16_C(uint8_t* dst) { // vertical
194	int j;
195	for (j = `0`; j < `16`; ++j) {
196	memcpy(dst + j * BPS, dst - BPS, `16`);
197	}
198	}
199
200	static void HE16_C(uint8_t* dst) { // horizontal
201	int j;
202	for (j = `16`; j > `0`; --j) {
203	memset(dst, dst[-`1`], `16`);
204	dst += BPS;
205	}
206	}
207
208	static WEBP_INLINE void Put16(int v, uint8_t* dst) {
209	int j;
210	for (j = `0`; j < `16`; ++j) {
211	memset(dst + j * BPS, v, `16`);
212	}
213	}
214
215	static void DC16_C(uint8_t* dst) { // DC
216	int DC = `16`;
217	int j;
218	for (j = `0`; j < `16`; ++j) {
219	DC += dst[-`1` + j * BPS] + dst[j - BPS];
220	}
221	Put16(DC >> `5`, dst);
222	}
223
224	static void DC16NoTop_C(uint8_t* dst) { // DC with top samples not available
225	int DC = `8`;
226	int j;
227	for (j = `0`; j < `16`; ++j) {
228	DC += dst[-`1` + j * BPS];
229	}
230	Put16(DC >> `4`, dst);
231	}
232
233	static void DC16NoLeft_C(uint8_t* dst) { // DC with left samples not available
234	int DC = `8`;
235	int i;
236	for (i = `0`; i < `16`; ++i) {
237	DC += dst[i - BPS];
238	}
239	Put16(DC >> `4`, dst);
240	}
241
// DC prediction with no neighbouring samples: fills the block with the
// constant 0x80.
static void DC16NoTopLeft_C(uint8_t* dst) {  // DC with no top and left samples
  Put16(0x80, dst);
}
245	#endif // !WEBP_NEON_OMIT_C_CODE
246
// Table of 16x16 luma predictors. VP8DspInit() fills entries 0..6 with the
// plain-C versions when they are compiled in.
VP8PredFunc VP8PredLuma16[NUM_B_DC_MODES];
248
249	//------------------------------------------------------------------------------
250	// 4x4
251
// Rounded 3-tap average with weights (1, 2, 1), truncated to 8 bits.
#define AVG3(a, b, c) ((uint8_t)(((a) + 2 * (b) + (c) + 2) >> 2))
// Rounded 2-tap average.
#define AVG2(a, b) (((a) + (b) + 1) >> 1)
254
255	#if !WEBP_NEON_OMIT_C_CODE
256	static void VE4_C(uint8_t* dst) { // vertical
257	const uint8_t* top = dst - BPS;
258	const uint8_t vals[`4`] = {
259	AVG3(top[-`1`], top[`0`], top[`1`]),
260	AVG3(top[ `0`], top[`1`], top[`2`]),
261	AVG3(top[ `1`], top[`2`], top[`3`]),
262	AVG3(top[ `2`], top[`3`], top[`4`])
263	};
264	int i;
265	for (i = `0`; i < `4`; ++i) {
266	memcpy(dst + i * BPS, vals, sizeof(vals));
267	}
268	}
269	#endif // !WEBP_NEON_OMIT_C_CODE
270
271	static void HE4_C(uint8_t* dst) { // horizontal
272	const int A = dst[-`1` - BPS];
273	const int B = dst[-`1`];
274	const int C = dst[-`1` + BPS];
275	const int D = dst[-`1` + `2` * BPS];
276	const int E = dst[-`1` + `3` * BPS];
277	WebPUint32ToMem(dst + `0` * BPS, `0x01010101U` * AVG3(A, B, C));
278	WebPUint32ToMem(dst + `1` * BPS, `0x01010101U` * AVG3(B, C, D));
279	WebPUint32ToMem(dst + `2` * BPS, `0x01010101U` * AVG3(C, D, E));
280	WebPUint32ToMem(dst + `3` * BPS, `0x01010101U` * AVG3(D, E, E));
281	}
282
283	#if !WEBP_NEON_OMIT_C_CODE
284	static void DC4_C(uint8_t* dst) { // DC
285	uint32_t dc = `4`;
286	int i;
287	for (i = `0`; i < `4`; ++i) dc += dst[i - BPS] + dst[-`1` + i * BPS];
288	dc >>= `3`;
289	for (i = `0`; i < `4`; ++i) memset(dst + i * BPS, dc, `4`);
290	}
291
292	static void RD4_C(uint8_t* dst) { // Down-right
293	const int I = dst[-`1` + `0` * BPS];
294	const int J = dst[-`1` + `1` * BPS];
295	const int K = dst[-`1` + `2` * BPS];
296	const int L = dst[-`1` + `3` * BPS];
297	const int X = dst[-`1` - BPS];
298	const int A = dst[`0` - BPS];
299	const int B = dst[`1` - BPS];
300	const int C = dst[`2` - BPS];
301	const int D = dst[`3` - BPS];
302	DST(`0`, `3`) = AVG3(J, K, L);
303	DST(`1`, `3`) = DST(`0`, `2`) = AVG3(I, J, K);
304	DST(`2`, `3`) = DST(`1`, `2`) = DST(`0`, `1`) = AVG3(X, I, J);
305	DST(`3`, `3`) = DST(`2`, `2`) = DST(`1`, `1`) = DST(`0`, `0`) = AVG3(A, X, I);
306	DST(`3`, `2`) = DST(`2`, `1`) = DST(`1`, `0`) = AVG3(B, A, X);
307	DST(`3`, `1`) = DST(`2`, `0`) = AVG3(C, B, A);
308	DST(`3`, `0`) = AVG3(D, C, B);
309	}
310
311	static void LD4_C(uint8_t* dst) { // Down-Left
312	const int A = dst[`0` - BPS];
313	const int B = dst[`1` - BPS];
314	const int C = dst[`2` - BPS];
315	const int D = dst[`3` - BPS];
316	const int E = dst[`4` - BPS];
317	const int F = dst[`5` - BPS];
318	const int G = dst[`6` - BPS];
319	const int H = dst[`7` - BPS];
320	DST(`0`, `0`) = AVG3(A, B, C);
321	DST(`1`, `0`) = DST(`0`, `1`) = AVG3(B, C, D);
322	DST(`2`, `0`) = DST(`1`, `1`) = DST(`0`, `2`) = AVG3(C, D, E);
323	DST(`3`, `0`) = DST(`2`, `1`) = DST(`1`, `2`) = DST(`0`, `3`) = AVG3(D, E, F);
324	DST(`3`, `1`) = DST(`2`, `2`) = DST(`1`, `3`) = AVG3(E, F, G);
325	DST(`3`, `2`) = DST(`2`, `3`) = AVG3(F, G, H);
326	DST(`3`, `3`) = AVG3(G, H, H);
327	}
328	#endif // !WEBP_NEON_OMIT_C_CODE
329
330	static void VR4_C(uint8_t* dst) { // Vertical-Right
331	const int I = dst[-`1` + `0` * BPS];
332	const int J = dst[-`1` + `1` * BPS];
333	const int K = dst[-`1` + `2` * BPS];
334	const int X = dst[-`1` - BPS];
335	const int A = dst[`0` - BPS];
336	const int B = dst[`1` - BPS];
337	const int C = dst[`2` - BPS];
338	const int D = dst[`3` - BPS];
339	DST(`0`, `0`) = DST(`1`, `2`) = AVG2(X, A);
340	DST(`1`, `0`) = DST(`2`, `2`) = AVG2(A, B);
341	DST(`2`, `0`) = DST(`3`, `2`) = AVG2(B, C);
342	DST(`3`, `0`) = AVG2(C, D);
343
344	DST(`0`, `3`) = AVG3(K, J, I);
345	DST(`0`, `2`) = AVG3(J, I, X);
346	DST(`0`, `1`) = DST(`1`, `3`) = AVG3(I, X, A);
347	DST(`1`, `1`) = DST(`2`, `3`) = AVG3(X, A, B);
348	DST(`2`, `1`) = DST(`3`, `3`) = AVG3(A, B, C);
349	DST(`3`, `1`) = AVG3(B, C, D);
350	}
351
352	static void VL4_C(uint8_t* dst) { // Vertical-Left
353	const int A = dst[`0` - BPS];
354	const int B = dst[`1` - BPS];
355	const int C = dst[`2` - BPS];
356	const int D = dst[`3` - BPS];
357	const int E = dst[`4` - BPS];
358	const int F = dst[`5` - BPS];
359	const int G = dst[`6` - BPS];
360	const int H = dst[`7` - BPS];
361	DST(`0`, `0`) = AVG2(A, B);
362	DST(`1`, `0`) = DST(`0`, `2`) = AVG2(B, C);
363	DST(`2`, `0`) = DST(`1`, `2`) = AVG2(C, D);
364	DST(`3`, `0`) = DST(`2`, `2`) = AVG2(D, E);
365
366	DST(`0`, `1`) = AVG3(A, B, C);
367	DST(`1`, `1`) = DST(`0`, `3`) = AVG3(B, C, D);
368	DST(`2`, `1`) = DST(`1`, `3`) = AVG3(C, D, E);
369	DST(`3`, `1`) = DST(`2`, `3`) = AVG3(D, E, F);
370	DST(`3`, `2`) = AVG3(E, F, G);
371	DST(`3`, `3`) = AVG3(F, G, H);
372	}
373
374	static void HU4_C(uint8_t* dst) { // Horizontal-Up
375	const int I = dst[-`1` + `0` * BPS];
376	const int J = dst[-`1` + `1` * BPS];
377	const int K = dst[-`1` + `2` * BPS];
378	const int L = dst[-`1` + `3` * BPS];
379	DST(`0`, `0`) = AVG2(I, J);
380	DST(`2`, `0`) = DST(`0`, `1`) = AVG2(J, K);
381	DST(`2`, `1`) = DST(`0`, `2`) = AVG2(K, L);
382	DST(`1`, `0`) = AVG3(I, J, K);
383	DST(`3`, `0`) = DST(`1`, `1`) = AVG3(J, K, L);
384	DST(`3`, `1`) = DST(`1`, `2`) = AVG3(K, L, L);
385	DST(`3`, `2`) = DST(`2`, `2`) =
386	DST(`0`, `3`) = DST(`1`, `3`) = DST(`2`, `3`) = DST(`3`, `3`) = L;
387	}
388
389	static void HD4_C(uint8_t* dst) { // Horizontal-Down
390	const int I = dst[-`1` + `0` * BPS];
391	const int J = dst[-`1` + `1` * BPS];
392	const int K = dst[-`1` + `2` * BPS];
393	const int L = dst[-`1` + `3` * BPS];
394	const int X = dst[-`1` - BPS];
395	const int A = dst[`0` - BPS];
396	const int B = dst[`1` - BPS];
397	const int C = dst[`2` - BPS];
398
399	DST(`0`, `0`) = DST(`2`, `1`) = AVG2(I, X);
400	DST(`0`, `1`) = DST(`2`, `2`) = AVG2(J, I);
401	DST(`0`, `2`) = DST(`2`, `3`) = AVG2(K, J);
402	DST(`0`, `3`) = AVG2(L, K);
403
404	DST(`3`, `0`) = AVG3(A, B, C);
405	DST(`2`, `0`) = AVG3(X, A, B);
406	DST(`1`, `0`) = DST(`3`, `1`) = AVG3(I, X, A);
407	DST(`1`, `1`) = DST(`3`, `2`) = AVG3(J, I, X);
408	DST(`1`, `2`) = DST(`3`, `3`) = AVG3(K, J, I);
409	DST(`1`, `3`) = AVG3(L, K, J);
410	}
411
412	#undef DST
413	#undef AVG3
414	#undef AVG2
415
// Table of 4x4 luma predictors. VP8DspInit() fills entries 0..9 (some of
// them only when the plain-C code is compiled in).
VP8PredFunc VP8PredLuma4[NUM_BMODES];
417
418	//------------------------------------------------------------------------------
419	// Chroma
420
421	#if !WEBP_NEON_OMIT_C_CODE
422	static void VE8uv_C(uint8_t* dst) { // vertical
423	int j;
424	for (j = `0`; j < `8`; ++j) {
425	memcpy(dst + j * BPS, dst - BPS, `8`);
426	}
427	}
428
429	static void HE8uv_C(uint8_t* dst) { // horizontal
430	int j;
431	for (j = `0`; j < `8`; ++j) {
432	memset(dst, dst[-`1`], `8`);
433	dst += BPS;
434	}
435	}
436
437	// helper for chroma-DC predictions
438	static WEBP_INLINE void Put8x8uv(uint8_t value, uint8_t* dst) {
439	int j;
440	for (j = `0`; j < `8`; ++j) {
441	memset(dst + j * BPS, value, `8`);
442	}
443	}
444
445	static void DC8uv_C(uint8_t* dst) { // DC
446	int dc0 = `8`;
447	int i;
448	for (i = `0`; i < `8`; ++i) {
449	dc0 += dst[i - BPS] + dst[-`1` + i * BPS];
450	}
451	Put8x8uv(dc0 >> `4`, dst);
452	}
453
454	static void DC8uvNoLeft_C(uint8_t* dst) { // DC with no left samples
455	int dc0 = `4`;
456	int i;
457	for (i = `0`; i < `8`; ++i) {
458	dc0 += dst[i - BPS];
459	}
460	Put8x8uv(dc0 >> `3`, dst);
461	}
462
463	static void DC8uvNoTop_C(uint8_t* dst) { // DC with no top samples
464	int dc0 = `4`;
465	int i;
466	for (i = `0`; i < `8`; ++i) {
467	dc0 += dst[-`1` + i * BPS];
468	}
469	Put8x8uv(dc0 >> `3`, dst);
470	}
471
// DC prediction with no neighbouring samples: fills the block with the
// constant 0x80.
static void DC8uvNoTopLeft_C(uint8_t* dst) {    // DC with nothing
  Put8x8uv(0x80, dst);
}
475	#endif // !WEBP_NEON_OMIT_C_CODE
476
// Table of 8x8 chroma predictors. VP8DspInit() fills entries 0..6 with the
// plain-C versions when they are compiled in.
VP8PredFunc VP8PredChroma8[NUM_B_DC_MODES];
478
479	//------------------------------------------------------------------------------
480	// Edge filtering functions
481
#if !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
483	// 4 pixels in, 2 pixels out
484	static WEBP_INLINE void DoFilter2_C(uint8_t* p, int step) {
485	const int p1 = p[-`2`*step], p0 = p[-step], q0 = p[`0`], q1 = p[step];
486	const int a = `3` * (q0 - p0) + VP8ksclip1[p1 - q1]; // in [-893,892]
487	const int a1 = VP8ksclip2[(a + `4`) >> `3`]; // in [-16,15]
488	const int a2 = VP8ksclip2[(a + `3`) >> `3`];
489	p[-step] = VP8kclip1[p0 + a2];
490	p[ `0`] = VP8kclip1[q0 - a1];
491	}
492
493	// 4 pixels in, 4 pixels out
494	static WEBP_INLINE void DoFilter4_C(uint8_t* p, int step) {
495	const int p1 = p[-`2`*step], p0 = p[-step], q0 = p[`0`], q1 = p[step];
496	const int a = `3` * (q0 - p0);
497	const int a1 = VP8ksclip2[(a + `4`) >> `3`];
498	const int a2 = VP8ksclip2[(a + `3`) >> `3`];
499	const int a3 = (a1 + `1`) >> `1`;
500	p[-`2`*step] = VP8kclip1[p1 + a3];
501	p[- step] = VP8kclip1[p0 + a2];
502	p[ `0`] = VP8kclip1[q0 - a1];
503	p[ step] = VP8kclip1[q1 - a3];
504	}
505
506	// 6 pixels in, 6 pixels out
507	static WEBP_INLINE void DoFilter6_C(uint8_t* p, int step) {
508	const int p2 = p[-`3`step], p1 = p[-`2`step], p0 = p[-step];
509	const int q0 = p[`0`], q1 = p[step], q2 = p[`2`*step];
510	const int a = VP8ksclip1[`3` * (q0 - p0) + VP8ksclip1[p1 - q1]];
511	// a is in [-128,127], a1 in [-27,27], a2 in [-18,18] and a3 in [-9,9]
512	const int a1 = (`27` * a + `63`) >> `7`; // eq. to ((3 a + 7) * 9) >> 7*
513	const int a2 = (`18` * a + `63`) >> `7`; // eq. to ((2 a + 7) * 9) >> 7*
514	const int a3 = (`9` * a + `63`) >> `7`; // eq. to ((1 a + 7) * 9) >> 7*
515	p[-`3`*step] = VP8kclip1[p2 + a3];
516	p[-`2`*step] = VP8kclip1[p1 + a2];
517	p[- step] = VP8kclip1[p0 + a1];
518	p[ `0`] = VP8kclip1[q0 - a1];
519	p[ step] = VP8kclip1[q1 - a2];
520	p[ `2`*step] = VP8kclip1[q2 - a3];
521	}
522
523	static WEBP_INLINE int Hev(const uint8_t* p, int step, int thresh) {
524	const int p1 = p[-`2`*step], p0 = p[-step], q0 = p[`0`], q1 = p[step];
525	return (VP8kabs0[p1 - p0] > thresh) \|\| (VP8kabs0[q1 - q0] > thresh);
526	}
#endif  // !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
528
529	#if !WEBP_NEON_OMIT_C_CODE
530	static WEBP_INLINE int NeedsFilter_C(const uint8_t* p, int step, int t) {
531	const int p1 = p[-`2` * step], p0 = p[-step], q0 = p[`0`], q1 = p[step];
532	return ((`4` * VP8kabs0[p0 - q0] + VP8kabs0[p1 - q1]) <= t);
533	}
534	#endif // !WEBP_NEON_OMIT_C_CODE
535
536	#if !WEBP_NEON_OMIT_C_CODE \|\| WEBP_NEON_WORK_AROUND_GCC
537	static WEBP_INLINE int NeedsFilter2_C(const uint8_t* p,
538	int step, int t, int it) {
539	const int p3 = p[-`4` * step], p2 = p[-`3` * step], p1 = p[-`2` * step];
540	const int p0 = p[-step], q0 = p[`0`];
541	const int q1 = p[step], q2 = p[`2` * step], q3 = p[`3` * step];
542	if ((`4` * VP8kabs0[p0 - q0] + VP8kabs0[p1 - q1]) > t) return `0`;
543	return VP8kabs0[p3 - p2] <= it && VP8kabs0[p2 - p1] <= it &&
544	VP8kabs0[p1 - p0] <= it && VP8kabs0[q3 - q2] <= it &&
545	VP8kabs0[q2 - q1] <= it && VP8kabs0[q1 - q0] <= it;
546	}
547	#endif // !WEBP_NEON_OMIT_C_CODE \|\| WEBP_NEON_WORK_AROUND_GCC
548
549	//------------------------------------------------------------------------------
550	// Simple In-loop filtering (Paragraph 15.2)
551
552	#if !WEBP_NEON_OMIT_C_CODE
// Simple filter over 16 consecutive pixels starting at 'p', with neighbours
// taken 'stride' bytes apart. A position is filtered with DoFilter2_C() only
// when NeedsFilter_C() accepts it for the threshold 2 * thresh + 1.
static void SimpleVFilter16_C(uint8_t* p, int stride, int thresh) {
  int i;
  const int thresh2 = 2 * thresh + 1;
  for (i = 0; i < 16; ++i) {
    if (NeedsFilter_C(p + i, stride, thresh2)) {
      DoFilter2_C(p + i, stride);
    }
  }
}
562
// Simple filter over 16 positions spaced 'stride' bytes apart starting at
// 'p', with neighbours taken 1 byte apart. A position is filtered with
// DoFilter2_C() only when NeedsFilter_C() accepts it for the threshold
// 2 * thresh + 1.
static void SimpleHFilter16_C(uint8_t* p, int stride, int thresh) {
  int i;
  const int thresh2 = 2 * thresh + 1;
  for (i = 0; i < 16; ++i) {
    if (NeedsFilter_C(p + i * stride, 1, thresh2)) {
      DoFilter2_C(p + i * stride, 1);
    }
  }
}
572
// Runs SimpleVFilter16_C() on three edges located 4, 8 and 12 rows below 'p'.
static void SimpleVFilter16i_C(uint8_t* p, int stride, int thresh) {
  int k;
  for (k = 3; k > 0; --k) {
    p += 4 * stride;
    SimpleVFilter16_C(p, stride, thresh);
  }
}
580
// Runs SimpleHFilter16_C() on three edges located 4, 8 and 12 pixels to the
// right of 'p'.
static void SimpleHFilter16i_C(uint8_t* p, int stride, int thresh) {
  int k;
  for (k = 3; k > 0; --k) {
    p += 4;
    SimpleHFilter16_C(p, stride, thresh);
  }
}
588	#endif // !WEBP_NEON_OMIT_C_CODE
589
590	//------------------------------------------------------------------------------
591	// Complex In-loop filtering (Paragraph 15.3)
592
#if !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
594	static WEBP_INLINE void FilterLoop26_C(uint8_t* p,
595	int hstride, int vstride, int size,
596	int thresh, int ithresh,
597	int hev_thresh) {
598	const int thresh2 = `2` * thresh + `1`;
599	while (size-- > `0`) {
600	if (NeedsFilter2_C(p, hstride, thresh2, ithresh)) {
601	if (Hev(p, hstride, hev_thresh)) {
602	DoFilter2_C(p, hstride);
603	} else {
604	DoFilter6_C(p, hstride);
605	}
606	}
607	p += vstride;
608	}
609	}
610
611	static WEBP_INLINE void FilterLoop24_C(uint8_t* p,
612	int hstride, int vstride, int size,
613	int thresh, int ithresh,
614	int hev_thresh) {
615	const int thresh2 = `2` * thresh + `1`;
616	while (size-- > `0`) {
617	if (NeedsFilter2_C(p, hstride, thresh2, ithresh)) {
618	if (Hev(p, hstride, hev_thresh)) {
619	DoFilter2_C(p, hstride);
620	} else {
621	DoFilter4_C(p, hstride);
622	}
623	}
624	p += vstride;
625	}
626	}
#endif  // !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
628
629	#if !WEBP_NEON_OMIT_C_CODE
// on macroblock edges
// Filters 16 consecutive pixels starting at 'p', reading neighbours 'stride'
// bytes apart, with the 2/6-pixel loop.
static void VFilter16_C(uint8_t* p, int stride,
                        int thresh, int ithresh, int hev_thresh) {
  FilterLoop26_C(p, stride, 1, 16, thresh, ithresh, hev_thresh);
}
635
// Filters 16 positions spaced 'stride' bytes apart starting at 'p', reading
// neighbours 1 byte apart, with the 2/6-pixel loop.
static void HFilter16_C(uint8_t* p, int stride,
                        int thresh, int ithresh, int hev_thresh) {
  FilterLoop26_C(p, 1, stride, 16, thresh, ithresh, hev_thresh);
}
640
// on three inner edges
// Applies the 2/4-pixel loop to the edges located 4, 8 and 12 rows below
// 'p', 16 pixels each.
static void VFilter16i_C(uint8_t* p, int stride,
                         int thresh, int ithresh, int hev_thresh) {
  int k;
  for (k = 3; k > 0; --k) {
    p += 4 * stride;
    FilterLoop24_C(p, stride, 1, 16, thresh, ithresh, hev_thresh);
  }
}
650	#endif // !WEBP_NEON_OMIT_C_CODE
651
#if !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
// Applies the 2/4-pixel loop to the edges located 4, 8 and 12 pixels to the
// right of 'p', 16 rows each.
static void HFilter16i_C(uint8_t* p, int stride,
                         int thresh, int ithresh, int hev_thresh) {
  int k;
  for (k = 3; k > 0; --k) {
    p += 4;
    FilterLoop24_C(p, 1, stride, 16, thresh, ithresh, hev_thresh);
  }
}
#endif  // !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
662
663	#if !WEBP_NEON_OMIT_C_CODE
// 8-pixels wide variant, for chroma filtering
// Runs the 2/6-pixel loop over 8 consecutive pixels of each of the 'u' and
// 'v' planes, reading neighbours 'stride' bytes apart.
static void VFilter8_C(uint8_t* u, uint8_t* v, int stride,
                       int thresh, int ithresh, int hev_thresh) {
  FilterLoop26_C(u, stride, 1, 8, thresh, ithresh, hev_thresh);
  FilterLoop26_C(v, stride, 1, 8, thresh, ithresh, hev_thresh);
}
670	#endif // !WEBP_NEON_OMIT_C_CODE
671
#if !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
// Runs the 2/6-pixel loop over 8 positions spaced 'stride' bytes apart in
// each of the 'u' and 'v' planes, reading neighbours 1 byte apart.
static void HFilter8_C(uint8_t* u, uint8_t* v, int stride,
                       int thresh, int ithresh, int hev_thresh) {
  FilterLoop26_C(u, 1, stride, 8, thresh, ithresh, hev_thresh);
  FilterLoop26_C(v, 1, stride, 8, thresh, ithresh, hev_thresh);
}
#endif  // !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
679
680	#if !WEBP_NEON_OMIT_C_CODE
// Runs the 2/4-pixel loop on the edge located 4 rows below the start of each
// of the 'u' and 'v' planes, 8 pixels each.
static void VFilter8i_C(uint8_t* u, uint8_t* v, int stride,
                        int thresh, int ithresh, int hev_thresh) {
  FilterLoop24_C(u + 4 * stride, stride, 1, 8, thresh, ithresh, hev_thresh);
  FilterLoop24_C(v + 4 * stride, stride, 1, 8, thresh, ithresh, hev_thresh);
}
686	#endif // !WEBP_NEON_OMIT_C_CODE
687
#if !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
// Runs the 2/4-pixel loop on the edge located 4 pixels to the right of the
// start of each of the 'u' and 'v' planes, 8 rows each.
static void HFilter8i_C(uint8_t* u, uint8_t* v, int stride,
                        int thresh, int ithresh, int hev_thresh) {
  FilterLoop24_C(u + 4, 1, stride, 8, thresh, ithresh, hev_thresh);
  FilterLoop24_C(v + 4, 1, stride, 8, thresh, ithresh, hev_thresh);
}
#endif  // !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
695
696	//------------------------------------------------------------------------------
697
698	static void DitherCombine8x8_C(const uint8_t* dither, uint8_t* dst,
699	int dst_stride) {
700	int i, j;
701	for (j = `0`; j < `8`; ++j) {
702	for (i = `0`; i < `8`; ++i) {
703	const int delta0 = dither[i] - VP8_DITHER_AMP_CENTER;
704	const int delta1 =
705	(delta0 + VP8_DITHER_DESCALE_ROUNDER) >> VP8_DITHER_DESCALE;
706	dst[i] = clip_8b((int)dst[i] + delta1);
707	}
708	dst += dst_stride;
709	dither += `8`;
710	}
711	}
712
713	//------------------------------------------------------------------------------
714
715	VP8DecIdct2 VP8Transform;
716	VP8DecIdct VP8TransformAC3;
717	VP8DecIdct VP8TransformUV;
718	VP8DecIdct VP8TransformDC;
719	VP8DecIdct VP8TransformDCUV;
720
721	VP8LumaFilterFunc VP8VFilter16;
722	VP8LumaFilterFunc VP8HFilter16;
723	VP8ChromaFilterFunc VP8VFilter8;
724	VP8ChromaFilterFunc VP8HFilter8;
725	VP8LumaFilterFunc VP8VFilter16i;
726	VP8LumaFilterFunc VP8HFilter16i;
727	VP8ChromaFilterFunc VP8VFilter8i;
728	VP8ChromaFilterFunc VP8HFilter8i;
729	VP8SimpleFilterFunc VP8SimpleVFilter16;
730	VP8SimpleFilterFunc VP8SimpleHFilter16;
731	VP8SimpleFilterFunc VP8SimpleVFilter16i;
732	VP8SimpleFilterFunc VP8SimpleHFilter16i;
733
734	void (VP8DitherCombine8x8)(const* uint8_t* dither, uint8_t* dst,
735	int dst_stride);
736
// Platform-specific initializers, defined outside this file. VP8DspInit()
// calls each one only when the matching WEBP_USE_* macro is defined and the
// corresponding runtime condition checked there holds.
extern void VP8DspInitSSE2(void);
extern void VP8DspInitSSE41(void);
extern void VP8DspInitNEON(void);
extern void VP8DspInitMIPS32(void);
extern void VP8DspInitMIPSdspR2(void);
extern void VP8DspInitMSA(void);
743
744	WEBP_DSP_INIT_FUNC(VP8DspInit) {
745	VP8InitClipTables();
746
747	#if !WEBP_NEON_OMIT_C_CODE
748	VP8TransformWHT = TransformWHT_C;
749	VP8Transform = TransformTwo_C;
750	VP8TransformDC = TransformDC_C;
751	VP8TransformAC3 = TransformAC3_C;
752	#endif
753	VP8TransformUV = TransformUV_C;
754	VP8TransformDCUV = TransformDCUV_C;
755
756	#if !WEBP_NEON_OMIT_C_CODE
757	VP8VFilter16 = VFilter16_C;
758	VP8VFilter16i = VFilter16i_C;
759	VP8HFilter16 = HFilter16_C;
760	VP8VFilter8 = VFilter8_C;
761	VP8VFilter8i = VFilter8i_C;
762	VP8SimpleVFilter16 = SimpleVFilter16_C;
763	VP8SimpleHFilter16 = SimpleHFilter16_C;
764	VP8SimpleVFilter16i = SimpleVFilter16i_C;
765	VP8SimpleHFilter16i = SimpleHFilter16i_C;
766	#endif
767
768	#if !WEBP_NEON_OMIT_C_CODE \|\| WEBP_NEON_WORK_AROUND_GCC
769	VP8HFilter16i = HFilter16i_C;
770	VP8HFilter8 = HFilter8_C;
771	VP8HFilter8i = HFilter8i_C;
772	#endif
773
774	#if !WEBP_NEON_OMIT_C_CODE
775	VP8PredLuma4[`0`] = DC4_C;
776	VP8PredLuma4[`1`] = TM4_C;
777	VP8PredLuma4[`2`] = VE4_C;
778	VP8PredLuma4[`4`] = RD4_C;
779	VP8PredLuma4[`6`] = LD4_C;
780	#endif
781
782	VP8PredLuma4[`3`] = HE4_C;
783	VP8PredLuma4[`5`] = VR4_C;
784	VP8PredLuma4[`7`] = VL4_C;
785	VP8PredLuma4[`8`] = HD4_C;
786	VP8PredLuma4[`9`] = HU4_C;
787
788	#if !WEBP_NEON_OMIT_C_CODE
789	VP8PredLuma16[`0`] = DC16_C;
790	VP8PredLuma16[`1`] = TM16_C;
791	VP8PredLuma16[`2`] = VE16_C;
792	VP8PredLuma16[`3`] = HE16_C;
793	VP8PredLuma16[`4`] = DC16NoTop_C;
794	VP8PredLuma16[`5`] = DC16NoLeft_C;
795	VP8PredLuma16[`6`] = DC16NoTopLeft_C;
796
797	VP8PredChroma8[`0`] = DC8uv_C;
798	VP8PredChroma8[`1`] = TM8uv_C;
799	VP8PredChroma8[`2`] = VE8uv_C;
800	VP8PredChroma8[`3`] = HE8uv_C;
801	VP8PredChroma8[`4`] = DC8uvNoTop_C;
802	VP8PredChroma8[`5`] = DC8uvNoLeft_C;
803	VP8PredChroma8[`6`] = DC8uvNoTopLeft_C;
804	#endif
805
806	VP8DitherCombine8x8 = DitherCombine8x8_C;
807
808	// If defined, use CPUInfo() to overwrite some pointers with faster versions.
809	if (VP8GetCPUInfo != NULL) {
810	#if defined(WEBP_USE_SSE2)
811	if (VP8GetCPUInfo(kSSE2)) {
812	VP8DspInitSSE2();
813	#if defined(WEBP_USE_SSE41)
814	if (VP8GetCPUInfo(kSSE4_1)) {
815	VP8DspInitSSE41();
816	}
817	#endif
818	}
819	#endif
820	#if defined(WEBP_USE_MIPS32)
821	if (VP8GetCPUInfo(kMIPS32)) {
822	VP8DspInitMIPS32();
823	}
824	#endif
825	#if defined(WEBP_USE_MIPS_DSP_R2)
826	if (VP8GetCPUInfo(kMIPSdspR2)) {
827	VP8DspInitMIPSdspR2();
828	}
829	#endif
830	#if defined(WEBP_USE_MSA)
831	if (VP8GetCPUInfo(kMSA)) {
832	VP8DspInitMSA();
833	}
834	#endif
835	}
836
837	#if defined(WEBP_USE_NEON)
838	if (WEBP_NEON_OMIT_C_CODE \|\|
839	(VP8GetCPUInfo != NULL && VP8GetCPUInfo(kNEON))) {
840	VP8DspInitNEON();
841	}
842	#endif
843
844	assert(VP8TransformWHT != NULL);
845	assert(VP8Transform != NULL);
846	assert(VP8TransformDC != NULL);
847	assert(VP8TransformAC3 != NULL);
848	assert(VP8TransformUV != NULL);
849	assert(VP8TransformDCUV != NULL);
850	assert(VP8VFilter16 != NULL);
851	assert(VP8HFilter16 != NULL);
852	assert(VP8VFilter8 != NULL);
853	assert(VP8HFilter8 != NULL);
854	assert(VP8VFilter16i != NULL);
855	assert(VP8HFilter16i != NULL);
856	assert(VP8VFilter8i != NULL);
857	assert(VP8HFilter8i != NULL);
858	assert(VP8SimpleVFilter16 != NULL);
859	assert(VP8SimpleHFilter16 != NULL);
860	assert(VP8SimpleVFilter16i != NULL);
861	assert(VP8SimpleHFilter16i != NULL);
862	assert(VP8PredLuma4[`0`] != NULL);
863	assert(VP8PredLuma4[`1`] != NULL);
864	assert(VP8PredLuma4[`2`] != NULL);
865	assert(VP8PredLuma4[`3`] != NULL);
866	assert(VP8PredLuma4[`4`] != NULL);
867	assert(VP8PredLuma4[`5`] != NULL);
868	assert(VP8PredLuma4[`6`] != NULL);
869	assert(VP8PredLuma4[`7`] != NULL);
870	assert(VP8PredLuma4[`8`] != NULL);
871	assert(VP8PredLuma4[`9`] != NULL);
872	assert(VP8PredLuma16[`0`] != NULL);
873	assert(VP8PredLuma16[`1`] != NULL);
874	assert(VP8PredLuma16[`2`] != NULL);
875	assert(VP8PredLuma16[`3`] != NULL);
876	assert(VP8PredLuma16[`4`] != NULL);
877	assert(VP8PredLuma16[`5`] != NULL);
878	assert(VP8PredLuma16[`6`] != NULL);
879	assert(VP8PredChroma8[`0`] != NULL);
880	assert(VP8PredChroma8[`1`] != NULL);
881	assert(VP8PredChroma8[`2`] != NULL);
882	assert(VP8PredChroma8[`3`] != NULL);
883	assert(VP8PredChroma8[`4`] != NULL);
884	assert(VP8PredChroma8[`5`] != NULL);
885	assert(VP8PredChroma8[`6`] != NULL);
886	assert(VP8DitherCombine8x8 != NULL);
887	}
888

// Browse the source code of Skia/third_party/externals/libwebp/src/dsp/dec.c