dec.c source code [engine/third_party/libwebp/src/dsp/dec.c]

1	// Copyright 2010 Google Inc. All Rights Reserved.
2	//
3	// Use of this source code is governed by a BSD-style license
4	// that can be found in the COPYING file in the root of the source
5	// tree. An additional intellectual property rights grant can be found
6	// in the file PATENTS. All contributing project authors may
7	// be found in the AUTHORS file in the root of the source tree.
8	// -----------------------------------------------------------------------------
9	//
10	// Speed-critical decoding functions, default plain-C implementations.
11	//
12	// Author: Skal (pascal.massimino@gmail.com)
13
14	#include "./dsp.h"
15	#include "../dec/vp8i_dec.h"
16	#include "../utils/utils.h"
17
18	//------------------------------------------------------------------------------
19
20	static WEBP_INLINE uint8_t clip_8b(int v) {
21	return (!(v & ~`0xff`)) ? v : (v < `0`) ? `0` : `255`;
22	}
23
24	//------------------------------------------------------------------------------
25	// Transforms (Paragraph 14.4)
26
// Adds the residual 'v' (descaled by 3 bits) to the pixel at column 'x',
// row 'y' of 'dst' and saturates the result to 8 bits. Expects 'dst', BPS
// and clip_8b() to be visible at the expansion site.
#define STORE(x, y, v) \
  dst[(x) + (y) * BPS] = clip_8b(dst[(x) + (y) * BPS] + ((v) >> 3))

// Stores the four pixels of row 'y' as DC + d, DC + c, DC - c, DC - d.
// 'dc' is evaluated once into a local.
#define STORE2(y, dc, d, c) do {    \
  const int DC = (dc);              \
  STORE(0, y, DC + (d));            \
  STORE(1, y, DC + (c));            \
  STORE(2, y, DC - (c));            \
  STORE(3, y, DC - (d));            \
} while (0)

// 16-bit fixed-point multipliers: MUL1(a) is a * (1 + 20091 / 65536) and
// MUL2(a) is a * (35468 / 65536), both using an arithmetic right shift.
#define MUL1(a) ((((a) * 20091) >> 16) + (a))
#define MUL2(a) (((a) * 35468) >> 16)
40
41	static void TransformOne(const int16_t* in, uint8_t* dst) {
42	int C[`4` * `4`], *tmp;
43	int i;
44	tmp = C;
45	for (i = `0`; i < `4`; ++i) { // vertical pass
46	const int a = in[`0`] + in[`8`]; // [-4096, 4094]
47	const int b = in[`0`] - in[`8`]; // [-4095, 4095]
48	const int c = MUL2(in[`4`]) - MUL1(in[`12`]); // [-3783, 3783]
49	const int d = MUL1(in[`4`]) + MUL2(in[`12`]); // [-3785, 3781]
50	tmp[`0`] = a + d; // [-7881, 7875]
51	tmp[`1`] = b + c; // [-7878, 7878]
52	tmp[`2`] = b - c; // [-7878, 7878]
53	tmp[`3`] = a - d; // [-7877, 7879]
54	tmp += `4`;
55	in++;
56	}
57	// Each pass is expanding the dynamic range by ~3.85 (upper bound).
58	// The exact value is (2. + (20091 + 35468) / 65536).
59	// After the second pass, maximum interval is [-3794, 3794], assuming
60	// an input in [-2048, 2047] interval. We then need to add a dst value
61	// in the [0, 255] range.
62	// In the worst case scenario, the input to clip_8b() can be as large as
63	// [-60713, 60968].
64	tmp = C;
65	for (i = `0`; i < `4`; ++i) { // horizontal pass
66	const int dc = tmp[`0`] + `4`;
67	const int a = dc + tmp[`8`];
68	const int b = dc - tmp[`8`];
69	const int c = MUL2(tmp[`4`]) - MUL1(tmp[`12`]);
70	const int d = MUL1(tmp[`4`]) + MUL2(tmp[`12`]);
71	STORE(`0`, `0`, a + d);
72	STORE(`1`, `0`, b + c);
73	STORE(`2`, `0`, b - c);
74	STORE(`3`, `0`, a - d);
75	tmp++;
76	dst += BPS;
77	}
78	}
79
// Simplified transform when only in[0], in[1] and in[4] are non-zero.
// Only those three coefficients are read; the result is added to the 4x4
// block at 'dst' with 8-bit saturation.
static void TransformAC3(const int16_t* in, uint8_t* dst) {
  const int a = in[0] + 4;   // DC plus rounder for the final '>> 3'
  const int c4 = MUL2(in[4]);
  const int d4 = MUL1(in[4]);
  const int c1 = MUL2(in[1]);
  const int d1 = MUL1(in[1]);
  STORE2(0, a + d4, d1, c1);
  STORE2(1, a + c4, d1, c1);
  STORE2(2, a - c4, d1, c1);
  STORE2(3, a - d4, d1, c1);
}
92	#undef MUL1
93	#undef MUL2
94	#undef STORE2
95
// Transforms the 4x4 block at 'dst' from the 16 coefficients at 'in'. When
// 'do_two' is non-zero, also transforms the block 4 pixels to the right
// using the next 16 coefficients.
static void TransformTwo(const int16_t* in, uint8_t* dst, int do_two) {
  TransformOne(in, dst);
  if (do_two) {
    TransformOne(in + 16, dst + 4);
  }
}
102
103	static void TransformUV(const int16_t* in, uint8_t* dst) {
104	VP8Transform(in + `0` * `16`, dst, `1`);
105	VP8Transform(in + `2` * `16`, dst + `4` * BPS, `1`);
106	}
107
// DC-only transform: adds (in[0] + 4) >> 3 to every pixel of the 4x4 block
// at 'dst', saturating to 8 bits. Only in[0] is read.
static void TransformDC(const int16_t* in, uint8_t* dst) {
  const int DC = in[0] + 4;
  int i, j;
  for (j = 0; j < 4; ++j) {
    for (i = 0; i < 4; ++i) {
      STORE(i, j, DC);
    }
  }
}
117
118	static void TransformDCUV(const int16_t* in, uint8_t* dst) {
119	if (in[`0` * `16`]) VP8TransformDC(in + `0` * `16`, dst);
120	if (in[`1` * `16`]) VP8TransformDC(in + `1` * `16`, dst + `4`);
121	if (in[`2` * `16`]) VP8TransformDC(in + `2` * `16`, dst + `4` * BPS);
122	if (in[`3` * `16`]) VP8TransformDC(in + `3` * `16`, dst + `4` * BPS + `4`);
123	}
124
125	#undef STORE
126
127	//------------------------------------------------------------------------------
128	// Paragraph 14.3
129
// Inverse Walsh-Hadamard transform of the 16 values in 'in'.
// The 16 results are written with a stride of 16 elements: out[0], out[16],
// ..., out[240]. 'out' must therefore span at least 241 elements. Other
// elements of 'out' are left untouched.
static void TransformWHT(const int16_t* in, int16_t* out) {
  int tmp[16];
  int i;
  // First pass: butterflies along columns of the 4x4 input.
  for (i = 0; i < 4; ++i) {
    const int a0 = in[0 + i] + in[12 + i];
    const int a1 = in[4 + i] + in[ 8 + i];
    const int a2 = in[4 + i] - in[ 8 + i];
    const int a3 = in[0 + i] - in[12 + i];
    tmp[0  + i] = a0 + a1;
    tmp[8  + i] = a0 - a1;
    tmp[4  + i] = a3 + a2;
    tmp[12 + i] = a3 - a2;
  }
  // Second pass: butterflies along rows, followed by a rounded '>> 3'.
  for (i = 0; i < 4; ++i) {
    const int dc = tmp[0 + i * 4] + 3;    // w/ rounder
    const int a0 = dc             + tmp[3 + i * 4];
    const int a1 = tmp[1 + i * 4] + tmp[2 + i * 4];
    const int a2 = tmp[1 + i * 4] - tmp[2 + i * 4];
    const int a3 = dc             - tmp[3 + i * 4];
    out[ 0] = (a0 + a1) >> 3;
    out[16] = (a3 + a2) >> 3;
    out[32] = (a0 - a1) >> 3;
    out[48] = (a3 - a2) >> 3;
    out += 64;
  }
}
156
// Function pointer to the active inverse-WHT implementation.
// Assigned by VP8DspInit().
void (*VP8TransformWHT)(const int16_t* in, int16_t* out);
158
159	//------------------------------------------------------------------------------
160	// Intra predictions
161
// Accesses the pixel at column 'x', row 'y' of the block pointed to by the
// local 'dst' (row stride BPS).
#define DST(x, y) dst[(x) + (y) * BPS]
163
164	static WEBP_INLINE void TrueMotion(uint8_t* dst, int size) {
165	const uint8_t* top = dst - BPS;
166	const uint8_t* const clip0 = VP8kclip1 - top[-`1`];
167	int y;
168	for (y = `0`; y < size; ++y) {
169	const uint8_t* const clip = clip0 + dst[-`1`];
170	int x;
171	for (x = `0`; x < size; ++x) {
172	dst[x] = clip[top[x]];
173	}
174	dst += BPS;
175	}
176	}
// TrueMotion predictors for 4x4, 8x8 and 16x16 blocks.
static void TM4(uint8_t* dst)   { TrueMotion(dst, 4); }
static void TM8uv(uint8_t* dst) { TrueMotion(dst, 8); }
static void TM16(uint8_t* dst)  { TrueMotion(dst, 16); }
180
181	//------------------------------------------------------------------------------
182	// 16x16
183
184	static void VE16(uint8_t* dst) { // vertical
185	int j;
186	for (j = `0`; j < `16`; ++j) {
187	memcpy(dst + j * BPS, dst - BPS, `16`);
188	}
189	}
190
191	static void HE16(uint8_t* dst) { // horizontal
192	int j;
193	for (j = `16`; j > `0`; --j) {
194	memset(dst, dst[-`1`], `16`);
195	dst += BPS;
196	}
197	}
198
199	static WEBP_INLINE void Put16(int v, uint8_t* dst) {
200	int j;
201	for (j = `0`; j < `16`; ++j) {
202	memset(dst + j * BPS, v, `16`);
203	}
204	}
205
206	static void DC16(uint8_t* dst) { // DC
207	int DC = `16`;
208	int j;
209	for (j = `0`; j < `16`; ++j) {
210	DC += dst[-`1` + j * BPS] + dst[j - BPS];
211	}
212	Put16(DC >> `5`, dst);
213	}
214
215	static void DC16NoTop(uint8_t* dst) { // DC with top samples not available
216	int DC = `8`;
217	int j;
218	for (j = `0`; j < `16`; ++j) {
219	DC += dst[-`1` + j * BPS];
220	}
221	Put16(DC >> `4`, dst);
222	}
223
224	static void DC16NoLeft(uint8_t* dst) { // DC with left samples not available
225	int DC = `8`;
226	int i;
227	for (i = `0`; i < `16`; ++i) {
228	DC += dst[i - BPS];
229	}
230	Put16(DC >> `4`, dst);
231	}
232
// DC prediction without any neighbour: fills the block with mid-gray 0x80.
static void DC16NoTopLeft(uint8_t* dst) {  // DC with no top and left samples
  Put16(0x80, dst);
}
236
237	VP8PredFunc VP8PredLuma16[NUM_B_DC_MODES];
238
239	//------------------------------------------------------------------------------
240	// 4x4
241
// AVG3: rounded (1, 2, 1) / 4 weighted average of three samples, truncated
// to uint8_t. AVG2: rounded average of two samples.
#define AVG3(a, b, c) ((uint8_t)(((a) + 2 * (b) + (c) + 2) >> 2))
#define AVG2(a, b) (((a) + (b) + 1) >> 1)
244
245	static void VE4(uint8_t* dst) { // vertical
246	const uint8_t* top = dst - BPS;
247	const uint8_t vals[`4`] = {
248	AVG3(top[-`1`], top[`0`], top[`1`]),
249	AVG3(top[ `0`], top[`1`], top[`2`]),
250	AVG3(top[ `1`], top[`2`], top[`3`]),
251	AVG3(top[ `2`], top[`3`], top[`4`])
252	};
253	int i;
254	for (i = `0`; i < `4`; ++i) {
255	memcpy(dst + i * BPS, vals, sizeof(vals));
256	}
257	}
258
259	static void HE4(uint8_t* dst) { // horizontal
260	const int A = dst[-`1` - BPS];
261	const int B = dst[-`1`];
262	const int C = dst[-`1` + BPS];
263	const int D = dst[-`1` + `2` * BPS];
264	const int E = dst[-`1` + `3` * BPS];
265	WebPUint32ToMem(dst + `0` * BPS, `0x01010101U` * AVG3(A, B, C));
266	WebPUint32ToMem(dst + `1` * BPS, `0x01010101U` * AVG3(B, C, D));
267	WebPUint32ToMem(dst + `2` * BPS, `0x01010101U` * AVG3(C, D, E));
268	WebPUint32ToMem(dst + `3` * BPS, `0x01010101U` * AVG3(D, E, E));
269	}
270
271	static void DC4(uint8_t* dst) { // DC
272	uint32_t dc = `4`;
273	int i;
274	for (i = `0`; i < `4`; ++i) dc += dst[i - BPS] + dst[-`1` + i * BPS];
275	dc >>= `3`;
276	for (i = `0`; i < `4`; ++i) memset(dst + i * BPS, dc, `4`);
277	}
278
279	static void RD4(uint8_t* dst) { // Down-right
280	const int I = dst[-`1` + `0` * BPS];
281	const int J = dst[-`1` + `1` * BPS];
282	const int K = dst[-`1` + `2` * BPS];
283	const int L = dst[-`1` + `3` * BPS];
284	const int X = dst[-`1` - BPS];
285	const int A = dst[`0` - BPS];
286	const int B = dst[`1` - BPS];
287	const int C = dst[`2` - BPS];
288	const int D = dst[`3` - BPS];
289	DST(`0`, `3`) = AVG3(J, K, L);
290	DST(`1`, `3`) = DST(`0`, `2`) = AVG3(I, J, K);
291	DST(`2`, `3`) = DST(`1`, `2`) = DST(`0`, `1`) = AVG3(X, I, J);
292	DST(`3`, `3`) = DST(`2`, `2`) = DST(`1`, `1`) = DST(`0`, `0`) = AVG3(A, X, I);
293	DST(`3`, `2`) = DST(`2`, `1`) = DST(`1`, `0`) = AVG3(B, A, X);
294	DST(`3`, `1`) = DST(`2`, `0`) = AVG3(C, B, A);
295	DST(`3`, `0`) = AVG3(D, C, B);
296	}
297
298	static void LD4(uint8_t* dst) { // Down-Left
299	const int A = dst[`0` - BPS];
300	const int B = dst[`1` - BPS];
301	const int C = dst[`2` - BPS];
302	const int D = dst[`3` - BPS];
303	const int E = dst[`4` - BPS];
304	const int F = dst[`5` - BPS];
305	const int G = dst[`6` - BPS];
306	const int H = dst[`7` - BPS];
307	DST(`0`, `0`) = AVG3(A, B, C);
308	DST(`1`, `0`) = DST(`0`, `1`) = AVG3(B, C, D);
309	DST(`2`, `0`) = DST(`1`, `1`) = DST(`0`, `2`) = AVG3(C, D, E);
310	DST(`3`, `0`) = DST(`2`, `1`) = DST(`1`, `2`) = DST(`0`, `3`) = AVG3(D, E, F);
311	DST(`3`, `1`) = DST(`2`, `2`) = DST(`1`, `3`) = AVG3(E, F, G);
312	DST(`3`, `2`) = DST(`2`, `3`) = AVG3(F, G, H);
313	DST(`3`, `3`) = AVG3(G, H, H);
314	}
315
316	static void VR4(uint8_t* dst) { // Vertical-Right
317	const int I = dst[-`1` + `0` * BPS];
318	const int J = dst[-`1` + `1` * BPS];
319	const int K = dst[-`1` + `2` * BPS];
320	const int X = dst[-`1` - BPS];
321	const int A = dst[`0` - BPS];
322	const int B = dst[`1` - BPS];
323	const int C = dst[`2` - BPS];
324	const int D = dst[`3` - BPS];
325	DST(`0`, `0`) = DST(`1`, `2`) = AVG2(X, A);
326	DST(`1`, `0`) = DST(`2`, `2`) = AVG2(A, B);
327	DST(`2`, `0`) = DST(`3`, `2`) = AVG2(B, C);
328	DST(`3`, `0`) = AVG2(C, D);
329
330	DST(`0`, `3`) = AVG3(K, J, I);
331	DST(`0`, `2`) = AVG3(J, I, X);
332	DST(`0`, `1`) = DST(`1`, `3`) = AVG3(I, X, A);
333	DST(`1`, `1`) = DST(`2`, `3`) = AVG3(X, A, B);
334	DST(`2`, `1`) = DST(`3`, `3`) = AVG3(A, B, C);
335	DST(`3`, `1`) = AVG3(B, C, D);
336	}
337
338	static void VL4(uint8_t* dst) { // Vertical-Left
339	const int A = dst[`0` - BPS];
340	const int B = dst[`1` - BPS];
341	const int C = dst[`2` - BPS];
342	const int D = dst[`3` - BPS];
343	const int E = dst[`4` - BPS];
344	const int F = dst[`5` - BPS];
345	const int G = dst[`6` - BPS];
346	const int H = dst[`7` - BPS];
347	DST(`0`, `0`) = AVG2(A, B);
348	DST(`1`, `0`) = DST(`0`, `2`) = AVG2(B, C);
349	DST(`2`, `0`) = DST(`1`, `2`) = AVG2(C, D);
350	DST(`3`, `0`) = DST(`2`, `2`) = AVG2(D, E);
351
352	DST(`0`, `1`) = AVG3(A, B, C);
353	DST(`1`, `1`) = DST(`0`, `3`) = AVG3(B, C, D);
354	DST(`2`, `1`) = DST(`1`, `3`) = AVG3(C, D, E);
355	DST(`3`, `1`) = DST(`2`, `3`) = AVG3(D, E, F);
356	DST(`3`, `2`) = AVG3(E, F, G);
357	DST(`3`, `3`) = AVG3(F, G, H);
358	}
359
360	static void HU4(uint8_t* dst) { // Horizontal-Up
361	const int I = dst[-`1` + `0` * BPS];
362	const int J = dst[-`1` + `1` * BPS];
363	const int K = dst[-`1` + `2` * BPS];
364	const int L = dst[-`1` + `3` * BPS];
365	DST(`0`, `0`) = AVG2(I, J);
366	DST(`2`, `0`) = DST(`0`, `1`) = AVG2(J, K);
367	DST(`2`, `1`) = DST(`0`, `2`) = AVG2(K, L);
368	DST(`1`, `0`) = AVG3(I, J, K);
369	DST(`3`, `0`) = DST(`1`, `1`) = AVG3(J, K, L);
370	DST(`3`, `1`) = DST(`1`, `2`) = AVG3(K, L, L);
371	DST(`3`, `2`) = DST(`2`, `2`) =
372	DST(`0`, `3`) = DST(`1`, `3`) = DST(`2`, `3`) = DST(`3`, `3`) = L;
373	}
374
375	static void HD4(uint8_t* dst) { // Horizontal-Down
376	const int I = dst[-`1` + `0` * BPS];
377	const int J = dst[-`1` + `1` * BPS];
378	const int K = dst[-`1` + `2` * BPS];
379	const int L = dst[-`1` + `3` * BPS];
380	const int X = dst[-`1` - BPS];
381	const int A = dst[`0` - BPS];
382	const int B = dst[`1` - BPS];
383	const int C = dst[`2` - BPS];
384
385	DST(`0`, `0`) = DST(`2`, `1`) = AVG2(I, X);
386	DST(`0`, `1`) = DST(`2`, `2`) = AVG2(J, I);
387	DST(`0`, `2`) = DST(`2`, `3`) = AVG2(K, J);
388	DST(`0`, `3`) = AVG2(L, K);
389
390	DST(`3`, `0`) = AVG3(A, B, C);
391	DST(`2`, `0`) = AVG3(X, A, B);
392	DST(`1`, `0`) = DST(`3`, `1`) = AVG3(I, X, A);
393	DST(`1`, `1`) = DST(`3`, `2`) = AVG3(J, I, X);
394	DST(`1`, `2`) = DST(`3`, `3`) = AVG3(K, J, I);
395	DST(`1`, `3`) = AVG3(L, K, J);
396	}
397
398	#undef DST
399	#undef AVG3
400	#undef AVG2
401
402	VP8PredFunc VP8PredLuma4[NUM_BMODES];
403
404	//------------------------------------------------------------------------------
405	// Chroma
406
407	static void VE8uv(uint8_t* dst) { // vertical
408	int j;
409	for (j = `0`; j < `8`; ++j) {
410	memcpy(dst + j * BPS, dst - BPS, `8`);
411	}
412	}
413
414	static void HE8uv(uint8_t* dst) { // horizontal
415	int j;
416	for (j = `0`; j < `8`; ++j) {
417	memset(dst, dst[-`1`], `8`);
418	dst += BPS;
419	}
420	}
421
422	// helper for chroma-DC predictions
423	static WEBP_INLINE void Put8x8uv(uint8_t value, uint8_t* dst) {
424	int j;
425	for (j = `0`; j < `8`; ++j) {
426	memset(dst + j * BPS, value, `8`);
427	}
428	}
429
430	static void DC8uv(uint8_t* dst) { // DC
431	int dc0 = `8`;
432	int i;
433	for (i = `0`; i < `8`; ++i) {
434	dc0 += dst[i - BPS] + dst[-`1` + i * BPS];
435	}
436	Put8x8uv(dc0 >> `4`, dst);
437	}
438
439	static void DC8uvNoLeft(uint8_t* dst) { // DC with no left samples
440	int dc0 = `4`;
441	int i;
442	for (i = `0`; i < `8`; ++i) {
443	dc0 += dst[i - BPS];
444	}
445	Put8x8uv(dc0 >> `3`, dst);
446	}
447
448	static void DC8uvNoTop(uint8_t* dst) { // DC with no top samples
449	int dc0 = `4`;
450	int i;
451	for (i = `0`; i < `8`; ++i) {
452	dc0 += dst[-`1` + i * BPS];
453	}
454	Put8x8uv(dc0 >> `3`, dst);
455	}
456
// 8x8 DC prediction without any neighbour: fills the block with 0x80.
static void DC8uvNoTopLeft(uint8_t* dst) {    // DC with nothing
  Put8x8uv(0x80, dst);
}
460
461	VP8PredFunc VP8PredChroma8[NUM_B_DC_MODES];
462
463	//------------------------------------------------------------------------------
464	// Edge filtering functions
465
466	// 4 pixels in, 2 pixels out
467	static WEBP_INLINE void do_filter2(uint8_t* p, int step) {
468	const int p1 = p[-`2`*step], p0 = p[-step], q0 = p[`0`], q1 = p[step];
469	const int a = `3` * (q0 - p0) + VP8ksclip1[p1 - q1]; // in [-893,892]
470	const int a1 = VP8ksclip2[(a + `4`) >> `3`]; // in [-16,15]
471	const int a2 = VP8ksclip2[(a + `3`) >> `3`];
472	p[-step] = VP8kclip1[p0 + a2];
473	p[ `0`] = VP8kclip1[q0 - a1];
474	}
475
476	// 4 pixels in, 4 pixels out
477	static WEBP_INLINE void do_filter4(uint8_t* p, int step) {
478	const int p1 = p[-`2`*step], p0 = p[-step], q0 = p[`0`], q1 = p[step];
479	const int a = `3` * (q0 - p0);
480	const int a1 = VP8ksclip2[(a + `4`) >> `3`];
481	const int a2 = VP8ksclip2[(a + `3`) >> `3`];
482	const int a3 = (a1 + `1`) >> `1`;
483	p[-`2`*step] = VP8kclip1[p1 + a3];
484	p[- step] = VP8kclip1[p0 + a2];
485	p[ `0`] = VP8kclip1[q0 - a1];
486	p[ step] = VP8kclip1[q1 - a3];
487	}
488
489	// 6 pixels in, 6 pixels out
490	static WEBP_INLINE void do_filter6(uint8_t* p, int step) {
491	const int p2 = p[-`3`step], p1 = p[-`2`step], p0 = p[-step];
492	const int q0 = p[`0`], q1 = p[step], q2 = p[`2`*step];
493	const int a = VP8ksclip1[`3` * (q0 - p0) + VP8ksclip1[p1 - q1]];
494	// a is in [-128,127], a1 in [-27,27], a2 in [-18,18] and a3 in [-9,9]
495	const int a1 = (`27` * a + `63`) >> `7`; // eq. to ((3 a + 7) * 9) >> 7*
496	const int a2 = (`18` * a + `63`) >> `7`; // eq. to ((2 a + 7) * 9) >> 7*
497	const int a3 = (`9` * a + `63`) >> `7`; // eq. to ((1 a + 7) * 9) >> 7*
498	p[-`3`*step] = VP8kclip1[p2 + a3];
499	p[-`2`*step] = VP8kclip1[p1 + a2];
500	p[- step] = VP8kclip1[p0 + a1];
501	p[ `0`] = VP8kclip1[q0 - a1];
502	p[ step] = VP8kclip1[q1 - a2];
503	p[ `2`*step] = VP8kclip1[q2 - a3];
504	}
505
506	static WEBP_INLINE int hev(const uint8_t* p, int step, int thresh) {
507	const int p1 = p[-`2`*step], p0 = p[-step], q0 = p[`0`], q1 = p[step];
508	return (VP8kabs0[p1 - p0] > thresh) \|\| (VP8kabs0[q1 - q0] > thresh);
509	}
510
511	static WEBP_INLINE int needs_filter(const uint8_t* p, int step, int t) {
512	const int p1 = p[-`2` * step], p0 = p[-step], q0 = p[`0`], q1 = p[step];
513	return ((`4` * VP8kabs0[p0 - q0] + VP8kabs0[p1 - q1]) <= t);
514	}
515
516	static WEBP_INLINE int needs_filter2(const uint8_t* p,
517	int step, int t, int it) {
518	const int p3 = p[-`4` * step], p2 = p[-`3` * step], p1 = p[-`2` * step];
519	const int p0 = p[-step], q0 = p[`0`];
520	const int q1 = p[step], q2 = p[`2` * step], q3 = p[`3` * step];
521	if ((`4` * VP8kabs0[p0 - q0] + VP8kabs0[p1 - q1]) > t) return `0`;
522	return VP8kabs0[p3 - p2] <= it && VP8kabs0[p2 - p1] <= it &&
523	VP8kabs0[p1 - p0] <= it && VP8kabs0[q3 - q2] <= it &&
524	VP8kabs0[q2 - q1] <= it && VP8kabs0[q1 - q0] <= it;
525	}
526
527	//------------------------------------------------------------------------------
528	// Simple In-loop filtering (Paragraph 15.2)
529
// Simple filter across the horizontal edge just above row 'p': for each of
// the 16 columns, filters vertically (step = stride) when needs_filter()
// allows it.
static void SimpleVFilter16(uint8_t* p, int stride, int thresh) {
  int i;
  const int thresh2 = 2 * thresh + 1;
  for (i = 0; i < 16; ++i) {
    if (needs_filter(p + i, stride, thresh2)) {
      do_filter2(p + i, stride);
    }
  }
}
539
// Simple filter across the vertical edge just left of column 'p': for each
// of the 16 rows, filters horizontally (step = 1) when needs_filter()
// allows it.
static void SimpleHFilter16(uint8_t* p, int stride, int thresh) {
  int i;
  const int thresh2 = 2 * thresh + 1;
  for (i = 0; i < 16; ++i) {
    if (needs_filter(p + i * stride, 1, thresh2)) {
      do_filter2(p + i * stride, 1);
    }
  }
}
549
// Applies SimpleVFilter16() to the three inner horizontal edges, located
// 4, 8 and 12 rows below 'p'.
static void SimpleVFilter16i(uint8_t* p, int stride, int thresh) {
  int k;
  for (k = 3; k > 0; --k) {
    p += 4 * stride;
    SimpleVFilter16(p, stride, thresh);
  }
}
557
// Applies SimpleHFilter16() to the three inner vertical edges, located
// 4, 8 and 12 columns right of 'p'.
static void SimpleHFilter16i(uint8_t* p, int stride, int thresh) {
  int k;
  for (k = 3; k > 0; --k) {
    p += 4;
    SimpleHFilter16(p, stride, thresh);
  }
}
565
566	//------------------------------------------------------------------------------
567	// Complex In-loop filtering (Paragraph 15.3)
568
569	static WEBP_INLINE void FilterLoop26(uint8_t* p,
570	int hstride, int vstride, int size,
571	int thresh, int ithresh, int hev_thresh) {
572	const int thresh2 = `2` * thresh + `1`;
573	while (size-- > `0`) {
574	if (needs_filter2(p, hstride, thresh2, ithresh)) {
575	if (hev(p, hstride, hev_thresh)) {
576	do_filter2(p, hstride);
577	} else {
578	do_filter6(p, hstride);
579	}
580	}
581	p += vstride;
582	}
583	}
584
585	static WEBP_INLINE void FilterLoop24(uint8_t* p,
586	int hstride, int vstride, int size,
587	int thresh, int ithresh, int hev_thresh) {
588	const int thresh2 = `2` * thresh + `1`;
589	while (size-- > `0`) {
590	if (needs_filter2(p, hstride, thresh2, ithresh)) {
591	if (hev(p, hstride, hev_thresh)) {
592	do_filter2(p, hstride);
593	} else {
594	do_filter4(p, hstride);
595	}
596	}
597	p += vstride;
598	}
599	}
600
// on macroblock edges
// Filters the horizontal edge above 'p' over 16 columns.
static void VFilter16(uint8_t* p, int stride,
                      int thresh, int ithresh, int hev_thresh) {
  FilterLoop26(p, stride, 1, 16, thresh, ithresh, hev_thresh);
}
606
// Filters the vertical edge left of 'p' over 16 rows.
static void HFilter16(uint8_t* p, int stride,
                      int thresh, int ithresh, int hev_thresh) {
  FilterLoop26(p, 1, stride, 16, thresh, ithresh, hev_thresh);
}
611
// on three inner edges
// Filters the horizontal edges located 4, 8 and 12 rows below 'p'.
static void VFilter16i(uint8_t* p, int stride,
                       int thresh, int ithresh, int hev_thresh) {
  int k;
  for (k = 3; k > 0; --k) {
    p += 4 * stride;
    FilterLoop24(p, stride, 1, 16, thresh, ithresh, hev_thresh);
  }
}
621
// Filters the vertical edges located 4, 8 and 12 columns right of 'p'.
static void HFilter16i(uint8_t* p, int stride,
                       int thresh, int ithresh, int hev_thresh) {
  int k;
  for (k = 3; k > 0; --k) {
    p += 4;
    FilterLoop24(p, 1, stride, 16, thresh, ithresh, hev_thresh);
  }
}
630
// 8-pixels wide variant, for chroma filtering.
// Filters the horizontal edge above 'u' and above 'v'.
static void VFilter8(uint8_t* u, uint8_t* v, int stride,
                     int thresh, int ithresh, int hev_thresh) {
  FilterLoop26(u, stride, 1, 8, thresh, ithresh, hev_thresh);
  FilterLoop26(v, stride, 1, 8, thresh, ithresh, hev_thresh);
}
637
// Filters the vertical edge left of 'u' and left of 'v' over 8 rows.
static void HFilter8(uint8_t* u, uint8_t* v, int stride,
                     int thresh, int ithresh, int hev_thresh) {
  FilterLoop26(u, 1, stride, 8, thresh, ithresh, hev_thresh);
  FilterLoop26(v, 1, stride, 8, thresh, ithresh, hev_thresh);
}
643
// Filters the single inner horizontal edge (4 rows down) of both 8x8
// chroma blocks.
static void VFilter8i(uint8_t* u, uint8_t* v, int stride,
                      int thresh, int ithresh, int hev_thresh) {
  FilterLoop24(u + 4 * stride, stride, 1, 8, thresh, ithresh, hev_thresh);
  FilterLoop24(v + 4 * stride, stride, 1, 8, thresh, ithresh, hev_thresh);
}
649
// Filters the single inner vertical edge (4 columns right) of both 8x8
// chroma blocks.
static void HFilter8i(uint8_t* u, uint8_t* v, int stride,
                      int thresh, int ithresh, int hev_thresh) {
  FilterLoop24(u + 4, 1, stride, 8, thresh, ithresh, hev_thresh);
  FilterLoop24(v + 4, 1, stride, 8, thresh, ithresh, hev_thresh);
}
655
656	//------------------------------------------------------------------------------
657
658	static void DitherCombine8x8(const uint8_t* dither, uint8_t* dst,
659	int dst_stride) {
660	int i, j;
661	for (j = `0`; j < `8`; ++j) {
662	for (i = `0`; i < `8`; ++i) {
663	const int delta0 = dither[i] - VP8_DITHER_AMP_CENTER;
664	const int delta1 =
665	(delta0 + VP8_DITHER_DESCALE_ROUNDER) >> VP8_DITHER_DESCALE;
666	dst[i] = clip_8b((int)dst[i] + delta1);
667	}
668	dst += dst_stride;
669	dither += `8`;
670	}
671	}
672
673	//------------------------------------------------------------------------------
674
675	VP8DecIdct2 VP8Transform;
676	VP8DecIdct VP8TransformAC3;
677	VP8DecIdct VP8TransformUV;
678	VP8DecIdct VP8TransformDC;
679	VP8DecIdct VP8TransformDCUV;
680
681	VP8LumaFilterFunc VP8VFilter16;
682	VP8LumaFilterFunc VP8HFilter16;
683	VP8ChromaFilterFunc VP8VFilter8;
684	VP8ChromaFilterFunc VP8HFilter8;
685	VP8LumaFilterFunc VP8VFilter16i;
686	VP8LumaFilterFunc VP8HFilter16i;
687	VP8ChromaFilterFunc VP8VFilter8i;
688	VP8ChromaFilterFunc VP8HFilter8i;
689	VP8SimpleFilterFunc VP8SimpleVFilter16;
690	VP8SimpleFilterFunc VP8SimpleHFilter16;
691	VP8SimpleFilterFunc VP8SimpleVFilter16i;
692	VP8SimpleFilterFunc VP8SimpleHFilter16i;
693
// Function pointer to the active 8x8 dithering routine.
// Assigned in VP8DspInit().
void (*VP8DitherCombine8x8)(const uint8_t* dither, uint8_t* dst,
                            int dst_stride);
696
// CPU-specific initializers, defined in other translation units. Each is
// only called from VP8DspInit() when its WEBP_USE_* macro is defined.
extern void VP8DspInitSSE2(void);
extern void VP8DspInitSSE41(void);
extern void VP8DspInitNEON(void);
extern void VP8DspInitMIPS32(void);
extern void VP8DspInitMIPSdspR2(void);
extern void VP8DspInitMSA(void);
703
704	static volatile VP8CPUInfo dec_last_cpuinfo_used =
705	(VP8CPUInfo)&dec_last_cpuinfo_used;
706
707	WEBP_TSAN_IGNORE_FUNCTION void VP8DspInit(void) {
708	if (dec_last_cpuinfo_used == VP8GetCPUInfo) return;
709
710	VP8InitClipTables();
711
712	VP8TransformWHT = TransformWHT;
713	VP8Transform = TransformTwo;
714	VP8TransformUV = TransformUV;
715	VP8TransformDC = TransformDC;
716	VP8TransformDCUV = TransformDCUV;
717	VP8TransformAC3 = TransformAC3;
718
719	VP8VFilter16 = VFilter16;
720	VP8HFilter16 = HFilter16;
721	VP8VFilter8 = VFilter8;
722	VP8HFilter8 = HFilter8;
723	VP8VFilter16i = VFilter16i;
724	VP8HFilter16i = HFilter16i;
725	VP8VFilter8i = VFilter8i;
726	VP8HFilter8i = HFilter8i;
727	VP8SimpleVFilter16 = SimpleVFilter16;
728	VP8SimpleHFilter16 = SimpleHFilter16;
729	VP8SimpleVFilter16i = SimpleVFilter16i;
730	VP8SimpleHFilter16i = SimpleHFilter16i;
731
732	VP8PredLuma4[`0`] = DC4;
733	VP8PredLuma4[`1`] = TM4;
734	VP8PredLuma4[`2`] = VE4;
735	VP8PredLuma4[`3`] = HE4;
736	VP8PredLuma4[`4`] = RD4;
737	VP8PredLuma4[`5`] = VR4;
738	VP8PredLuma4[`6`] = LD4;
739	VP8PredLuma4[`7`] = VL4;
740	VP8PredLuma4[`8`] = HD4;
741	VP8PredLuma4[`9`] = HU4;
742
743	VP8PredLuma16[`0`] = DC16;
744	VP8PredLuma16[`1`] = TM16;
745	VP8PredLuma16[`2`] = VE16;
746	VP8PredLuma16[`3`] = HE16;
747	VP8PredLuma16[`4`] = DC16NoTop;
748	VP8PredLuma16[`5`] = DC16NoLeft;
749	VP8PredLuma16[`6`] = DC16NoTopLeft;
750
751	VP8PredChroma8[`0`] = DC8uv;
752	VP8PredChroma8[`1`] = TM8uv;
753	VP8PredChroma8[`2`] = VE8uv;
754	VP8PredChroma8[`3`] = HE8uv;
755	VP8PredChroma8[`4`] = DC8uvNoTop;
756	VP8PredChroma8[`5`] = DC8uvNoLeft;
757	VP8PredChroma8[`6`] = DC8uvNoTopLeft;
758
759	VP8DitherCombine8x8 = DitherCombine8x8;
760
761	// If defined, use CPUInfo() to overwrite some pointers with faster versions.
762	if (VP8GetCPUInfo != NULL) {
763	#if defined(WEBP_USE_SSE2)
764	if (VP8GetCPUInfo(kSSE2)) {
765	VP8DspInitSSE2();
766	#if defined(WEBP_USE_SSE41)
767	if (VP8GetCPUInfo(kSSE4_1)) {
768	VP8DspInitSSE41();
769	}
770	#endif
771	}
772	#endif
773	#if defined(WEBP_USE_NEON)
774	if (VP8GetCPUInfo(kNEON)) {
775	VP8DspInitNEON();
776	}
777	#endif
778	#if defined(WEBP_USE_MIPS32)
779	if (VP8GetCPUInfo(kMIPS32)) {
780	VP8DspInitMIPS32();
781	}
782	#endif
783	#if defined(WEBP_USE_MIPS_DSP_R2)
784	if (VP8GetCPUInfo(kMIPSdspR2)) {
785	VP8DspInitMIPSdspR2();
786	}
787	#endif
788	#if defined(WEBP_USE_MSA)
789	if (VP8GetCPUInfo(kMSA)) {
790	VP8DspInitMSA();
791	}
792	#endif
793	}
794	dec_last_cpuinfo_used = VP8GetCPUInfo;
795	}
796

Browse the source code of engine/third_party/libwebp/src/dsp/dec.c