1 | // Copyright 2010 Google Inc. All Rights Reserved. |
2 | // |
3 | // Use of this source code is governed by a BSD-style license |
4 | // that can be found in the COPYING file in the root of the source |
5 | // tree. An additional intellectual property rights grant can be found |
6 | // in the file PATENTS. All contributing project authors may |
7 | // be found in the AUTHORS file in the root of the source tree. |
8 | // ----------------------------------------------------------------------------- |
9 | // |
10 | // Speed-critical decoding functions, default plain-C implementations. |
11 | // |
12 | // Author: Skal (pascal.massimino@gmail.com) |
13 | |
14 | #include <assert.h> |
15 | |
16 | #include "src/dsp/dsp.h" |
17 | #include "src/dec/vp8i_dec.h" |
18 | #include "src/utils/utils.h" |
19 | |
20 | //------------------------------------------------------------------------------ |
21 | |
22 | static WEBP_INLINE uint8_t clip_8b(int v) { |
23 | return (!(v & ~0xff)) ? v : (v < 0) ? 0 : 255; |
24 | } |
25 | |
26 | //------------------------------------------------------------------------------ |
27 | // Transforms (Paragraph 14.4) |
28 | |
// Adds the descaled value (val >> 3) to the pixel at column 'col', row 'row'
// of the block pointed to by 'dst' (which must be in scope at the expansion
// site) and writes it back saturated to 8 bits.
#define STORE(col, row, val) \
  dst[(col) + (row) * BPS] = clip_8b(dst[(col) + (row) * BPS] + ((val) >> 3))

// Stores the four pixels of row 'row' as base+outer, base+inner, base-inner
// and base-outer. 'base' is evaluated only once.
#define STORE2(row, base, outer, inner) do { \
  const int DC = (base);                     \
  STORE(0, row, DC + (outer));               \
  STORE(1, row, DC + (inner));               \
  STORE(2, row, DC - (inner));               \
  STORE(3, row, DC - (outer));               \
} while (0)

// Fixed-point multipliers (16 fractional bits): MUL1 scales by
// (1 + 20091 / 65536) and MUL2 by (35468 / 65536). The argument is
// evaluated more than once in MUL1.
#define MUL1(v) ((v) + (((v) * 20091) >> 16))
#define MUL2(v) (((v) * 35468) >> 16)
42 | |
43 | #if !WEBP_NEON_OMIT_C_CODE |
// Inverse 4x4 transform of the 16 coefficients in 'in'. The result is
// descaled by STORE() (>> 3, the +4 below acting as rounder) and added, with
// 8-bit saturation, to the 4x4 pixel block at 'dst' (rows BPS bytes apart).
static void TransformOne_C(const int16_t* in, uint8_t* dst) {
  int work[4 * 4];
  int k;
  for (k = 0; k < 4; ++k) {    // vertical pass
    const int16_t* const src = in + k;
    int* const line = work + 4 * k;
    const int sum = src[0] + src[8];                    // [-4096, 4094]
    const int diff = src[0] - src[8];                   // [-4095, 4095]
    const int rot_c = MUL2(src[4]) - MUL1(src[12]);     // [-3783, 3783]
    const int rot_d = MUL1(src[4]) + MUL2(src[12]);     // [-3785, 3781]
    line[0] = sum + rot_d;     // [-7881, 7875]
    line[1] = diff + rot_c;    // [-7878, 7878]
    line[2] = diff - rot_c;    // [-7878, 7878]
    line[3] = sum - rot_d;     // [-7877, 7879]
  }
  // Each pass is expanding the dynamic range by ~3.85 (upper bound).
  // The exact value is (2. + (20091 + 35468) / 65536).
  // After the second pass, maximum interval is [-3794, 3794], assuming
  // an input in [-2048, 2047] interval. We then need to add a dst value
  // in the [0, 255] range.
  // In the worst case scenario, the input to clip_8b() can be as large as
  // [-60713, 60968].
  for (k = 0; k < 4; ++k) {    // horizontal pass, one output row per iteration
    const int* const col = work + k;
    const int dc = col[0] + 4;
    const int sum = dc + col[8];
    const int diff = dc - col[8];
    const int rot_c = MUL2(col[4]) - MUL1(col[12]);
    const int rot_d = MUL1(col[4]) + MUL2(col[12]);
    STORE(0, k, sum + rot_d);
    STORE(1, k, diff + rot_c);
    STORE(2, k, diff - rot_c);
    STORE(3, k, sum - rot_d);
  }
}
82 | |
// Simplified transform when only in[0], in[1] and in[4] are non-zero.
// The result is added to the 4x4 pixel block at 'dst'.
static void TransformAC3_C(const int16_t* in, uint8_t* dst) {
  const int base = in[0] + 4;         // DC term plus rounder for the >> 3
  const int row_c = MUL2(in[4]);      // per-row offsets derived from in[4]
  const int row_d = MUL1(in[4]);
  const int col_c = MUL2(in[1]);      // per-column offsets derived from in[1]
  const int col_d = MUL1(in[1]);
  STORE2(0, base + row_d, col_d, col_c);
  STORE2(1, base + row_c, col_d, col_c);
  STORE2(2, base - row_c, col_d, col_c);
  STORE2(3, base - row_d, col_d, col_c);
}
95 | #undef MUL1 |
96 | #undef MUL2 |
97 | #undef STORE2 |
98 | |
// Applies TransformOne_C() to the block at 'in'/'dst' and, when 'do_two' is
// non-zero, also to the next block (16 coefficients further, 4 pixels right).
static void TransformTwo_C(const int16_t* in, uint8_t* dst, int do_two) {
  const int num_blocks = do_two ? 2 : 1;
  int n;
  for (n = 0; n < num_blocks; ++n) {
    TransformOne_C(in + 16 * n, dst + 4 * n);
  }
}
105 | #endif // !WEBP_NEON_OMIT_C_CODE |
106 | |
107 | static void TransformUV_C(const int16_t* in, uint8_t* dst) { |
108 | VP8Transform(in + 0 * 16, dst, 1); |
109 | VP8Transform(in + 2 * 16, dst + 4 * BPS, 1); |
110 | } |
111 | |
112 | #if !WEBP_NEON_OMIT_C_CODE |
// DC-only transform: adds the same descaled value, derived from in[0], to
// every pixel of the 4x4 block at 'dst'.
static void TransformDC_C(const int16_t* in, uint8_t* dst) {
  const int dc_value = in[0] + 4;    // +4 is the rounder for STORE()'s >> 3
  int n;
  for (n = 0; n < 16; ++n) {         // n & 3 is the column, n >> 2 the row
    STORE(n & 3, n >> 2, dc_value);
  }
}
122 | #endif // !WEBP_NEON_OMIT_C_CODE |
123 | |
124 | static void TransformDCUV_C(const int16_t* in, uint8_t* dst) { |
125 | if (in[0 * 16]) VP8TransformDC(in + 0 * 16, dst); |
126 | if (in[1 * 16]) VP8TransformDC(in + 1 * 16, dst + 4); |
127 | if (in[2 * 16]) VP8TransformDC(in + 2 * 16, dst + 4 * BPS); |
128 | if (in[3 * 16]) VP8TransformDC(in + 3 * 16, dst + 4 * BPS + 4); |
129 | } |
130 | |
131 | #undef STORE |
132 | |
133 | //------------------------------------------------------------------------------ |
134 | // Paragraph 14.3 |
135 | |
136 | #if !WEBP_NEON_OMIT_C_CODE |
// Two-pass 4x4 butterfly transform of the 16 values in 'in'. The 16 results
// are written to out[0], out[16], out[32], ... out[240] (one every 16 entries),
// so 'out' must provide at least 241 entries.
static void TransformWHT_C(const int16_t* in, int16_t* out) {
  int mid[16];
  int col, row;
  for (col = 0; col < 4; ++col) {    // first pass, along columns
    const int sum_outer = in[col] + in[col + 12];
    const int sum_inner = in[col + 4] + in[col + 8];
    const int diff_inner = in[col + 4] - in[col + 8];
    const int diff_outer = in[col] - in[col + 12];
    mid[col] = sum_outer + sum_inner;
    mid[col + 4] = diff_outer + diff_inner;
    mid[col + 8] = sum_outer - sum_inner;
    mid[col + 12] = diff_outer - diff_inner;
  }
  for (row = 0; row < 4; ++row) {    // second pass, along rows
    const int* const src = mid + 4 * row;
    int16_t* const dst = out + 64 * row;
    const int base = src[0] + 3;     // w/ rounder
    const int sum_outer = base + src[3];
    const int sum_inner = src[1] + src[2];
    const int diff_inner = src[1] - src[2];
    const int diff_outer = base - src[3];
    dst[0] = (sum_outer + sum_inner) >> 3;
    dst[16] = (diff_outer + diff_inner) >> 3;
    dst[32] = (sum_outer - sum_inner) >> 3;
    dst[48] = (diff_outer - diff_inner) >> 3;
  }
}
163 | #endif // !WEBP_NEON_OMIT_C_CODE |
164 | |
// Function pointer for the WHT transform; VP8DspInit() installs the
// implementation.
void (*VP8TransformWHT)(const int16_t* in, int16_t* out);
166 | |
167 | //------------------------------------------------------------------------------ |
168 | // Intra predictions |
169 | |
// Pixel at column 'col', row 'row' of the block pointed to by 'dst' (which
// must be in scope at the expansion site); rows are BPS bytes apart.
#define DST(col, row) dst[(col) + (row) * BPS]
171 | |
172 | #if !WEBP_NEON_OMIT_C_CODE |
173 | static WEBP_INLINE void TrueMotion(uint8_t* dst, int size) { |
174 | const uint8_t* top = dst - BPS; |
175 | const uint8_t* const clip0 = VP8kclip1 - top[-1]; |
176 | int y; |
177 | for (y = 0; y < size; ++y) { |
178 | const uint8_t* const clip = clip0 + dst[-1]; |
179 | int x; |
180 | for (x = 0; x < size; ++x) { |
181 | dst[x] = clip[top[x]]; |
182 | } |
183 | dst += BPS; |
184 | } |
185 | } |
// TrueMotion predictors for 4x4, 8x8 and 16x16 blocks.
static void TM4_C(uint8_t* dst) {
  TrueMotion(dst, 4);
}

static void TM8uv_C(uint8_t* dst) {
  TrueMotion(dst, 8);
}

static void TM16_C(uint8_t* dst) {
  TrueMotion(dst, 16);
}
189 | |
190 | //------------------------------------------------------------------------------ |
191 | // 16x16 |
192 | |
193 | static void VE16_C(uint8_t* dst) { // vertical |
194 | int j; |
195 | for (j = 0; j < 16; ++j) { |
196 | memcpy(dst + j * BPS, dst - BPS, 16); |
197 | } |
198 | } |
199 | |
200 | static void HE16_C(uint8_t* dst) { // horizontal |
201 | int j; |
202 | for (j = 16; j > 0; --j) { |
203 | memset(dst, dst[-1], 16); |
204 | dst += BPS; |
205 | } |
206 | } |
207 | |
208 | static WEBP_INLINE void Put16(int v, uint8_t* dst) { |
209 | int j; |
210 | for (j = 0; j < 16; ++j) { |
211 | memset(dst + j * BPS, v, 16); |
212 | } |
213 | } |
214 | |
215 | static void DC16_C(uint8_t* dst) { // DC |
216 | int DC = 16; |
217 | int j; |
218 | for (j = 0; j < 16; ++j) { |
219 | DC += dst[-1 + j * BPS] + dst[j - BPS]; |
220 | } |
221 | Put16(DC >> 5, dst); |
222 | } |
223 | |
224 | static void DC16NoTop_C(uint8_t* dst) { // DC with top samples not available |
225 | int DC = 8; |
226 | int j; |
227 | for (j = 0; j < 16; ++j) { |
228 | DC += dst[-1 + j * BPS]; |
229 | } |
230 | Put16(DC >> 4, dst); |
231 | } |
232 | |
233 | static void DC16NoLeft_C(uint8_t* dst) { // DC with left samples not available |
234 | int DC = 8; |
235 | int i; |
236 | for (i = 0; i < 16; ++i) { |
237 | DC += dst[i - BPS]; |
238 | } |
239 | Put16(DC >> 4, dst); |
240 | } |
241 | |
// DC prediction with neither top nor left samples: fills the 16x16 block
// with the constant 0x80.
static void DC16NoTopLeft_C(uint8_t* dst) {
  const int mid_gray = 0x80;
  Put16(mid_gray, dst);
}
245 | #endif // !WEBP_NEON_OMIT_C_CODE |
246 | |
247 | VP8PredFunc VP8PredLuma16[NUM_B_DC_MODES]; |
248 | |
249 | //------------------------------------------------------------------------------ |
250 | // 4x4 |
251 | |
// Rounded (1, 2, 1) / 4 weighted average of three samples, as an 8-bit value.
#define AVG3(p, q, r) ((uint8_t)(((p) + 2 * (q) + (r) + 2) >> 2))
// Rounded average of two samples.
#define AVG2(p, q) (((p) + (q) + 1) >> 1)
254 | |
255 | #if !WEBP_NEON_OMIT_C_CODE |
256 | static void VE4_C(uint8_t* dst) { // vertical |
257 | const uint8_t* top = dst - BPS; |
258 | const uint8_t vals[4] = { |
259 | AVG3(top[-1], top[0], top[1]), |
260 | AVG3(top[ 0], top[1], top[2]), |
261 | AVG3(top[ 1], top[2], top[3]), |
262 | AVG3(top[ 2], top[3], top[4]) |
263 | }; |
264 | int i; |
265 | for (i = 0; i < 4; ++i) { |
266 | memcpy(dst + i * BPS, vals, sizeof(vals)); |
267 | } |
268 | } |
269 | #endif // !WEBP_NEON_OMIT_C_CODE |
270 | |
271 | static void HE4_C(uint8_t* dst) { // horizontal |
272 | const int A = dst[-1 - BPS]; |
273 | const int B = dst[-1]; |
274 | const int C = dst[-1 + BPS]; |
275 | const int D = dst[-1 + 2 * BPS]; |
276 | const int E = dst[-1 + 3 * BPS]; |
277 | WebPUint32ToMem(dst + 0 * BPS, 0x01010101U * AVG3(A, B, C)); |
278 | WebPUint32ToMem(dst + 1 * BPS, 0x01010101U * AVG3(B, C, D)); |
279 | WebPUint32ToMem(dst + 2 * BPS, 0x01010101U * AVG3(C, D, E)); |
280 | WebPUint32ToMem(dst + 3 * BPS, 0x01010101U * AVG3(D, E, E)); |
281 | } |
282 | |
283 | #if !WEBP_NEON_OMIT_C_CODE |
284 | static void DC4_C(uint8_t* dst) { // DC |
285 | uint32_t dc = 4; |
286 | int i; |
287 | for (i = 0; i < 4; ++i) dc += dst[i - BPS] + dst[-1 + i * BPS]; |
288 | dc >>= 3; |
289 | for (i = 0; i < 4; ++i) memset(dst + i * BPS, dc, 4); |
290 | } |
291 | |
292 | static void RD4_C(uint8_t* dst) { // Down-right |
293 | const int I = dst[-1 + 0 * BPS]; |
294 | const int J = dst[-1 + 1 * BPS]; |
295 | const int K = dst[-1 + 2 * BPS]; |
296 | const int L = dst[-1 + 3 * BPS]; |
297 | const int X = dst[-1 - BPS]; |
298 | const int A = dst[0 - BPS]; |
299 | const int B = dst[1 - BPS]; |
300 | const int C = dst[2 - BPS]; |
301 | const int D = dst[3 - BPS]; |
302 | DST(0, 3) = AVG3(J, K, L); |
303 | DST(1, 3) = DST(0, 2) = AVG3(I, J, K); |
304 | DST(2, 3) = DST(1, 2) = DST(0, 1) = AVG3(X, I, J); |
305 | DST(3, 3) = DST(2, 2) = DST(1, 1) = DST(0, 0) = AVG3(A, X, I); |
306 | DST(3, 2) = DST(2, 1) = DST(1, 0) = AVG3(B, A, X); |
307 | DST(3, 1) = DST(2, 0) = AVG3(C, B, A); |
308 | DST(3, 0) = AVG3(D, C, B); |
309 | } |
310 | |
311 | static void LD4_C(uint8_t* dst) { // Down-Left |
312 | const int A = dst[0 - BPS]; |
313 | const int B = dst[1 - BPS]; |
314 | const int C = dst[2 - BPS]; |
315 | const int D = dst[3 - BPS]; |
316 | const int E = dst[4 - BPS]; |
317 | const int F = dst[5 - BPS]; |
318 | const int G = dst[6 - BPS]; |
319 | const int H = dst[7 - BPS]; |
320 | DST(0, 0) = AVG3(A, B, C); |
321 | DST(1, 0) = DST(0, 1) = AVG3(B, C, D); |
322 | DST(2, 0) = DST(1, 1) = DST(0, 2) = AVG3(C, D, E); |
323 | DST(3, 0) = DST(2, 1) = DST(1, 2) = DST(0, 3) = AVG3(D, E, F); |
324 | DST(3, 1) = DST(2, 2) = DST(1, 3) = AVG3(E, F, G); |
325 | DST(3, 2) = DST(2, 3) = AVG3(F, G, H); |
326 | DST(3, 3) = AVG3(G, H, H); |
327 | } |
328 | #endif // !WEBP_NEON_OMIT_C_CODE |
329 | |
330 | static void VR4_C(uint8_t* dst) { // Vertical-Right |
331 | const int I = dst[-1 + 0 * BPS]; |
332 | const int J = dst[-1 + 1 * BPS]; |
333 | const int K = dst[-1 + 2 * BPS]; |
334 | const int X = dst[-1 - BPS]; |
335 | const int A = dst[0 - BPS]; |
336 | const int B = dst[1 - BPS]; |
337 | const int C = dst[2 - BPS]; |
338 | const int D = dst[3 - BPS]; |
339 | DST(0, 0) = DST(1, 2) = AVG2(X, A); |
340 | DST(1, 0) = DST(2, 2) = AVG2(A, B); |
341 | DST(2, 0) = DST(3, 2) = AVG2(B, C); |
342 | DST(3, 0) = AVG2(C, D); |
343 | |
344 | DST(0, 3) = AVG3(K, J, I); |
345 | DST(0, 2) = AVG3(J, I, X); |
346 | DST(0, 1) = DST(1, 3) = AVG3(I, X, A); |
347 | DST(1, 1) = DST(2, 3) = AVG3(X, A, B); |
348 | DST(2, 1) = DST(3, 3) = AVG3(A, B, C); |
349 | DST(3, 1) = AVG3(B, C, D); |
350 | } |
351 | |
352 | static void VL4_C(uint8_t* dst) { // Vertical-Left |
353 | const int A = dst[0 - BPS]; |
354 | const int B = dst[1 - BPS]; |
355 | const int C = dst[2 - BPS]; |
356 | const int D = dst[3 - BPS]; |
357 | const int E = dst[4 - BPS]; |
358 | const int F = dst[5 - BPS]; |
359 | const int G = dst[6 - BPS]; |
360 | const int H = dst[7 - BPS]; |
361 | DST(0, 0) = AVG2(A, B); |
362 | DST(1, 0) = DST(0, 2) = AVG2(B, C); |
363 | DST(2, 0) = DST(1, 2) = AVG2(C, D); |
364 | DST(3, 0) = DST(2, 2) = AVG2(D, E); |
365 | |
366 | DST(0, 1) = AVG3(A, B, C); |
367 | DST(1, 1) = DST(0, 3) = AVG3(B, C, D); |
368 | DST(2, 1) = DST(1, 3) = AVG3(C, D, E); |
369 | DST(3, 1) = DST(2, 3) = AVG3(D, E, F); |
370 | DST(3, 2) = AVG3(E, F, G); |
371 | DST(3, 3) = AVG3(F, G, H); |
372 | } |
373 | |
374 | static void HU4_C(uint8_t* dst) { // Horizontal-Up |
375 | const int I = dst[-1 + 0 * BPS]; |
376 | const int J = dst[-1 + 1 * BPS]; |
377 | const int K = dst[-1 + 2 * BPS]; |
378 | const int L = dst[-1 + 3 * BPS]; |
379 | DST(0, 0) = AVG2(I, J); |
380 | DST(2, 0) = DST(0, 1) = AVG2(J, K); |
381 | DST(2, 1) = DST(0, 2) = AVG2(K, L); |
382 | DST(1, 0) = AVG3(I, J, K); |
383 | DST(3, 0) = DST(1, 1) = AVG3(J, K, L); |
384 | DST(3, 1) = DST(1, 2) = AVG3(K, L, L); |
385 | DST(3, 2) = DST(2, 2) = |
386 | DST(0, 3) = DST(1, 3) = DST(2, 3) = DST(3, 3) = L; |
387 | } |
388 | |
389 | static void HD4_C(uint8_t* dst) { // Horizontal-Down |
390 | const int I = dst[-1 + 0 * BPS]; |
391 | const int J = dst[-1 + 1 * BPS]; |
392 | const int K = dst[-1 + 2 * BPS]; |
393 | const int L = dst[-1 + 3 * BPS]; |
394 | const int X = dst[-1 - BPS]; |
395 | const int A = dst[0 - BPS]; |
396 | const int B = dst[1 - BPS]; |
397 | const int C = dst[2 - BPS]; |
398 | |
399 | DST(0, 0) = DST(2, 1) = AVG2(I, X); |
400 | DST(0, 1) = DST(2, 2) = AVG2(J, I); |
401 | DST(0, 2) = DST(2, 3) = AVG2(K, J); |
402 | DST(0, 3) = AVG2(L, K); |
403 | |
404 | DST(3, 0) = AVG3(A, B, C); |
405 | DST(2, 0) = AVG3(X, A, B); |
406 | DST(1, 0) = DST(3, 1) = AVG3(I, X, A); |
407 | DST(1, 1) = DST(3, 2) = AVG3(J, I, X); |
408 | DST(1, 2) = DST(3, 3) = AVG3(K, J, I); |
409 | DST(1, 3) = AVG3(L, K, J); |
410 | } |
411 | |
412 | #undef DST |
413 | #undef AVG3 |
414 | #undef AVG2 |
415 | |
416 | VP8PredFunc VP8PredLuma4[NUM_BMODES]; |
417 | |
418 | //------------------------------------------------------------------------------ |
419 | // Chroma |
420 | |
421 | #if !WEBP_NEON_OMIT_C_CODE |
422 | static void VE8uv_C(uint8_t* dst) { // vertical |
423 | int j; |
424 | for (j = 0; j < 8; ++j) { |
425 | memcpy(dst + j * BPS, dst - BPS, 8); |
426 | } |
427 | } |
428 | |
429 | static void HE8uv_C(uint8_t* dst) { // horizontal |
430 | int j; |
431 | for (j = 0; j < 8; ++j) { |
432 | memset(dst, dst[-1], 8); |
433 | dst += BPS; |
434 | } |
435 | } |
436 | |
437 | // helper for chroma-DC predictions |
438 | static WEBP_INLINE void Put8x8uv(uint8_t value, uint8_t* dst) { |
439 | int j; |
440 | for (j = 0; j < 8; ++j) { |
441 | memset(dst + j * BPS, value, 8); |
442 | } |
443 | } |
444 | |
445 | static void DC8uv_C(uint8_t* dst) { // DC |
446 | int dc0 = 8; |
447 | int i; |
448 | for (i = 0; i < 8; ++i) { |
449 | dc0 += dst[i - BPS] + dst[-1 + i * BPS]; |
450 | } |
451 | Put8x8uv(dc0 >> 4, dst); |
452 | } |
453 | |
454 | static void DC8uvNoLeft_C(uint8_t* dst) { // DC with no left samples |
455 | int dc0 = 4; |
456 | int i; |
457 | for (i = 0; i < 8; ++i) { |
458 | dc0 += dst[i - BPS]; |
459 | } |
460 | Put8x8uv(dc0 >> 3, dst); |
461 | } |
462 | |
463 | static void DC8uvNoTop_C(uint8_t* dst) { // DC with no top samples |
464 | int dc0 = 4; |
465 | int i; |
466 | for (i = 0; i < 8; ++i) { |
467 | dc0 += dst[-1 + i * BPS]; |
468 | } |
469 | Put8x8uv(dc0 >> 3, dst); |
470 | } |
471 | |
// DC prediction with neither top nor left samples: fills the 8x8 block with
// the constant 0x80.
static void DC8uvNoTopLeft_C(uint8_t* dst) {
  const uint8_t mid_gray = 0x80;
  Put8x8uv(mid_gray, dst);
}
475 | #endif // !WEBP_NEON_OMIT_C_CODE |
476 | |
477 | VP8PredFunc VP8PredChroma8[NUM_B_DC_MODES]; |
478 | |
479 | //------------------------------------------------------------------------------ |
480 | // Edge filtering functions |
481 | |
482 | #if !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC |
483 | // 4 pixels in, 2 pixels out |
484 | static WEBP_INLINE void DoFilter2_C(uint8_t* p, int step) { |
485 | const int p1 = p[-2*step], p0 = p[-step], q0 = p[0], q1 = p[step]; |
486 | const int a = 3 * (q0 - p0) + VP8ksclip1[p1 - q1]; // in [-893,892] |
487 | const int a1 = VP8ksclip2[(a + 4) >> 3]; // in [-16,15] |
488 | const int a2 = VP8ksclip2[(a + 3) >> 3]; |
489 | p[-step] = VP8kclip1[p0 + a2]; |
490 | p[ 0] = VP8kclip1[q0 - a1]; |
491 | } |
492 | |
493 | // 4 pixels in, 4 pixels out |
494 | static WEBP_INLINE void DoFilter4_C(uint8_t* p, int step) { |
495 | const int p1 = p[-2*step], p0 = p[-step], q0 = p[0], q1 = p[step]; |
496 | const int a = 3 * (q0 - p0); |
497 | const int a1 = VP8ksclip2[(a + 4) >> 3]; |
498 | const int a2 = VP8ksclip2[(a + 3) >> 3]; |
499 | const int a3 = (a1 + 1) >> 1; |
500 | p[-2*step] = VP8kclip1[p1 + a3]; |
501 | p[- step] = VP8kclip1[p0 + a2]; |
502 | p[ 0] = VP8kclip1[q0 - a1]; |
503 | p[ step] = VP8kclip1[q1 - a3]; |
504 | } |
505 | |
506 | // 6 pixels in, 6 pixels out |
507 | static WEBP_INLINE void DoFilter6_C(uint8_t* p, int step) { |
508 | const int p2 = p[-3*step], p1 = p[-2*step], p0 = p[-step]; |
509 | const int q0 = p[0], q1 = p[step], q2 = p[2*step]; |
510 | const int a = VP8ksclip1[3 * (q0 - p0) + VP8ksclip1[p1 - q1]]; |
511 | // a is in [-128,127], a1 in [-27,27], a2 in [-18,18] and a3 in [-9,9] |
512 | const int a1 = (27 * a + 63) >> 7; // eq. to ((3 * a + 7) * 9) >> 7 |
513 | const int a2 = (18 * a + 63) >> 7; // eq. to ((2 * a + 7) * 9) >> 7 |
514 | const int a3 = (9 * a + 63) >> 7; // eq. to ((1 * a + 7) * 9) >> 7 |
515 | p[-3*step] = VP8kclip1[p2 + a3]; |
516 | p[-2*step] = VP8kclip1[p1 + a2]; |
517 | p[- step] = VP8kclip1[p0 + a1]; |
518 | p[ 0] = VP8kclip1[q0 - a1]; |
519 | p[ step] = VP8kclip1[q1 - a2]; |
520 | p[ 2*step] = VP8kclip1[q2 - a3]; |
521 | } |
522 | |
523 | static WEBP_INLINE int Hev(const uint8_t* p, int step, int thresh) { |
524 | const int p1 = p[-2*step], p0 = p[-step], q0 = p[0], q1 = p[step]; |
525 | return (VP8kabs0[p1 - p0] > thresh) || (VP8kabs0[q1 - q0] > thresh); |
526 | } |
527 | #endif // !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC |
528 | |
529 | #if !WEBP_NEON_OMIT_C_CODE |
530 | static WEBP_INLINE int NeedsFilter_C(const uint8_t* p, int step, int t) { |
531 | const int p1 = p[-2 * step], p0 = p[-step], q0 = p[0], q1 = p[step]; |
532 | return ((4 * VP8kabs0[p0 - q0] + VP8kabs0[p1 - q1]) <= t); |
533 | } |
534 | #endif // !WEBP_NEON_OMIT_C_CODE |
535 | |
536 | #if !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC |
537 | static WEBP_INLINE int NeedsFilter2_C(const uint8_t* p, |
538 | int step, int t, int it) { |
539 | const int p3 = p[-4 * step], p2 = p[-3 * step], p1 = p[-2 * step]; |
540 | const int p0 = p[-step], q0 = p[0]; |
541 | const int q1 = p[step], q2 = p[2 * step], q3 = p[3 * step]; |
542 | if ((4 * VP8kabs0[p0 - q0] + VP8kabs0[p1 - q1]) > t) return 0; |
543 | return VP8kabs0[p3 - p2] <= it && VP8kabs0[p2 - p1] <= it && |
544 | VP8kabs0[p1 - p0] <= it && VP8kabs0[q3 - q2] <= it && |
545 | VP8kabs0[q2 - q1] <= it && VP8kabs0[q1 - q0] <= it; |
546 | } |
547 | #endif // !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC |
548 | |
549 | //------------------------------------------------------------------------------ |
550 | // Simple In-loop filtering (Paragraph 15.2) |
551 | |
552 | #if !WEBP_NEON_OMIT_C_CODE |
// Simple filter across the horizontal edge located at row 'p': processes the
// 16 columns p[0..15], looking at pixels 'stride' apart.
static void SimpleVFilter16_C(uint8_t* p, int stride, int thresh) {
  const int limit = 2 * thresh + 1;
  int x;
  for (x = 0; x < 16; ++x) {
    uint8_t* const px = p + x;
    if (!NeedsFilter_C(px, stride, limit)) continue;
    DoFilter2_C(px, stride);
  }
}
562 | |
// Simple filter across the vertical edge located at column 'p': processes 16
// rows ('stride' apart), looking at horizontally adjacent pixels.
static void SimpleHFilter16_C(uint8_t* p, int stride, int thresh) {
  const int limit = 2 * thresh + 1;
  int y;
  for (y = 0; y < 16; ++y) {
    uint8_t* const px = p + y * stride;
    if (!NeedsFilter_C(px, 1, limit)) continue;
    DoFilter2_C(px, 1);
  }
}
572 | |
// Applies SimpleVFilter16_C() to the three edges located 4, 8 and 12 rows
// below 'p'.
static void SimpleVFilter16i_C(uint8_t* p, int stride, int thresh) {
  int edge;
  for (edge = 1; edge <= 3; ++edge) {
    SimpleVFilter16_C(p + 4 * edge * stride, stride, thresh);
  }
}
580 | |
// Applies SimpleHFilter16_C() to the three edges located 4, 8 and 12 columns
// right of 'p'.
static void SimpleHFilter16i_C(uint8_t* p, int stride, int thresh) {
  int edge;
  for (edge = 1; edge <= 3; ++edge) {
    SimpleHFilter16_C(p + 4 * edge, stride, thresh);
  }
}
588 | #endif // !WEBP_NEON_OMIT_C_CODE |
589 | |
590 | //------------------------------------------------------------------------------ |
591 | // Complex In-loop filtering (Paragraph 15.3) |
592 | |
593 | #if !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC |
594 | static WEBP_INLINE void FilterLoop26_C(uint8_t* p, |
595 | int hstride, int vstride, int size, |
596 | int thresh, int ithresh, |
597 | int hev_thresh) { |
598 | const int thresh2 = 2 * thresh + 1; |
599 | while (size-- > 0) { |
600 | if (NeedsFilter2_C(p, hstride, thresh2, ithresh)) { |
601 | if (Hev(p, hstride, hev_thresh)) { |
602 | DoFilter2_C(p, hstride); |
603 | } else { |
604 | DoFilter6_C(p, hstride); |
605 | } |
606 | } |
607 | p += vstride; |
608 | } |
609 | } |
610 | |
611 | static WEBP_INLINE void FilterLoop24_C(uint8_t* p, |
612 | int hstride, int vstride, int size, |
613 | int thresh, int ithresh, |
614 | int hev_thresh) { |
615 | const int thresh2 = 2 * thresh + 1; |
616 | while (size-- > 0) { |
617 | if (NeedsFilter2_C(p, hstride, thresh2, ithresh)) { |
618 | if (Hev(p, hstride, hev_thresh)) { |
619 | DoFilter2_C(p, hstride); |
620 | } else { |
621 | DoFilter4_C(p, hstride); |
622 | } |
623 | } |
624 | p += vstride; |
625 | } |
626 | } |
627 | #endif // !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC |
628 | |
629 | #if !WEBP_NEON_OMIT_C_CODE |
// on macroblock edges
// Filters the 16-pixel wide horizontal edge at 'p' (pixels across the edge
// are 'stride' apart).
static void VFilter16_C(uint8_t* p, int stride,
                        int thresh, int ithresh, int hev_thresh) {
  const int across = stride;    // step across the edge
  const int along = 1;          // step along the edge
  FilterLoop26_C(p, across, along, 16, thresh, ithresh, hev_thresh);
}
635 | |
// Filters the 16-pixel tall vertical edge at 'p' (rows are 'stride' apart).
static void HFilter16_C(uint8_t* p, int stride,
                        int thresh, int ithresh, int hev_thresh) {
  const int across = 1;         // step across the edge
  const int along = stride;     // step along the edge
  FilterLoop26_C(p, across, along, 16, thresh, ithresh, hev_thresh);
}
640 | |
// on three inner edges
// Filters the horizontal edges located 4, 8 and 12 rows below 'p'.
static void VFilter16i_C(uint8_t* p, int stride,
                         int thresh, int ithresh, int hev_thresh) {
  int edge;
  for (edge = 1; edge <= 3; ++edge) {
    FilterLoop24_C(p + 4 * edge * stride, stride, 1, 16,
                   thresh, ithresh, hev_thresh);
  }
}
650 | #endif // !WEBP_NEON_OMIT_C_CODE |
651 | |
652 | #if !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC |
// Filters the vertical edges located 4, 8 and 12 columns right of 'p'.
static void HFilter16i_C(uint8_t* p, int stride,
                         int thresh, int ithresh, int hev_thresh) {
  int edge;
  for (edge = 1; edge <= 3; ++edge) {
    FilterLoop24_C(p + 4 * edge, 1, stride, 16, thresh, ithresh, hev_thresh);
  }
}
661 | #endif // !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC |
662 | |
663 | #if !WEBP_NEON_OMIT_C_CODE |
// 8-pixels wide variant, for chroma filtering
// Filters the horizontal edge at 'u', then the one at 'v'.
static void VFilter8_C(uint8_t* u, uint8_t* v, int stride,
                       int thresh, int ithresh, int hev_thresh) {
  uint8_t* const planes[2] = { u, v };
  int n;
  for (n = 0; n < 2; ++n) {
    FilterLoop26_C(planes[n], stride, 1, 8, thresh, ithresh, hev_thresh);
  }
}
670 | #endif // !WEBP_NEON_OMIT_C_CODE |
671 | |
672 | #if !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC |
// Filters the 8-pixel tall vertical edge at 'u', then the one at 'v'.
static void HFilter8_C(uint8_t* u, uint8_t* v, int stride,
                       int thresh, int ithresh, int hev_thresh) {
  uint8_t* const planes[2] = { u, v };
  int n;
  for (n = 0; n < 2; ++n) {
    FilterLoop26_C(planes[n], 1, stride, 8, thresh, ithresh, hev_thresh);
  }
}
678 | #endif // !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC |
679 | |
680 | #if !WEBP_NEON_OMIT_C_CODE |
// Filters the inner horizontal edge located 4 rows below 'u', then the one
// 4 rows below 'v'.
static void VFilter8i_C(uint8_t* u, uint8_t* v, int stride,
                        int thresh, int ithresh, int hev_thresh) {
  uint8_t* const planes[2] = { u, v };
  int n;
  for (n = 0; n < 2; ++n) {
    FilterLoop24_C(planes[n] + 4 * stride, stride, 1, 8,
                   thresh, ithresh, hev_thresh);
  }
}
686 | #endif // !WEBP_NEON_OMIT_C_CODE |
687 | |
688 | #if !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC |
// Filters the inner vertical edge located 4 columns right of 'u', then the
// one 4 columns right of 'v'.
static void HFilter8i_C(uint8_t* u, uint8_t* v, int stride,
                        int thresh, int ithresh, int hev_thresh) {
  uint8_t* const planes[2] = { u, v };
  int n;
  for (n = 0; n < 2; ++n) {
    FilterLoop24_C(planes[n] + 4, 1, stride, 8, thresh, ithresh, hev_thresh);
  }
}
694 | #endif // !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC |
695 | |
696 | //------------------------------------------------------------------------------ |
697 | |
698 | static void DitherCombine8x8_C(const uint8_t* dither, uint8_t* dst, |
699 | int dst_stride) { |
700 | int i, j; |
701 | for (j = 0; j < 8; ++j) { |
702 | for (i = 0; i < 8; ++i) { |
703 | const int delta0 = dither[i] - VP8_DITHER_AMP_CENTER; |
704 | const int delta1 = |
705 | (delta0 + VP8_DITHER_DESCALE_ROUNDER) >> VP8_DITHER_DESCALE; |
706 | dst[i] = clip_8b((int)dst[i] + delta1); |
707 | } |
708 | dst += dst_stride; |
709 | dither += 8; |
710 | } |
711 | } |
712 | |
713 | //------------------------------------------------------------------------------ |
714 | |
715 | VP8DecIdct2 VP8Transform; |
716 | VP8DecIdct VP8TransformAC3; |
717 | VP8DecIdct VP8TransformUV; |
718 | VP8DecIdct VP8TransformDC; |
719 | VP8DecIdct VP8TransformDCUV; |
720 | |
721 | VP8LumaFilterFunc VP8VFilter16; |
722 | VP8LumaFilterFunc VP8HFilter16; |
723 | VP8ChromaFilterFunc VP8VFilter8; |
724 | VP8ChromaFilterFunc VP8HFilter8; |
725 | VP8LumaFilterFunc VP8VFilter16i; |
726 | VP8LumaFilterFunc VP8HFilter16i; |
727 | VP8ChromaFilterFunc VP8VFilter8i; |
728 | VP8ChromaFilterFunc VP8HFilter8i; |
729 | VP8SimpleFilterFunc VP8SimpleVFilter16; |
730 | VP8SimpleFilterFunc VP8SimpleHFilter16; |
731 | VP8SimpleFilterFunc VP8SimpleVFilter16i; |
732 | VP8SimpleFilterFunc VP8SimpleHFilter16i; |
733 | |
734 | void (*VP8DitherCombine8x8)(const uint8_t* dither, uint8_t* dst, |
735 | int dst_stride); |
736 | |
737 | extern void VP8DspInitSSE2(void); |
738 | extern void VP8DspInitSSE41(void); |
739 | extern void VP8DspInitNEON(void); |
740 | extern void VP8DspInitMIPS32(void); |
741 | extern void VP8DspInitMIPSdspR2(void); |
742 | extern void VP8DspInitMSA(void); |
743 | |
744 | WEBP_DSP_INIT_FUNC(VP8DspInit) { |
745 | VP8InitClipTables(); |
746 | |
747 | #if !WEBP_NEON_OMIT_C_CODE |
748 | VP8TransformWHT = TransformWHT_C; |
749 | VP8Transform = TransformTwo_C; |
750 | VP8TransformDC = TransformDC_C; |
751 | VP8TransformAC3 = TransformAC3_C; |
752 | #endif |
753 | VP8TransformUV = TransformUV_C; |
754 | VP8TransformDCUV = TransformDCUV_C; |
755 | |
756 | #if !WEBP_NEON_OMIT_C_CODE |
757 | VP8VFilter16 = VFilter16_C; |
758 | VP8VFilter16i = VFilter16i_C; |
759 | VP8HFilter16 = HFilter16_C; |
760 | VP8VFilter8 = VFilter8_C; |
761 | VP8VFilter8i = VFilter8i_C; |
762 | VP8SimpleVFilter16 = SimpleVFilter16_C; |
763 | VP8SimpleHFilter16 = SimpleHFilter16_C; |
764 | VP8SimpleVFilter16i = SimpleVFilter16i_C; |
765 | VP8SimpleHFilter16i = SimpleHFilter16i_C; |
766 | #endif |
767 | |
768 | #if !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC |
769 | VP8HFilter16i = HFilter16i_C; |
770 | VP8HFilter8 = HFilter8_C; |
771 | VP8HFilter8i = HFilter8i_C; |
772 | #endif |
773 | |
774 | #if !WEBP_NEON_OMIT_C_CODE |
775 | VP8PredLuma4[0] = DC4_C; |
776 | VP8PredLuma4[1] = TM4_C; |
777 | VP8PredLuma4[2] = VE4_C; |
778 | VP8PredLuma4[4] = RD4_C; |
779 | VP8PredLuma4[6] = LD4_C; |
780 | #endif |
781 | |
782 | VP8PredLuma4[3] = HE4_C; |
783 | VP8PredLuma4[5] = VR4_C; |
784 | VP8PredLuma4[7] = VL4_C; |
785 | VP8PredLuma4[8] = HD4_C; |
786 | VP8PredLuma4[9] = HU4_C; |
787 | |
788 | #if !WEBP_NEON_OMIT_C_CODE |
789 | VP8PredLuma16[0] = DC16_C; |
790 | VP8PredLuma16[1] = TM16_C; |
791 | VP8PredLuma16[2] = VE16_C; |
792 | VP8PredLuma16[3] = HE16_C; |
793 | VP8PredLuma16[4] = DC16NoTop_C; |
794 | VP8PredLuma16[5] = DC16NoLeft_C; |
795 | VP8PredLuma16[6] = DC16NoTopLeft_C; |
796 | |
797 | VP8PredChroma8[0] = DC8uv_C; |
798 | VP8PredChroma8[1] = TM8uv_C; |
799 | VP8PredChroma8[2] = VE8uv_C; |
800 | VP8PredChroma8[3] = HE8uv_C; |
801 | VP8PredChroma8[4] = DC8uvNoTop_C; |
802 | VP8PredChroma8[5] = DC8uvNoLeft_C; |
803 | VP8PredChroma8[6] = DC8uvNoTopLeft_C; |
804 | #endif |
805 | |
806 | VP8DitherCombine8x8 = DitherCombine8x8_C; |
807 | |
808 | // If defined, use CPUInfo() to overwrite some pointers with faster versions. |
809 | if (VP8GetCPUInfo != NULL) { |
810 | #if defined(WEBP_USE_SSE2) |
811 | if (VP8GetCPUInfo(kSSE2)) { |
812 | VP8DspInitSSE2(); |
813 | #if defined(WEBP_USE_SSE41) |
814 | if (VP8GetCPUInfo(kSSE4_1)) { |
815 | VP8DspInitSSE41(); |
816 | } |
817 | #endif |
818 | } |
819 | #endif |
820 | #if defined(WEBP_USE_MIPS32) |
821 | if (VP8GetCPUInfo(kMIPS32)) { |
822 | VP8DspInitMIPS32(); |
823 | } |
824 | #endif |
825 | #if defined(WEBP_USE_MIPS_DSP_R2) |
826 | if (VP8GetCPUInfo(kMIPSdspR2)) { |
827 | VP8DspInitMIPSdspR2(); |
828 | } |
829 | #endif |
830 | #if defined(WEBP_USE_MSA) |
831 | if (VP8GetCPUInfo(kMSA)) { |
832 | VP8DspInitMSA(); |
833 | } |
834 | #endif |
835 | } |
836 | |
837 | #if defined(WEBP_USE_NEON) |
838 | if (WEBP_NEON_OMIT_C_CODE || |
839 | (VP8GetCPUInfo != NULL && VP8GetCPUInfo(kNEON))) { |
840 | VP8DspInitNEON(); |
841 | } |
842 | #endif |
843 | |
844 | assert(VP8TransformWHT != NULL); |
845 | assert(VP8Transform != NULL); |
846 | assert(VP8TransformDC != NULL); |
847 | assert(VP8TransformAC3 != NULL); |
848 | assert(VP8TransformUV != NULL); |
849 | assert(VP8TransformDCUV != NULL); |
850 | assert(VP8VFilter16 != NULL); |
851 | assert(VP8HFilter16 != NULL); |
852 | assert(VP8VFilter8 != NULL); |
853 | assert(VP8HFilter8 != NULL); |
854 | assert(VP8VFilter16i != NULL); |
855 | assert(VP8HFilter16i != NULL); |
856 | assert(VP8VFilter8i != NULL); |
857 | assert(VP8HFilter8i != NULL); |
858 | assert(VP8SimpleVFilter16 != NULL); |
859 | assert(VP8SimpleHFilter16 != NULL); |
860 | assert(VP8SimpleVFilter16i != NULL); |
861 | assert(VP8SimpleHFilter16i != NULL); |
862 | assert(VP8PredLuma4[0] != NULL); |
863 | assert(VP8PredLuma4[1] != NULL); |
864 | assert(VP8PredLuma4[2] != NULL); |
865 | assert(VP8PredLuma4[3] != NULL); |
866 | assert(VP8PredLuma4[4] != NULL); |
867 | assert(VP8PredLuma4[5] != NULL); |
868 | assert(VP8PredLuma4[6] != NULL); |
869 | assert(VP8PredLuma4[7] != NULL); |
870 | assert(VP8PredLuma4[8] != NULL); |
871 | assert(VP8PredLuma4[9] != NULL); |
872 | assert(VP8PredLuma16[0] != NULL); |
873 | assert(VP8PredLuma16[1] != NULL); |
874 | assert(VP8PredLuma16[2] != NULL); |
875 | assert(VP8PredLuma16[3] != NULL); |
876 | assert(VP8PredLuma16[4] != NULL); |
877 | assert(VP8PredLuma16[5] != NULL); |
878 | assert(VP8PredLuma16[6] != NULL); |
879 | assert(VP8PredChroma8[0] != NULL); |
880 | assert(VP8PredChroma8[1] != NULL); |
881 | assert(VP8PredChroma8[2] != NULL); |
882 | assert(VP8PredChroma8[3] != NULL); |
883 | assert(VP8PredChroma8[4] != NULL); |
884 | assert(VP8PredChroma8[5] != NULL); |
885 | assert(VP8PredChroma8[6] != NULL); |
886 | assert(VP8DitherCombine8x8 != NULL); |
887 | } |
888 | |