dec_mips32.c source code [engine/third_party/libwebp/src/dsp/dec_mips32.c]

1	// Copyright 2014 Google Inc. All Rights Reserved.
2	//
3	// Use of this source code is governed by a BSD-style license
4	// that can be found in the COPYING file in the root of the source
5	// tree. An additional intellectual property rights grant can be found
6	// in the file PATENTS. All contributing project authors may
7	// be found in the AUTHORS file in the root of the source tree.
8	// -----------------------------------------------------------------------------
9	//
10	// MIPS version of dsp functions
11	//
12	// Author(s): Djordje Pesut (djordje.pesut@imgtec.com)
13	// Jovan Zelincevic (jovan.zelincevic@imgtec.com)
14
15	#include "./dsp.h"
16
17	#if defined(WEBP_USE_MIPS32)
18
19	#include "./mips_macro.h"
20
// Fixed-point multipliers (16 fractional bits) used by TransformOne(), where
// every product is followed by an arithmetic shift right by 16:
//   kC1 / 65536 ~= 1.30656   (85627 = 20091 + 65536)
//   kC2 / 65536 ~= 0.54120
// NOTE(review): these look like the VP8 inverse-transform constants
// sqrt(2)*cos(pi/8) and sqrt(2)*sin(pi/8) -- confirm against the VP8 spec.
static const int kC1 = 20091 + (1 << 16);
static const int kC2 = 35468;
23
24	static WEBP_INLINE int abs_mips32(int x) {
25	const int sign = x >> `31`;
26	return (x ^ sign) - sign;
27	}
28
29	// 4 pixels in, 2 pixels out
30	static WEBP_INLINE void do_filter2(uint8_t* p, int step) {
31	const int p1 = p[-`2` * step], p0 = p[-step], q0 = p[`0`], q1 = p[step];
32	const int a = `3` * (q0 - p0) + VP8ksclip1[p1 - q1];
33	const int a1 = VP8ksclip2[(a + `4`) >> `3`];
34	const int a2 = VP8ksclip2[(a + `3`) >> `3`];
35	p[-step] = VP8kclip1[p0 + a2];
36	p[ `0`] = VP8kclip1[q0 - a1];
37	}
38
39	// 4 pixels in, 4 pixels out
40	static WEBP_INLINE void do_filter4(uint8_t* p, int step) {
41	const int p1 = p[-`2` * step], p0 = p[-step], q0 = p[`0`], q1 = p[step];
42	const int a = `3` * (q0 - p0);
43	const int a1 = VP8ksclip2[(a + `4`) >> `3`];
44	const int a2 = VP8ksclip2[(a + `3`) >> `3`];
45	const int a3 = (a1 + `1`) >> `1`;
46	p[-`2` * step] = VP8kclip1[p1 + a3];
47	p[- step] = VP8kclip1[p0 + a2];
48	p[ `0`] = VP8kclip1[q0 - a1];
49	p[ step] = VP8kclip1[q1 - a3];
50	}
51
52	// 6 pixels in, 6 pixels out
53	static WEBP_INLINE void do_filter6(uint8_t* p, int step) {
54	const int p2 = p[-`3` * step], p1 = p[-`2` * step], p0 = p[-step];
55	const int q0 = p[`0`], q1 = p[step], q2 = p[`2` * step];
56	const int a = VP8ksclip1[`3` * (q0 - p0) + VP8ksclip1[p1 - q1]];
57	// a is in [-128,127], a1 in [-27,27], a2 in [-18,18] and a3 in [-9,9]
58	const int a1 = (`27` * a + `63`) >> `7`; // eq. to ((3 a + 7) * 9) >> 7*
59	const int a2 = (`18` * a + `63`) >> `7`; // eq. to ((2 a + 7) * 9) >> 7*
60	const int a3 = (`9` * a + `63`) >> `7`; // eq. to ((1 a + 7) * 9) >> 7*
61	p[-`3` * step] = VP8kclip1[p2 + a3];
62	p[-`2` * step] = VP8kclip1[p1 + a2];
63	p[- step] = VP8kclip1[p0 + a1];
64	p[ `0`] = VP8kclip1[q0 - a1];
65	p[ step] = VP8kclip1[q1 - a2];
66	p[ `2` * step] = VP8kclip1[q2 - a3];
67	}
68
69	static WEBP_INLINE int hev(const uint8_t* p, int step, int thresh) {
70	const int p1 = p[-`2` * step], p0 = p[-step], q0 = p[`0`], q1 = p[step];
71	return (abs_mips32(p1 - p0) > thresh) \|\| (abs_mips32(q1 - q0) > thresh);
72	}
73
74	static WEBP_INLINE int needs_filter(const uint8_t* p, int step, int t) {
75	const int p1 = p[-`2` * step], p0 = p[-step], q0 = p[`0`], q1 = p[step];
76	return ((`4` * abs_mips32(p0 - q0) + abs_mips32(p1 - q1)) <= t);
77	}
78
79	static WEBP_INLINE int needs_filter2(const uint8_t* p,
80	int step, int t, int it) {
81	const int p3 = p[-`4` * step], p2 = p[-`3` * step];
82	const int p1 = p[-`2` * step], p0 = p[-step];
83	const int q0 = p[`0`], q1 = p[step], q2 = p[`2` * step], q3 = p[`3` * step];
84	if ((`4` * abs_mips32(p0 - q0) + abs_mips32(p1 - q1)) > t) {
85	return `0`;
86	}
87	return abs_mips32(p3 - p2) <= it && abs_mips32(p2 - p1) <= it &&
88	abs_mips32(p1 - p0) <= it && abs_mips32(q3 - q2) <= it &&
89	abs_mips32(q2 - q1) <= it && abs_mips32(q1 - q0) <= it;
90	}
91
92	static WEBP_INLINE void FilterLoop26(uint8_t* p,
93	int hstride, int vstride, int size,
94	int thresh, int ithresh, int hev_thresh) {
95	const int thresh2 = `2` * thresh + `1`;
96	while (size-- > `0`) {
97	if (needs_filter2(p, hstride, thresh2, ithresh)) {
98	if (hev(p, hstride, hev_thresh)) {
99	do_filter2(p, hstride);
100	} else {
101	do_filter6(p, hstride);
102	}
103	}
104	p += vstride;
105	}
106	}
107
108	static WEBP_INLINE void FilterLoop24(uint8_t* p,
109	int hstride, int vstride, int size,
110	int thresh, int ithresh, int hev_thresh) {
111	const int thresh2 = `2` * thresh + `1`;
112	while (size-- > `0`) {
113	if (needs_filter2(p, hstride, thresh2, ithresh)) {
114	if (hev(p, hstride, hev_thresh)) {
115	do_filter2(p, hstride);
116	} else {
117	do_filter4(p, hstride);
118	}
119	}
120	p += vstride;
121	}
122	}
123
// on macroblock edges
// Filters across a horizontal edge: the samples of each position are 'stride'
// bytes apart (vertical neighbours) and 16 adjacent columns are processed.
static void VFilter16(uint8_t* p, int stride,
                      int thresh, int ithresh, int hev_thresh) {
  FilterLoop26(p, stride, 1, 16, thresh, ithresh, hev_thresh);
}
129
// Filters across a vertical edge: the samples of each position are 1 byte
// apart (horizontal neighbours) and 16 rows, 'stride' bytes apart, are
// processed.
static void HFilter16(uint8_t* p, int stride,
                      int thresh, int ithresh, int hev_thresh) {
  FilterLoop26(p, 1, stride, 16, thresh, ithresh, hev_thresh);
}
134
// 8-pixels wide variant, for chroma filtering
// Applies the 2/6-tap loop across a horizontal edge to 8 columns of the 'u'
// plane and then of the 'v' plane; both planes use the same 'stride'.
static void VFilter8(uint8_t* u, uint8_t* v, int stride,
                     int thresh, int ithresh, int hev_thresh) {
  FilterLoop26(u, stride, 1, 8, thresh, ithresh, hev_thresh);
  FilterLoop26(v, stride, 1, 8, thresh, ithresh, hev_thresh);
}
141
// Applies the 2/6-tap loop across a vertical edge to 8 rows of the 'u' plane
// and then of the 'v' plane; both planes use the same 'stride'.
static void HFilter8(uint8_t* u, uint8_t* v, int stride,
                     int thresh, int ithresh, int hev_thresh) {
  FilterLoop26(u, 1, stride, 8, thresh, ithresh, hev_thresh);
  FilterLoop26(v, 1, stride, 8, thresh, ithresh, hev_thresh);
}
147
// Inner-edge variant for the two chroma planes: applies the 2/4-tap loop to
// the horizontal edge located 4 rows below 'u' (resp. 'v'), over 8 columns.
static void VFilter8i(uint8_t* u, uint8_t* v, int stride,
                      int thresh, int ithresh, int hev_thresh) {
  FilterLoop24(u + 4 * stride, stride, 1, 8, thresh, ithresh, hev_thresh);
  FilterLoop24(v + 4 * stride, stride, 1, 8, thresh, ithresh, hev_thresh);
}
153
// Inner-edge variant for the two chroma planes: applies the 2/4-tap loop to
// the vertical edge located 4 bytes to the right of 'u' (resp. 'v'), over
// 8 rows.
static void HFilter8i(uint8_t* u, uint8_t* v, int stride,
                      int thresh, int ithresh, int hev_thresh) {
  FilterLoop24(u + 4, 1, stride, 8, thresh, ithresh, hev_thresh);
  FilterLoop24(v + 4, 1, stride, 8, thresh, ithresh, hev_thresh);
}
159
// on three inner edges
// Applies the 2/4-tap loop to the horizontal edges located 4, 8 and 12 rows
// below 'p', 16 columns each.
static void VFilter16i(uint8_t* p, int stride,
                       int thresh, int ithresh, int hev_thresh) {
  int k;
  for (k = 3; k > 0; --k) {
    p += 4 * stride;
    FilterLoop24(p, stride, 1, 16, thresh, ithresh, hev_thresh);
  }
}
169
// Applies the 2/4-tap loop to the three vertical edges located 4, 8 and 12
// bytes to the right of 'p', 16 rows each.
static void HFilter16i(uint8_t* p, int stride,
                       int thresh, int ithresh, int hev_thresh) {
  int k;
  for (k = 3; k > 0; --k) {
    p += 4;
    FilterLoop24(p, 1, stride, 16, thresh, ithresh, hev_thresh);
  }
}
178
//------------------------------------------------------------------------------
// Simple In-loop filtering (Paragraph 15.2)

// Simple filter across a horizontal edge: for each of the 16 columns starting
// at 'p', applies do_filter2() when needs_filter() accepts the position. The
// limit tested is 2 * thresh + 1.
static void SimpleVFilter16(uint8_t* p, int stride, int thresh) {
  int i;
  const int thresh2 = 2 * thresh + 1;
  for (i = 0; i < 16; ++i) {
    if (needs_filter(p + i, stride, thresh2)) {
      do_filter2(p + i, stride);
    }
  }
}
191
// Simple filter across a vertical edge: for each of the 16 rows starting at
// 'p' ('stride' bytes apart), applies do_filter2() when needs_filter() accepts
// the position. The limit tested is 2 * thresh + 1.
static void SimpleHFilter16(uint8_t* p, int stride, int thresh) {
  int i;
  const int thresh2 = 2 * thresh + 1;
  for (i = 0; i < 16; ++i) {
    if (needs_filter(p + i * stride, 1, thresh2)) {
      do_filter2(p + i * stride, 1);
    }
  }
}
201
// Runs SimpleVFilter16() on the three horizontal edges located 4, 8 and 12
// rows below 'p'.
static void SimpleVFilter16i(uint8_t* p, int stride, int thresh) {
  int k;
  for (k = 3; k > 0; --k) {
    p += 4 * stride;
    SimpleVFilter16(p, stride, thresh);
  }
}
209
// Runs SimpleHFilter16() on the three vertical edges located 4, 8 and 12 bytes
// to the right of 'p'.
static void SimpleHFilter16i(uint8_t* p, int stride, int thresh) {
  int k;
  for (k = 3; k > 0; --k) {
    p += 4;
    SimpleHFilter16(p, stride, thresh);
  }
}
217
218	static void TransformOne(const int16_t* in, uint8_t* dst) {
219	int temp0, temp1, temp2, temp3, temp4;
220	int temp5, temp6, temp7, temp8, temp9;
221	int temp10, temp11, temp12, temp13, temp14;
222	int temp15, temp16, temp17, temp18;
223	int16_t* p_in = (int16_t*)in;
224
225	// loops unrolled and merged to avoid usage of tmp buffer
226	// and to reduce number of stalls. MUL macro is written
227	// in assembler and inlined
228	__asm__ volatile(
229	"lh %[temp0], 0(%[in]) \n\t"
230	"lh %[temp8], 16(%[in]) \n\t"
231	"lh %[temp4], 8(%[in]) \n\t"
232	"lh %[temp12], 24(%[in]) \n\t"
233	"addu %[temp16], %[temp0], %[temp8] \n\t"
234	"subu %[temp0], %[temp0], %[temp8] \n\t"
235	"mul %[temp8], %[temp4], %[kC2] \n\t"
236	"mul %[temp17], %[temp12], %[kC1] \n\t"
237	"mul %[temp4], %[temp4], %[kC1] \n\t"
238	"mul %[temp12], %[temp12], %[kC2] \n\t"
239	"lh %[temp1], 2(%[in]) \n\t"
240	"lh %[temp5], 10(%[in]) \n\t"
241	"lh %[temp9], 18(%[in]) \n\t"
242	"lh %[temp13], 26(%[in]) \n\t"
243	"sra %[temp8], %[temp8], 16 \n\t"
244	"sra %[temp17], %[temp17], 16 \n\t"
245	"sra %[temp4], %[temp4], 16 \n\t"
246	"sra %[temp12], %[temp12], 16 \n\t"
247	"lh %[temp2], 4(%[in]) \n\t"
248	"lh %[temp6], 12(%[in]) \n\t"
249	"lh %[temp10], 20(%[in]) \n\t"
250	"lh %[temp14], 28(%[in]) \n\t"
251	"subu %[temp17], %[temp8], %[temp17] \n\t"
252	"addu %[temp4], %[temp4], %[temp12] \n\t"
253	"addu %[temp8], %[temp16], %[temp4] \n\t"
254	"subu %[temp4], %[temp16], %[temp4] \n\t"
255	"addu %[temp16], %[temp1], %[temp9] \n\t"
256	"subu %[temp1], %[temp1], %[temp9] \n\t"
257	"lh %[temp3], 6(%[in]) \n\t"
258	"lh %[temp7], 14(%[in]) \n\t"
259	"lh %[temp11], 22(%[in]) \n\t"
260	"lh %[temp15], 30(%[in]) \n\t"
261	"addu %[temp12], %[temp0], %[temp17] \n\t"
262	"subu %[temp0], %[temp0], %[temp17] \n\t"
263	"mul %[temp9], %[temp5], %[kC2] \n\t"
264	"mul %[temp17], %[temp13], %[kC1] \n\t"
265	"mul %[temp5], %[temp5], %[kC1] \n\t"
266	"mul %[temp13], %[temp13], %[kC2] \n\t"
267	"sra %[temp9], %[temp9], 16 \n\t"
268	"sra %[temp17], %[temp17], 16 \n\t"
269	"subu %[temp17], %[temp9], %[temp17] \n\t"
270	"sra %[temp5], %[temp5], 16 \n\t"
271	"sra %[temp13], %[temp13], 16 \n\t"
272	"addu %[temp5], %[temp5], %[temp13] \n\t"
273	"addu %[temp13], %[temp1], %[temp17] \n\t"
274	"subu %[temp1], %[temp1], %[temp17] \n\t"
275	"mul %[temp17], %[temp14], %[kC1] \n\t"
276	"mul %[temp14], %[temp14], %[kC2] \n\t"
277	"addu %[temp9], %[temp16], %[temp5] \n\t"
278	"subu %[temp5], %[temp16], %[temp5] \n\t"
279	"addu %[temp16], %[temp2], %[temp10] \n\t"
280	"subu %[temp2], %[temp2], %[temp10] \n\t"
281	"mul %[temp10], %[temp6], %[kC2] \n\t"
282	"mul %[temp6], %[temp6], %[kC1] \n\t"
283	"sra %[temp17], %[temp17], 16 \n\t"
284	"sra %[temp14], %[temp14], 16 \n\t"
285	"sra %[temp10], %[temp10], 16 \n\t"
286	"sra %[temp6], %[temp6], 16 \n\t"
287	"subu %[temp17], %[temp10], %[temp17] \n\t"
288	"addu %[temp6], %[temp6], %[temp14] \n\t"
289	"addu %[temp10], %[temp16], %[temp6] \n\t"
290	"subu %[temp6], %[temp16], %[temp6] \n\t"
291	"addu %[temp14], %[temp2], %[temp17] \n\t"
292	"subu %[temp2], %[temp2], %[temp17] \n\t"
293	"mul %[temp17], %[temp15], %[kC1] \n\t"
294	"mul %[temp15], %[temp15], %[kC2] \n\t"
295	"addu %[temp16], %[temp3], %[temp11] \n\t"
296	"subu %[temp3], %[temp3], %[temp11] \n\t"
297	"mul %[temp11], %[temp7], %[kC2] \n\t"
298	"mul %[temp7], %[temp7], %[kC1] \n\t"
299	"addiu %[temp8], %[temp8], 4 \n\t"
300	"addiu %[temp12], %[temp12], 4 \n\t"
301	"addiu %[temp0], %[temp0], 4 \n\t"
302	"addiu %[temp4], %[temp4], 4 \n\t"
303	"sra %[temp17], %[temp17], 16 \n\t"
304	"sra %[temp15], %[temp15], 16 \n\t"
305	"sra %[temp11], %[temp11], 16 \n\t"
306	"sra %[temp7], %[temp7], 16 \n\t"
307	"subu %[temp17], %[temp11], %[temp17] \n\t"
308	"addu %[temp7], %[temp7], %[temp15] \n\t"
309	"addu %[temp15], %[temp3], %[temp17] \n\t"
310	"subu %[temp3], %[temp3], %[temp17] \n\t"
311	"addu %[temp11], %[temp16], %[temp7] \n\t"
312	"subu %[temp7], %[temp16], %[temp7] \n\t"
313	"addu %[temp16], %[temp8], %[temp10] \n\t"
314	"subu %[temp8], %[temp8], %[temp10] \n\t"
315	"mul %[temp10], %[temp9], %[kC2] \n\t"
316	"mul %[temp17], %[temp11], %[kC1] \n\t"
317	"mul %[temp9], %[temp9], %[kC1] \n\t"
318	"mul %[temp11], %[temp11], %[kC2] \n\t"
319	"sra %[temp10], %[temp10], 16 \n\t"
320	"sra %[temp17], %[temp17], 16 \n\t"
321	"sra %[temp9], %[temp9], 16 \n\t"
322	"sra %[temp11], %[temp11], 16 \n\t"
323	"subu %[temp17], %[temp10], %[temp17] \n\t"
324	"addu %[temp11], %[temp9], %[temp11] \n\t"
325	"addu %[temp10], %[temp12], %[temp14] \n\t"
326	"subu %[temp12], %[temp12], %[temp14] \n\t"
327	"mul %[temp14], %[temp13], %[kC2] \n\t"
328	"mul %[temp9], %[temp15], %[kC1] \n\t"
329	"mul %[temp13], %[temp13], %[kC1] \n\t"
330	"mul %[temp15], %[temp15], %[kC2] \n\t"
331	"sra %[temp14], %[temp14], 16 \n\t"
332	"sra %[temp9], %[temp9], 16 \n\t"
333	"sra %[temp13], %[temp13], 16 \n\t"
334	"sra %[temp15], %[temp15], 16 \n\t"
335	"subu %[temp9], %[temp14], %[temp9] \n\t"
336	"addu %[temp15], %[temp13], %[temp15] \n\t"
337	"addu %[temp14], %[temp0], %[temp2] \n\t"
338	"subu %[temp0], %[temp0], %[temp2] \n\t"
339	"mul %[temp2], %[temp1], %[kC2] \n\t"
340	"mul %[temp13], %[temp3], %[kC1] \n\t"
341	"mul %[temp1], %[temp1], %[kC1] \n\t"
342	"mul %[temp3], %[temp3], %[kC2] \n\t"
343	"sra %[temp2], %[temp2], 16 \n\t"
344	"sra %[temp13], %[temp13], 16 \n\t"
345	"sra %[temp1], %[temp1], 16 \n\t"
346	"sra %[temp3], %[temp3], 16 \n\t"
347	"subu %[temp13], %[temp2], %[temp13] \n\t"
348	"addu %[temp3], %[temp1], %[temp3] \n\t"
349	"addu %[temp2], %[temp4], %[temp6] \n\t"
350	"subu %[temp4], %[temp4], %[temp6] \n\t"
351	"mul %[temp6], %[temp5], %[kC2] \n\t"
352	"mul %[temp1], %[temp7], %[kC1] \n\t"
353	"mul %[temp5], %[temp5], %[kC1] \n\t"
354	"mul %[temp7], %[temp7], %[kC2] \n\t"
355	"sra %[temp6], %[temp6], 16 \n\t"
356	"sra %[temp1], %[temp1], 16 \n\t"
357	"sra %[temp5], %[temp5], 16 \n\t"
358	"sra %[temp7], %[temp7], 16 \n\t"
359	"subu %[temp1], %[temp6], %[temp1] \n\t"
360	"addu %[temp7], %[temp5], %[temp7] \n\t"
361	"addu %[temp5], %[temp16], %[temp11] \n\t"
362	"subu %[temp16], %[temp16], %[temp11] \n\t"
363	"addu %[temp11], %[temp8], %[temp17] \n\t"
364	"subu %[temp8], %[temp8], %[temp17] \n\t"
365	"sra %[temp5], %[temp5], 3 \n\t"
366	"sra %[temp16], %[temp16], 3 \n\t"
367	"sra %[temp11], %[temp11], 3 \n\t"
368	"sra %[temp8], %[temp8], 3 \n\t"
369	"addu %[temp17], %[temp10], %[temp15] \n\t"
370	"subu %[temp10], %[temp10], %[temp15] \n\t"
371	"addu %[temp15], %[temp12], %[temp9] \n\t"
372	"subu %[temp12], %[temp12], %[temp9] \n\t"
373	"sra %[temp17], %[temp17], 3 \n\t"
374	"sra %[temp10], %[temp10], 3 \n\t"
375	"sra %[temp15], %[temp15], 3 \n\t"
376	"sra %[temp12], %[temp12], 3 \n\t"
377	"addu %[temp9], %[temp14], %[temp3] \n\t"
378	"subu %[temp14], %[temp14], %[temp3] \n\t"
379	"addu %[temp3], %[temp0], %[temp13] \n\t"
380	"subu %[temp0], %[temp0], %[temp13] \n\t"
381	"sra %[temp9], %[temp9], 3 \n\t"
382	"sra %[temp14], %[temp14], 3 \n\t"
383	"sra %[temp3], %[temp3], 3 \n\t"
384	"sra %[temp0], %[temp0], 3 \n\t"
385	"addu %[temp13], %[temp2], %[temp7] \n\t"
386	"subu %[temp2], %[temp2], %[temp7] \n\t"
387	"addu %[temp7], %[temp4], %[temp1] \n\t"
388	"subu %[temp4], %[temp4], %[temp1] \n\t"
389	"sra %[temp13], %[temp13], 3 \n\t"
390	"sra %[temp2], %[temp2], 3 \n\t"
391	"sra %[temp7], %[temp7], 3 \n\t"
392	"sra %[temp4], %[temp4], 3 \n\t"
393	"addiu %[temp6], $zero, 255 \n\t"
394	"lbu %[temp1], 0+0*" XSTR(BPS) "(%[dst]) \n\t"
395	"addu %[temp1], %[temp1], %[temp5] \n\t"
396	"sra %[temp5], %[temp1], 8 \n\t"
397	"sra %[temp18], %[temp1], 31 \n\t"
398	"beqz %[temp5], 1f \n\t"
399	"xor %[temp1], %[temp1], %[temp1] \n\t"
400	"movz %[temp1], %[temp6], %[temp18] \n\t"
401	"1: \n\t"
402	"lbu %[temp18], 1+0*" XSTR(BPS) "(%[dst]) \n\t"
403	"sb %[temp1], 0+0*" XSTR(BPS) "(%[dst]) \n\t"
404	"addu %[temp18], %[temp18], %[temp11] \n\t"
405	"sra %[temp11], %[temp18], 8 \n\t"
406	"sra %[temp1], %[temp18], 31 \n\t"
407	"beqz %[temp11], 2f \n\t"
408	"xor %[temp18], %[temp18], %[temp18] \n\t"
409	"movz %[temp18], %[temp6], %[temp1] \n\t"
410	"2: \n\t"
411	"lbu %[temp1], 2+0*" XSTR(BPS) "(%[dst]) \n\t"
412	"sb %[temp18], 1+0*" XSTR(BPS) "(%[dst]) \n\t"
413	"addu %[temp1], %[temp1], %[temp8] \n\t"
414	"sra %[temp8], %[temp1], 8 \n\t"
415	"sra %[temp18], %[temp1], 31 \n\t"
416	"beqz %[temp8], 3f \n\t"
417	"xor %[temp1], %[temp1], %[temp1] \n\t"
418	"movz %[temp1], %[temp6], %[temp18] \n\t"
419	"3: \n\t"
420	"lbu %[temp18], 3+0*" XSTR(BPS) "(%[dst]) \n\t"
421	"sb %[temp1], 2+0*" XSTR(BPS) "(%[dst]) \n\t"
422	"addu %[temp18], %[temp18], %[temp16] \n\t"
423	"sra %[temp16], %[temp18], 8 \n\t"
424	"sra %[temp1], %[temp18], 31 \n\t"
425	"beqz %[temp16], 4f \n\t"
426	"xor %[temp18], %[temp18], %[temp18] \n\t"
427	"movz %[temp18], %[temp6], %[temp1] \n\t"
428	"4: \n\t"
429	"sb %[temp18], 3+0*" XSTR(BPS) "(%[dst]) \n\t"
430	"lbu %[temp5], 0+1*" XSTR(BPS) "(%[dst]) \n\t"
431	"lbu %[temp8], 1+1*" XSTR(BPS) "(%[dst]) \n\t"
432	"lbu %[temp11], 2+1*" XSTR(BPS) "(%[dst]) \n\t"
433	"lbu %[temp16], 3+1*" XSTR(BPS) "(%[dst]) \n\t"
434	"addu %[temp5], %[temp5], %[temp17] \n\t"
435	"addu %[temp8], %[temp8], %[temp15] \n\t"
436	"addu %[temp11], %[temp11], %[temp12] \n\t"
437	"addu %[temp16], %[temp16], %[temp10] \n\t"
438	"sra %[temp18], %[temp5], 8 \n\t"
439	"sra %[temp1], %[temp5], 31 \n\t"
440	"beqz %[temp18], 5f \n\t"
441	"xor %[temp5], %[temp5], %[temp5] \n\t"
442	"movz %[temp5], %[temp6], %[temp1] \n\t"
443	"5: \n\t"
444	"sra %[temp18], %[temp8], 8 \n\t"
445	"sra %[temp1], %[temp8], 31 \n\t"
446	"beqz %[temp18], 6f \n\t"
447	"xor %[temp8], %[temp8], %[temp8] \n\t"
448	"movz %[temp8], %[temp6], %[temp1] \n\t"
449	"6: \n\t"
450	"sra %[temp18], %[temp11], 8 \n\t"
451	"sra %[temp1], %[temp11], 31 \n\t"
452	"sra %[temp17], %[temp16], 8 \n\t"
453	"sra %[temp15], %[temp16], 31 \n\t"
454	"beqz %[temp18], 7f \n\t"
455	"xor %[temp11], %[temp11], %[temp11] \n\t"
456	"movz %[temp11], %[temp6], %[temp1] \n\t"
457	"7: \n\t"
458	"beqz %[temp17], 8f \n\t"
459	"xor %[temp16], %[temp16], %[temp16] \n\t"
460	"movz %[temp16], %[temp6], %[temp15] \n\t"
461	"8: \n\t"
462	"sb %[temp5], 0+1*" XSTR(BPS) "(%[dst]) \n\t"
463	"sb %[temp8], 1+1*" XSTR(BPS) "(%[dst]) \n\t"
464	"sb %[temp11], 2+1*" XSTR(BPS) "(%[dst]) \n\t"
465	"sb %[temp16], 3+1*" XSTR(BPS) "(%[dst]) \n\t"
466	"lbu %[temp5], 0+2*" XSTR(BPS) "(%[dst]) \n\t"
467	"lbu %[temp8], 1+2*" XSTR(BPS) "(%[dst]) \n\t"
468	"lbu %[temp11], 2+2*" XSTR(BPS) "(%[dst]) \n\t"
469	"lbu %[temp16], 3+2*" XSTR(BPS) "(%[dst]) \n\t"
470	"addu %[temp5], %[temp5], %[temp9] \n\t"
471	"addu %[temp8], %[temp8], %[temp3] \n\t"
472	"addu %[temp11], %[temp11], %[temp0] \n\t"
473	"addu %[temp16], %[temp16], %[temp14] \n\t"
474	"sra %[temp18], %[temp5], 8 \n\t"
475	"sra %[temp1], %[temp5], 31 \n\t"
476	"sra %[temp17], %[temp8], 8 \n\t"
477	"sra %[temp15], %[temp8], 31 \n\t"
478	"sra %[temp12], %[temp11], 8 \n\t"
479	"sra %[temp10], %[temp11], 31 \n\t"
480	"sra %[temp9], %[temp16], 8 \n\t"
481	"sra %[temp3], %[temp16], 31 \n\t"
482	"beqz %[temp18], 9f \n\t"
483	"xor %[temp5], %[temp5], %[temp5] \n\t"
484	"movz %[temp5], %[temp6], %[temp1] \n\t"
485	"9: \n\t"
486	"beqz %[temp17], 10f \n\t"
487	"xor %[temp8], %[temp8], %[temp8] \n\t"
488	"movz %[temp8], %[temp6], %[temp15] \n\t"
489	"10: \n\t"
490	"beqz %[temp12], 11f \n\t"
491	"xor %[temp11], %[temp11], %[temp11] \n\t"
492	"movz %[temp11], %[temp6], %[temp10] \n\t"
493	"11: \n\t"
494	"beqz %[temp9], 12f \n\t"
495	"xor %[temp16], %[temp16], %[temp16] \n\t"
496	"movz %[temp16], %[temp6], %[temp3] \n\t"
497	"12: \n\t"
498	"sb %[temp5], 0+2*" XSTR(BPS) "(%[dst]) \n\t"
499	"sb %[temp8], 1+2*" XSTR(BPS) "(%[dst]) \n\t"
500	"sb %[temp11], 2+2*" XSTR(BPS) "(%[dst]) \n\t"
501	"sb %[temp16], 3+2*" XSTR(BPS) "(%[dst]) \n\t"
502	"lbu %[temp5], 0+3*" XSTR(BPS) "(%[dst]) \n\t"
503	"lbu %[temp8], 1+3*" XSTR(BPS) "(%[dst]) \n\t"
504	"lbu %[temp11], 2+3*" XSTR(BPS) "(%[dst]) \n\t"
505	"lbu %[temp16], 3+3*" XSTR(BPS) "(%[dst]) \n\t"
506	"addu %[temp5], %[temp5], %[temp13] \n\t"
507	"addu %[temp8], %[temp8], %[temp7] \n\t"
508	"addu %[temp11], %[temp11], %[temp4] \n\t"
509	"addu %[temp16], %[temp16], %[temp2] \n\t"
510	"sra %[temp18], %[temp5], 8 \n\t"
511	"sra %[temp1], %[temp5], 31 \n\t"
512	"sra %[temp17], %[temp8], 8 \n\t"
513	"sra %[temp15], %[temp8], 31 \n\t"
514	"sra %[temp12], %[temp11], 8 \n\t"
515	"sra %[temp10], %[temp11], 31 \n\t"
516	"sra %[temp9], %[temp16], 8 \n\t"
517	"sra %[temp3], %[temp16], 31 \n\t"
518	"beqz %[temp18], 13f \n\t"
519	"xor %[temp5], %[temp5], %[temp5] \n\t"
520	"movz %[temp5], %[temp6], %[temp1] \n\t"
521	"13: \n\t"
522	"beqz %[temp17], 14f \n\t"
523	"xor %[temp8], %[temp8], %[temp8] \n\t"
524	"movz %[temp8], %[temp6], %[temp15] \n\t"
525	"14: \n\t"
526	"beqz %[temp12], 15f \n\t"
527	"xor %[temp11], %[temp11], %[temp11] \n\t"
528	"movz %[temp11], %[temp6], %[temp10] \n\t"
529	"15: \n\t"
530	"beqz %[temp9], 16f \n\t"
531	"xor %[temp16], %[temp16], %[temp16] \n\t"
532	"movz %[temp16], %[temp6], %[temp3] \n\t"
533	"16: \n\t"
534	"sb %[temp5], 0+3*" XSTR(BPS) "(%[dst]) \n\t"
535	"sb %[temp8], 1+3*" XSTR(BPS) "(%[dst]) \n\t"
536	"sb %[temp11], 2+3*" XSTR(BPS) "(%[dst]) \n\t"
537	"sb %[temp16], 3+3*" XSTR(BPS) "(%[dst]) \n\t"
538
539	: [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
540	[temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
541	[temp6]"=&r"(temp6), [temp7]"=&r"(temp7), [temp8]"=&r"(temp8),
542	[temp9]"=&r"(temp9), [temp10]"=&r"(temp10), [temp11]"=&r"(temp11),
543	[temp12]"=&r"(temp12), [temp13]"=&r"(temp13), [temp14]"=&r"(temp14),
544	[temp15]"=&r"(temp15), [temp16]"=&r"(temp16), [temp17]"=&r"(temp17),
545	[temp18]"=&r"(temp18)
546	: [in]"r"(p_in), [kC1]"r"(kC1), [kC2]"r"(kC2), [dst]"r"(dst)
547	: "memory", "hi", "lo"
548	);
549	}
550
// Transforms one block, or two adjacent blocks when 'do_two' is non-zero.
// The second block reads the next 16 coefficients (in + 16) and writes
// 4 bytes to the right of the first one (dst + 4).
static void TransformTwo(const int16_t* in, uint8_t* dst, int do_two) {
  TransformOne(in, dst);
  if (do_two) {
    TransformOne(in + 16, dst + 4);
  }
}
557
558	//------------------------------------------------------------------------------
559	// Entry point
560
561	extern void VP8DspInitMIPS32(void);
562
563	WEBP_TSAN_IGNORE_FUNCTION void VP8DspInitMIPS32(void) {
564	VP8InitClipTables();
565
566	VP8Transform = TransformTwo;
567
568	VP8VFilter16 = VFilter16;
569	VP8HFilter16 = HFilter16;
570	VP8VFilter8 = VFilter8;
571	VP8HFilter8 = HFilter8;
572	VP8VFilter16i = VFilter16i;
573	VP8HFilter16i = HFilter16i;
574	VP8VFilter8i = VFilter8i;
575	VP8HFilter8i = HFilter8i;
576
577	VP8SimpleVFilter16 = SimpleVFilter16;
578	VP8SimpleHFilter16 = SimpleHFilter16;
579	VP8SimpleVFilter16i = SimpleVFilter16i;
580	VP8SimpleHFilter16i = SimpleHFilter16i;
581	}
582
583	#else // !WEBP_USE_MIPS32
584
585	WEBP_DSP_INIT_STUB(VP8DspInitMIPS32)
586
587	#endif // WEBP_USE_MIPS32
588

Browse the source code of engine/third_party/libwebp/src/dsp/dec_mips32.c