rescaler_mips_dsp_r2.c source code [engine/third_party/libwebp/src/dsp/rescaler_mips_dsp_r2.c]

1	// Copyright 2014 Google Inc. All Rights Reserved.
2	//
3	// Use of this source code is governed by a BSD-style license
4	// that can be found in the COPYING file in the root of the source
5	// tree. An additional intellectual property rights grant can be found
6	// in the file PATENTS. All contributing project authors may
7	// be found in the AUTHORS file in the root of the source tree.
8	// -----------------------------------------------------------------------------
9	//
10	// MIPS version of rescaling functions
11	//
12	// Author(s): Djordje Pesut (djordje.pesut@imgtec.com)
13
14	#include "./dsp.h"
15
16	#if defined(WEBP_USE_MIPS_DSP_R2)
17
18	#include <assert.h>
19	#include "../utils/rescaler_utils.h"
20
// Rounding term: half of one fixed-point unit.
#define ROUNDER (WEBP_RESCALER_ONE >> 1)
// Fixed-point multiply of 'x' by 'y': the product is formed in 64 bits,
// rounded with ROUNDER and shifted down by WEBP_RESCALER_RFIX.
#define MULT_FIX(x, y) \
  ((((uint64_t)(x)) * (y) + ROUNDER) >> WEBP_RESCALER_RFIX)
23
24	//------------------------------------------------------------------------------
25	// Row export
26
27	static void ExportRowShrink(WebPRescaler* const wrk) {
28	int i;
29	const int x_out_max = wrk->dst_width * wrk->num_channels;
30	uint8_t* dst = wrk->dst;
31	rescaler_t* irow = wrk->irow;
32	const rescaler_t* frow = wrk->frow;
33	const int yscale = wrk->fy_scale * (-wrk->y_accum);
34	int temp0, temp1, temp2, temp3, temp4, temp5, loop_end;
35	const int temp7 = (int)wrk->fxy_scale;
36	const int temp6 = (x_out_max & ~`0x3`) << `2`;
37	assert(!WebPRescalerOutputDone(wrk));
38	assert(wrk->y_accum <= `0`);
39	assert(!wrk->y_expand);
40	assert(wrk->fxy_scale != `0`);
41	if (yscale) {
42	if (x_out_max >= `4`) {
43	int temp8, temp9, temp10, temp11;
44	__asm__ volatile (
45	"li %[temp3], 0x10000 \n\t"
46	"li %[temp4], 0x8000 \n\t"
47	"addu %[loop_end], %[frow], %[temp6] \n\t"
48	"1: \n\t"
49	"lw %[temp0], 0(%[frow]) \n\t"
50	"lw %[temp1], 4(%[frow]) \n\t"
51	"lw %[temp2], 8(%[frow]) \n\t"
52	"lw %[temp5], 12(%[frow]) \n\t"
53	"mult $ac0, %[temp3], %[temp4] \n\t"
54	"maddu $ac0, %[temp0], %[yscale] \n\t"
55	"mult $ac1, %[temp3], %[temp4] \n\t"
56	"maddu $ac1, %[temp1], %[yscale] \n\t"
57	"mult $ac2, %[temp3], %[temp4] \n\t"
58	"maddu $ac2, %[temp2], %[yscale] \n\t"
59	"mult $ac3, %[temp3], %[temp4] \n\t"
60	"maddu $ac3, %[temp5], %[yscale] \n\t"
61	"addiu %[frow], %[frow], 16 \n\t"
62	"mfhi %[temp0], $ac0 \n\t"
63	"mfhi %[temp1], $ac1 \n\t"
64	"mfhi %[temp2], $ac2 \n\t"
65	"mfhi %[temp5], $ac3 \n\t"
66	"lw %[temp8], 0(%[irow]) \n\t"
67	"lw %[temp9], 4(%[irow]) \n\t"
68	"lw %[temp10], 8(%[irow]) \n\t"
69	"lw %[temp11], 12(%[irow]) \n\t"
70	"addiu %[dst], %[dst], 4 \n\t"
71	"addiu %[irow], %[irow], 16 \n\t"
72	"subu %[temp8], %[temp8], %[temp0] \n\t"
73	"subu %[temp9], %[temp9], %[temp1] \n\t"
74	"subu %[temp10], %[temp10], %[temp2] \n\t"
75	"subu %[temp11], %[temp11], %[temp5] \n\t"
76	"mult $ac0, %[temp3], %[temp4] \n\t"
77	"maddu $ac0, %[temp8], %[temp7] \n\t"
78	"mult $ac1, %[temp3], %[temp4] \n\t"
79	"maddu $ac1, %[temp9], %[temp7] \n\t"
80	"mult $ac2, %[temp3], %[temp4] \n\t"
81	"maddu $ac2, %[temp10], %[temp7] \n\t"
82	"mult $ac3, %[temp3], %[temp4] \n\t"
83	"maddu $ac3, %[temp11], %[temp7] \n\t"
84	"mfhi %[temp8], $ac0 \n\t"
85	"mfhi %[temp9], $ac1 \n\t"
86	"mfhi %[temp10], $ac2 \n\t"
87	"mfhi %[temp11], $ac3 \n\t"
88	"sw %[temp0], -16(%[irow]) \n\t"
89	"sw %[temp1], -12(%[irow]) \n\t"
90	"sw %[temp2], -8(%[irow]) \n\t"
91	"sw %[temp5], -4(%[irow]) \n\t"
92	"sb %[temp8], -4(%[dst]) \n\t"
93	"sb %[temp9], -3(%[dst]) \n\t"
94	"sb %[temp10], -2(%[dst]) \n\t"
95	"sb %[temp11], -1(%[dst]) \n\t"
96	"bne %[frow], %[loop_end], 1b \n\t"
97	: [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp3]"=&r"(temp3),
98	[temp4]"=&r"(temp4), [temp5]"=&r"(temp5), [frow]"+r"(frow),
99	[irow]"+r"(irow), [dst]"+r"(dst), [loop_end]"=&r"(loop_end),
100	[temp8]"=&r"(temp8), [temp9]"=&r"(temp9), [temp10]"=&r"(temp10),
101	[temp11]"=&r"(temp11), [temp2]"=&r"(temp2)
102	: [temp7]"r"(temp7), [yscale]"r"(yscale), [temp6]"r"(temp6)
103	: "memory", "hi", "lo", "$ac1hi", "$ac1lo",
104	"$ac2hi", "$ac2lo", "$ac3hi", "$ac3lo"
105	);
106	}
107	for (i = `0`; i < (x_out_max & `0x3`); ++i) {
108	const uint32_t frac = (uint32_t)MULT_FIX(*frow++, yscale);
109	const int v = (int)MULT_FIX(*irow - frac, wrk->fxy_scale);
110	assert(v >= `0` && v <= `255`);
111	*dst++ = v;
112	irow++ = frac; // new fractional start*
113	}
114	} else {
115	if (x_out_max >= `4`) {
116	__asm__ volatile (
117	"li %[temp3], 0x10000 \n\t"
118	"li %[temp4], 0x8000 \n\t"
119	"addu %[loop_end], %[irow], %[temp6] \n\t"
120	"1: \n\t"
121	"lw %[temp0], 0(%[irow]) \n\t"
122	"lw %[temp1], 4(%[irow]) \n\t"
123	"lw %[temp2], 8(%[irow]) \n\t"
124	"lw %[temp5], 12(%[irow]) \n\t"
125	"addiu %[dst], %[dst], 4 \n\t"
126	"addiu %[irow], %[irow], 16 \n\t"
127	"mult $ac0, %[temp3], %[temp4] \n\t"
128	"maddu $ac0, %[temp0], %[temp7] \n\t"
129	"mult $ac1, %[temp3], %[temp4] \n\t"
130	"maddu $ac1, %[temp1], %[temp7] \n\t"
131	"mult $ac2, %[temp3], %[temp4] \n\t"
132	"maddu $ac2, %[temp2], %[temp7] \n\t"
133	"mult $ac3, %[temp3], %[temp4] \n\t"
134	"maddu $ac3, %[temp5], %[temp7] \n\t"
135	"mfhi %[temp0], $ac0 \n\t"
136	"mfhi %[temp1], $ac1 \n\t"
137	"mfhi %[temp2], $ac2 \n\t"
138	"mfhi %[temp5], $ac3 \n\t"
139	"sw $zero, -16(%[irow]) \n\t"
140	"sw $zero, -12(%[irow]) \n\t"
141	"sw $zero, -8(%[irow]) \n\t"
142	"sw $zero, -4(%[irow]) \n\t"
143	"sb %[temp0], -4(%[dst]) \n\t"
144	"sb %[temp1], -3(%[dst]) \n\t"
145	"sb %[temp2], -2(%[dst]) \n\t"
146	"sb %[temp5], -1(%[dst]) \n\t"
147	"bne %[irow], %[loop_end], 1b \n\t"
148	: [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp3]"=&r"(temp3),
149	[temp4]"=&r"(temp4), [temp5]"=&r"(temp5), [irow]"+r"(irow),
150	[dst]"+r"(dst), [loop_end]"=&r"(loop_end), [temp2]"=&r"(temp2)
151	: [temp7]"r"(temp7), [temp6]"r"(temp6)
152	: "memory", "hi", "lo", "$ac1hi", "$ac1lo",
153	"$ac2hi", "$ac2lo", "$ac3hi", "$ac3lo"
154	);
155	}
156	for (i = `0`; i < (x_out_max & `0x3`); ++i) {
157	const int v = (int)MULT_FIX(*irow, wrk->fxy_scale);
158	assert(v >= `0` && v <= `255`);
159	*dst++ = v;
160	*irow++ = `0`;
161	}
162	}
163	}
164
165	static void ExportRowExpand(WebPRescaler* const wrk) {
166	int i;
167	uint8_t* dst = wrk->dst;
168	rescaler_t* irow = wrk->irow;
169	const int x_out_max = wrk->dst_width * wrk->num_channels;
170	const rescaler_t* frow = wrk->frow;
171	int temp0, temp1, temp2, temp3, temp4, temp5, loop_end;
172	const int temp6 = (x_out_max & ~`0x3`) << `2`;
173	const int temp7 = (int)wrk->fy_scale;
174	assert(!WebPRescalerOutputDone(wrk));
175	assert(wrk->y_accum <= `0`);
176	assert(wrk->y_expand);
177	assert(wrk->y_sub != `0`);
178	if (wrk->y_accum == `0`) {
179	if (x_out_max >= `4`) {
180	__asm__ volatile (
181	"li %[temp4], 0x10000 \n\t"
182	"li %[temp5], 0x8000 \n\t"
183	"addu %[loop_end], %[frow], %[temp6] \n\t"
184	"1: \n\t"
185	"lw %[temp0], 0(%[frow]) \n\t"
186	"lw %[temp1], 4(%[frow]) \n\t"
187	"lw %[temp2], 8(%[frow]) \n\t"
188	"lw %[temp3], 12(%[frow]) \n\t"
189	"addiu %[dst], %[dst], 4 \n\t"
190	"addiu %[frow], %[frow], 16 \n\t"
191	"mult $ac0, %[temp4], %[temp5] \n\t"
192	"maddu $ac0, %[temp0], %[temp7] \n\t"
193	"mult $ac1, %[temp4], %[temp5] \n\t"
194	"maddu $ac1, %[temp1], %[temp7] \n\t"
195	"mult $ac2, %[temp4], %[temp5] \n\t"
196	"maddu $ac2, %[temp2], %[temp7] \n\t"
197	"mult $ac3, %[temp4], %[temp5] \n\t"
198	"maddu $ac3, %[temp3], %[temp7] \n\t"
199	"mfhi %[temp0], $ac0 \n\t"
200	"mfhi %[temp1], $ac1 \n\t"
201	"mfhi %[temp2], $ac2 \n\t"
202	"mfhi %[temp3], $ac3 \n\t"
203	"sb %[temp0], -4(%[dst]) \n\t"
204	"sb %[temp1], -3(%[dst]) \n\t"
205	"sb %[temp2], -2(%[dst]) \n\t"
206	"sb %[temp3], -1(%[dst]) \n\t"
207	"bne %[frow], %[loop_end], 1b \n\t"
208	: [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp3]"=&r"(temp3),
209	[temp4]"=&r"(temp4), [temp5]"=&r"(temp5), [frow]"+r"(frow),
210	[dst]"+r"(dst), [loop_end]"=&r"(loop_end), [temp2]"=&r"(temp2)
211	: [temp7]"r"(temp7), [temp6]"r"(temp6)
212	: "memory", "hi", "lo", "$ac1hi", "$ac1lo",
213	"$ac2hi", "$ac2lo", "$ac3hi", "$ac3lo"
214	);
215	}
216	for (i = `0`; i < (x_out_max & `0x3`); ++i) {
217	const uint32_t J = *frow++;
218	const int v = (int)MULT_FIX(J, wrk->fy_scale);
219	assert(v >= `0` && v <= `255`);
220	*dst++ = v;
221	}
222	} else {
223	const uint32_t B = WEBP_RESCALER_FRAC(-wrk->y_accum, wrk->y_sub);
224	const uint32_t A = (uint32_t)(WEBP_RESCALER_ONE - B);
225	if (x_out_max >= `4`) {
226	int temp8, temp9, temp10, temp11;
227	__asm__ volatile (
228	"li %[temp8], 0x10000 \n\t"
229	"li %[temp9], 0x8000 \n\t"
230	"addu %[loop_end], %[frow], %[temp6] \n\t"
231	"1: \n\t"
232	"lw %[temp0], 0(%[frow]) \n\t"
233	"lw %[temp1], 4(%[frow]) \n\t"
234	"lw %[temp2], 8(%[frow]) \n\t"
235	"lw %[temp3], 12(%[frow]) \n\t"
236	"lw %[temp4], 0(%[irow]) \n\t"
237	"lw %[temp5], 4(%[irow]) \n\t"
238	"lw %[temp10], 8(%[irow]) \n\t"
239	"lw %[temp11], 12(%[irow]) \n\t"
240	"addiu %[dst], %[dst], 4 \n\t"
241	"mult $ac0, %[temp8], %[temp9] \n\t"
242	"maddu $ac0, %[A], %[temp0] \n\t"
243	"maddu $ac0, %[B], %[temp4] \n\t"
244	"mult $ac1, %[temp8], %[temp9] \n\t"
245	"maddu $ac1, %[A], %[temp1] \n\t"
246	"maddu $ac1, %[B], %[temp5] \n\t"
247	"mult $ac2, %[temp8], %[temp9] \n\t"
248	"maddu $ac2, %[A], %[temp2] \n\t"
249	"maddu $ac2, %[B], %[temp10] \n\t"
250	"mult $ac3, %[temp8], %[temp9] \n\t"
251	"maddu $ac3, %[A], %[temp3] \n\t"
252	"maddu $ac3, %[B], %[temp11] \n\t"
253	"addiu %[frow], %[frow], 16 \n\t"
254	"addiu %[irow], %[irow], 16 \n\t"
255	"mfhi %[temp0], $ac0 \n\t"
256	"mfhi %[temp1], $ac1 \n\t"
257	"mfhi %[temp2], $ac2 \n\t"
258	"mfhi %[temp3], $ac3 \n\t"
259	"mult $ac0, %[temp8], %[temp9] \n\t"
260	"maddu $ac0, %[temp0], %[temp7] \n\t"
261	"mult $ac1, %[temp8], %[temp9] \n\t"
262	"maddu $ac1, %[temp1], %[temp7] \n\t"
263	"mult $ac2, %[temp8], %[temp9] \n\t"
264	"maddu $ac2, %[temp2], %[temp7] \n\t"
265	"mult $ac3, %[temp8], %[temp9] \n\t"
266	"maddu $ac3, %[temp3], %[temp7] \n\t"
267	"mfhi %[temp0], $ac0 \n\t"
268	"mfhi %[temp1], $ac1 \n\t"
269	"mfhi %[temp2], $ac2 \n\t"
270	"mfhi %[temp3], $ac3 \n\t"
271	"sb %[temp0], -4(%[dst]) \n\t"
272	"sb %[temp1], -3(%[dst]) \n\t"
273	"sb %[temp2], -2(%[dst]) \n\t"
274	"sb %[temp3], -1(%[dst]) \n\t"
275	"bne %[frow], %[loop_end], 1b \n\t"
276	: [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp3]"=&r"(temp3),
277	[temp4]"=&r"(temp4), [temp5]"=&r"(temp5), [frow]"+r"(frow),
278	[irow]"+r"(irow), [dst]"+r"(dst), [loop_end]"=&r"(loop_end),
279	[temp8]"=&r"(temp8), [temp9]"=&r"(temp9), [temp10]"=&r"(temp10),
280	[temp11]"=&r"(temp11), [temp2]"=&r"(temp2)
281	: [temp7]"r"(temp7), [temp6]"r"(temp6), [A]"r"(A), [B]"r"(B)
282	: "memory", "hi", "lo", "$ac1hi", "$ac1lo",
283	"$ac2hi", "$ac2lo", "$ac3hi", "$ac3lo"
284	);
285	}
286	for (i = `0`; i < (x_out_max & `0x3`); ++i) {
287	const uint64_t I = (uint64_t)A * *frow++
288	+ (uint64_t)B * *irow++;
289	const uint32_t J = (uint32_t)((I + ROUNDER) >> WEBP_RESCALER_RFIX);
290	const int v = (int)MULT_FIX(J, wrk->fy_scale);
291	assert(v >= `0` && v <= `255`);
292	*dst++ = v;
293	}
294	}
295	}
296
297	#undef MULT_FIX
298	#undef ROUNDER
299
300	//------------------------------------------------------------------------------
301	// Entry point
302
303	extern void WebPRescalerDspInitMIPSdspR2(void);
304
305	WEBP_TSAN_IGNORE_FUNCTION void WebPRescalerDspInitMIPSdspR2(void) {
306	WebPRescalerExportRowExpand = ExportRowExpand;
307	WebPRescalerExportRowShrink = ExportRowShrink;
308	}
309
310	#else // !WEBP_USE_MIPS_DSP_R2
311
// WEBP_USE_MIPS_DSP_R2 is not defined: WebPRescalerDspInitMIPSdspR2 is
// produced by WEBP_DSP_INIT_STUB instead (presumably a do-nothing
// initializer; the macro is defined outside this file -- confirm there).
WEBP_DSP_INIT_STUB(WebPRescalerDspInitMIPSdspR2)
313
314	#endif // WEBP_USE_MIPS_DSP_R2
315

Browse the source code of engine/third_party/libwebp/src/dsp/rescaler_mips_dsp_r2.c