rescaler_mips_dsp_r2.c source code [Skia/third_party/externals/libwebp/src/dsp/rescaler_mips_dsp_r2.c]

1	// Copyright 2014 Google Inc. All Rights Reserved.
2	//
3	// Use of this source code is governed by a BSD-style license
4	// that can be found in the COPYING file in the root of the source
5	// tree. An additional intellectual property rights grant can be found
6	// in the file PATENTS. All contributing project authors may
7	// be found in the AUTHORS file in the root of the source tree.
8	// -----------------------------------------------------------------------------
9	//
10	// MIPS version of rescaling functions
11	//
12	// Author(s): Djordje Pesut (djordje.pesut@imgtec.com)
13
14	#include "src/dsp/dsp.h"
15
16	#if defined(WEBP_USE_MIPS_DSP_R2) && !defined(WEBP_REDUCE_SIZE)
17
18	#include <assert.h>
19	#include "src/utils/rescaler_utils.h"
20
// Fixed-point helpers used by the scalar tail loops of the export functions in
// this file; they are #undef'ed again once those functions have been defined.
//
// ROUNDER is half of WEBP_RESCALER_ONE. Adding it before the right shift makes
// MULT_FIX round rather than truncate (assuming WEBP_RESCALER_ONE equals
// 1 << WEBP_RESCALER_RFIX -- TODO confirm in rescaler_utils.h).
21	#define ROUNDER (WEBP_RESCALER_ONE >> 1)
// 64-bit product of x and y, plus ROUNDER, shifted right by WEBP_RESCALER_RFIX.
22	#define MULT_FIX(x, y) (((uint64_t)(x) * (y) + ROUNDER) >> WEBP_RESCALER_RFIX)
// Same 64-bit product without the ROUNDER term: the shifted-out bits are dropped.
23	#define MULT_FIX_FLOOR(x, y) (((uint64_t)(x) * (y)) >> WEBP_RESCALER_RFIX)
24
25	//------------------------------------------------------------------------------
26	// Row export
27
#if 0  // disabled for now. TODO(skal): make match the C-code
// Exports one output row while shrinking vertically.
//
// Reads the accumulated row wrk->irow (and, when a partial contribution is
// pending, the current row wrk->frow), writes
// wrk->dst_width * wrk->num_channels bytes to wrk->dst, and leaves in irow
// either the fractional part carried over to the next output row or zero.
//
// Samples are handled four at a time by inline assembly; the remaining
// (x_out_max & 3) samples go through a scalar loop.
//
// In the assembly, each accumulator is first loaded with
// 0x10000 * 0x8000 = 0x80000000 ('mult'), then 'maddu' adds an unsigned 32x32
// product and 'mfhi' reads back the upper 32 bits, i.e.
// (a * b + 0x80000000) >> 32.
//
// NOTE(review): the assembly path and the scalar tail do not compute the same
// thing, which is presumably what the TODO above refers to:
//   - with yscale != 0 the assembly rounds 'frac' while the tail uses
//     MULT_FIX_FLOOR;
//   - with yscale == 0 the assembly rounds the output while the tail uses
//     MULT_FIX_FLOOR;
//   - the assembly stores the low byte with 'sb' and never compares against
//     255, while the tail saturates.
static void ExportRowShrink_MIPSdspR2(WebPRescaler* const wrk) {
  int i;
  const int x_out_max = wrk->dst_width * wrk->num_channels;
  uint8_t* dst = wrk->dst;
  rescaler_t* irow = wrk->irow;
  const rescaler_t* frow = wrk->frow;
  const int yscale = wrk->fy_scale * (-wrk->y_accum);
  int temp0, temp1, temp2, temp3, temp4, temp5, loop_end;
  const int temp7 = (int)wrk->fxy_scale;
  // Byte length of the part processed by the assembly loop: the sample count
  // rounded down to a multiple of 4, times 4 bytes per sample (the loop walks
  // frow/irow with 32-bit 'lw'/'sw' accesses).
  const int temp6 = (x_out_max & ~0x3) << 2;
  assert(!WebPRescalerOutputDone(wrk));
  assert(wrk->y_accum <= 0);
  assert(!wrk->y_expand);
  assert(wrk->fxy_scale != 0);
  if (yscale) {
    // The loop below tests its exit condition at the bottom, so it must only
    // be entered when at least one group of four samples exists.
    if (x_out_max >= 4) {
      int temp8, temp9, temp10, temp11;
      __asm__ volatile (
        "li %[temp3], 0x10000 \n\t"
        "li %[temp4], 0x8000 \n\t"
        "addu %[loop_end], %[frow], %[temp6] \n\t"
        "1: \n\t"
        // frac[k] = (frow[k] * yscale + bias) >> 32, for k = 0..3
        "lw %[temp0], 0(%[frow]) \n\t"
        "lw %[temp1], 4(%[frow]) \n\t"
        "lw %[temp2], 8(%[frow]) \n\t"
        "lw %[temp5], 12(%[frow]) \n\t"
        "mult $ac0, %[temp3], %[temp4] \n\t"
        "maddu $ac0, %[temp0], %[yscale] \n\t"
        "mult $ac1, %[temp3], %[temp4] \n\t"
        "maddu $ac1, %[temp1], %[yscale] \n\t"
        "mult $ac2, %[temp3], %[temp4] \n\t"
        "maddu $ac2, %[temp2], %[yscale] \n\t"
        "mult $ac3, %[temp3], %[temp4] \n\t"
        "maddu $ac3, %[temp5], %[yscale] \n\t"
        "addiu %[frow], %[frow], 16 \n\t"
        "mfhi %[temp0], $ac0 \n\t"
        "mfhi %[temp1], $ac1 \n\t"
        "mfhi %[temp2], $ac2 \n\t"
        "mfhi %[temp5], $ac3 \n\t"
        // v[k] = ((irow[k] - frac[k]) * fxy_scale + bias) >> 32
        "lw %[temp8], 0(%[irow]) \n\t"
        "lw %[temp9], 4(%[irow]) \n\t"
        "lw %[temp10], 8(%[irow]) \n\t"
        "lw %[temp11], 12(%[irow]) \n\t"
        "addiu %[dst], %[dst], 4 \n\t"
        "addiu %[irow], %[irow], 16 \n\t"
        "subu %[temp8], %[temp8], %[temp0] \n\t"
        "subu %[temp9], %[temp9], %[temp1] \n\t"
        "subu %[temp10], %[temp10], %[temp2] \n\t"
        "subu %[temp11], %[temp11], %[temp5] \n\t"
        "mult $ac0, %[temp3], %[temp4] \n\t"
        "maddu $ac0, %[temp8], %[temp7] \n\t"
        "mult $ac1, %[temp3], %[temp4] \n\t"
        "maddu $ac1, %[temp9], %[temp7] \n\t"
        "mult $ac2, %[temp3], %[temp4] \n\t"
        "maddu $ac2, %[temp10], %[temp7] \n\t"
        "mult $ac3, %[temp3], %[temp4] \n\t"
        "maddu $ac3, %[temp11], %[temp7] \n\t"
        "mfhi %[temp8], $ac0 \n\t"
        "mfhi %[temp9], $ac1 \n\t"
        "mfhi %[temp10], $ac2 \n\t"
        "mfhi %[temp11], $ac3 \n\t"
        // irow[k] = frac[k]; dst[k] = low byte of v[k]. The pointers were
        // already advanced, hence the negative offsets.
        "sw %[temp0], -16(%[irow]) \n\t"
        "sw %[temp1], -12(%[irow]) \n\t"
        "sw %[temp2], -8(%[irow]) \n\t"
        "sw %[temp5], -4(%[irow]) \n\t"
        "sb %[temp8], -4(%[dst]) \n\t"
        "sb %[temp9], -3(%[dst]) \n\t"
        "sb %[temp10], -2(%[dst]) \n\t"
        "sb %[temp11], -1(%[dst]) \n\t"
        "bne %[frow], %[loop_end], 1b \n\t"
        : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp3]"=&r"(temp3),
          [temp4]"=&r"(temp4), [temp5]"=&r"(temp5), [frow]"+r"(frow),
          [irow]"+r"(irow), [dst]"+r"(dst), [loop_end]"=&r"(loop_end),
          [temp8]"=&r"(temp8), [temp9]"=&r"(temp9), [temp10]"=&r"(temp10),
          [temp11]"=&r"(temp11), [temp2]"=&r"(temp2)
        : [temp7]"r"(temp7), [yscale]"r"(yscale), [temp6]"r"(temp6)
        : "memory", "hi", "lo", "$ac1hi", "$ac1lo",
          "$ac2hi", "$ac2lo", "$ac3hi", "$ac3lo"
      );
    }
    // Scalar tail for the last (x_out_max & 3) samples.
    for (i = 0; i < (x_out_max & 0x3); ++i) {
      const uint32_t frac = (uint32_t)MULT_FIX_FLOOR(*frow++, yscale);
      const int v = (int)MULT_FIX(*irow - frac, wrk->fxy_scale);
      *dst++ = (v > 255) ? 255u : (uint8_t)v;
      *irow++ = frac;   // new fractional start
    }
  } else {
    // No partial contribution from frow: scale irow directly and reset it.
    if (x_out_max >= 4) {
      __asm__ volatile (
        "li %[temp3], 0x10000 \n\t"
        "li %[temp4], 0x8000 \n\t"
        "addu %[loop_end], %[irow], %[temp6] \n\t"
        "1: \n\t"
        "lw %[temp0], 0(%[irow]) \n\t"
        "lw %[temp1], 4(%[irow]) \n\t"
        "lw %[temp2], 8(%[irow]) \n\t"
        "lw %[temp5], 12(%[irow]) \n\t"
        "addiu %[dst], %[dst], 4 \n\t"
        "addiu %[irow], %[irow], 16 \n\t"
        // v[k] = (irow[k] * fxy_scale + bias) >> 32
        "mult $ac0, %[temp3], %[temp4] \n\t"
        "maddu $ac0, %[temp0], %[temp7] \n\t"
        "mult $ac1, %[temp3], %[temp4] \n\t"
        "maddu $ac1, %[temp1], %[temp7] \n\t"
        "mult $ac2, %[temp3], %[temp4] \n\t"
        "maddu $ac2, %[temp2], %[temp7] \n\t"
        "mult $ac3, %[temp3], %[temp4] \n\t"
        "maddu $ac3, %[temp5], %[temp7] \n\t"
        "mfhi %[temp0], $ac0 \n\t"
        "mfhi %[temp1], $ac1 \n\t"
        "mfhi %[temp2], $ac2 \n\t"
        "mfhi %[temp5], $ac3 \n\t"
        // irow[k] = 0; dst[k] = low byte of v[k]
        "sw $zero, -16(%[irow]) \n\t"
        "sw $zero, -12(%[irow]) \n\t"
        "sw $zero, -8(%[irow]) \n\t"
        "sw $zero, -4(%[irow]) \n\t"
        "sb %[temp0], -4(%[dst]) \n\t"
        "sb %[temp1], -3(%[dst]) \n\t"
        "sb %[temp2], -2(%[dst]) \n\t"
        "sb %[temp5], -1(%[dst]) \n\t"
        "bne %[irow], %[loop_end], 1b \n\t"
        : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp3]"=&r"(temp3),
          [temp4]"=&r"(temp4), [temp5]"=&r"(temp5), [irow]"+r"(irow),
          [dst]"+r"(dst), [loop_end]"=&r"(loop_end), [temp2]"=&r"(temp2)
        : [temp7]"r"(temp7), [temp6]"r"(temp6)
        : "memory", "hi", "lo", "$ac1hi", "$ac1lo",
          "$ac2hi", "$ac2lo", "$ac3hi", "$ac3lo"
      );
    }
    // Scalar tail for the last (x_out_max & 3) samples.
    for (i = 0; i < (x_out_max & 0x3); ++i) {
      const int v = (int)MULT_FIX_FLOOR(*irow, wrk->fxy_scale);
      *dst++ = (v > 255) ? 255u : (uint8_t)v;
      *irow++ = 0;
    }
  }
}
#endif  // 0
165
166	static void ExportRowExpand_MIPSdspR2(WebPRescaler* const wrk) {
167	int i;
168	uint8_t* dst = wrk->dst;
169	rescaler_t* irow = wrk->irow;
170	const int x_out_max = wrk->dst_width * wrk->num_channels;
171	const rescaler_t* frow = wrk->frow;
172	int temp0, temp1, temp2, temp3, temp4, temp5, loop_end;
173	const int temp6 = (x_out_max & ~`0x3`) << `2`;
174	const int temp7 = (int)wrk->fy_scale;
175	assert(!WebPRescalerOutputDone(wrk));
176	assert(wrk->y_accum <= `0`);
177	assert(wrk->y_expand);
178	assert(wrk->y_sub != `0`);
179	if (wrk->y_accum == `0`) {
180	if (x_out_max >= `4`) {
181	__asm__ volatile (
182	"li %[temp4], 0x10000 \n\t"
183	"li %[temp5], 0x8000 \n\t"
184	"addu %[loop_end], %[frow], %[temp6] \n\t"
185	"1: \n\t"
186	"lw %[temp0], 0(%[frow]) \n\t"
187	"lw %[temp1], 4(%[frow]) \n\t"
188	"lw %[temp2], 8(%[frow]) \n\t"
189	"lw %[temp3], 12(%[frow]) \n\t"
190	"addiu %[dst], %[dst], 4 \n\t"
191	"addiu %[frow], %[frow], 16 \n\t"
192	"mult $ac0, %[temp4], %[temp5] \n\t"
193	"maddu $ac0, %[temp0], %[temp7] \n\t"
194	"mult $ac1, %[temp4], %[temp5] \n\t"
195	"maddu $ac1, %[temp1], %[temp7] \n\t"
196	"mult $ac2, %[temp4], %[temp5] \n\t"
197	"maddu $ac2, %[temp2], %[temp7] \n\t"
198	"mult $ac3, %[temp4], %[temp5] \n\t"
199	"maddu $ac3, %[temp3], %[temp7] \n\t"
200	"mfhi %[temp0], $ac0 \n\t"
201	"mfhi %[temp1], $ac1 \n\t"
202	"mfhi %[temp2], $ac2 \n\t"
203	"mfhi %[temp3], $ac3 \n\t"
204	"sb %[temp0], -4(%[dst]) \n\t"
205	"sb %[temp1], -3(%[dst]) \n\t"
206	"sb %[temp2], -2(%[dst]) \n\t"
207	"sb %[temp3], -1(%[dst]) \n\t"
208	"bne %[frow], %[loop_end], 1b \n\t"
209	: [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp3]"=&r"(temp3),
210	[temp4]"=&r"(temp4), [temp5]"=&r"(temp5), [frow]"+r"(frow),
211	[dst]"+r"(dst), [loop_end]"=&r"(loop_end), [temp2]"=&r"(temp2)
212	: [temp7]"r"(temp7), [temp6]"r"(temp6)
213	: "memory", "hi", "lo", "$ac1hi", "$ac1lo",
214	"$ac2hi", "$ac2lo", "$ac3hi", "$ac3lo"
215	);
216	}
217	for (i = `0`; i < (x_out_max & `0x3`); ++i) {
218	const uint32_t J = *frow++;
219	const int v = (int)MULT_FIX(J, wrk->fy_scale);
220	*dst++ = (v > `255`) ? `255u` : (uint8_t)v;
221	}
222	} else {
223	const uint32_t B = WEBP_RESCALER_FRAC(-wrk->y_accum, wrk->y_sub);
224	const uint32_t A = (uint32_t)(WEBP_RESCALER_ONE - B);
225	if (x_out_max >= `4`) {
226	int temp8, temp9, temp10, temp11;
227	__asm__ volatile (
228	"li %[temp8], 0x10000 \n\t"
229	"li %[temp9], 0x8000 \n\t"
230	"addu %[loop_end], %[frow], %[temp6] \n\t"
231	"1: \n\t"
232	"lw %[temp0], 0(%[frow]) \n\t"
233	"lw %[temp1], 4(%[frow]) \n\t"
234	"lw %[temp2], 8(%[frow]) \n\t"
235	"lw %[temp3], 12(%[frow]) \n\t"
236	"lw %[temp4], 0(%[irow]) \n\t"
237	"lw %[temp5], 4(%[irow]) \n\t"
238	"lw %[temp10], 8(%[irow]) \n\t"
239	"lw %[temp11], 12(%[irow]) \n\t"
240	"addiu %[dst], %[dst], 4 \n\t"
241	"mult $ac0, %[temp8], %[temp9] \n\t"
242	"maddu $ac0, %[A], %[temp0] \n\t"
243	"maddu $ac0, %[B], %[temp4] \n\t"
244	"mult $ac1, %[temp8], %[temp9] \n\t"
245	"maddu $ac1, %[A], %[temp1] \n\t"
246	"maddu $ac1, %[B], %[temp5] \n\t"
247	"mult $ac2, %[temp8], %[temp9] \n\t"
248	"maddu $ac2, %[A], %[temp2] \n\t"
249	"maddu $ac2, %[B], %[temp10] \n\t"
250	"mult $ac3, %[temp8], %[temp9] \n\t"
251	"maddu $ac3, %[A], %[temp3] \n\t"
252	"maddu $ac3, %[B], %[temp11] \n\t"
253	"addiu %[frow], %[frow], 16 \n\t"
254	"addiu %[irow], %[irow], 16 \n\t"
255	"mfhi %[temp0], $ac0 \n\t"
256	"mfhi %[temp1], $ac1 \n\t"
257	"mfhi %[temp2], $ac2 \n\t"
258	"mfhi %[temp3], $ac3 \n\t"
259	"mult $ac0, %[temp8], %[temp9] \n\t"
260	"maddu $ac0, %[temp0], %[temp7] \n\t"
261	"mult $ac1, %[temp8], %[temp9] \n\t"
262	"maddu $ac1, %[temp1], %[temp7] \n\t"
263	"mult $ac2, %[temp8], %[temp9] \n\t"
264	"maddu $ac2, %[temp2], %[temp7] \n\t"
265	"mult $ac3, %[temp8], %[temp9] \n\t"
266	"maddu $ac3, %[temp3], %[temp7] \n\t"
267	"mfhi %[temp0], $ac0 \n\t"
268	"mfhi %[temp1], $ac1 \n\t"
269	"mfhi %[temp2], $ac2 \n\t"
270	"mfhi %[temp3], $ac3 \n\t"
271	"sb %[temp0], -4(%[dst]) \n\t"
272	"sb %[temp1], -3(%[dst]) \n\t"
273	"sb %[temp2], -2(%[dst]) \n\t"
274	"sb %[temp3], -1(%[dst]) \n\t"
275	"bne %[frow], %[loop_end], 1b \n\t"
276	: [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp3]"=&r"(temp3),
277	[temp4]"=&r"(temp4), [temp5]"=&r"(temp5), [frow]"+r"(frow),
278	[irow]"+r"(irow), [dst]"+r"(dst), [loop_end]"=&r"(loop_end),
279	[temp8]"=&r"(temp8), [temp9]"=&r"(temp9), [temp10]"=&r"(temp10),
280	[temp11]"=&r"(temp11), [temp2]"=&r"(temp2)
281	: [temp7]"r"(temp7), [temp6]"r"(temp6), [A]"r"(A), [B]"r"(B)
282	: "memory", "hi", "lo", "$ac1hi", "$ac1lo",
283	"$ac2hi", "$ac2lo", "$ac3hi", "$ac3lo"
284	);
285	}
286	for (i = `0`; i < (x_out_max & `0x3`); ++i) {
287	const uint64_t I = (uint64_t)A * *frow++
288	+ (uint64_t)B * *irow++;
289	const uint32_t J = (uint32_t)((I + ROUNDER) >> WEBP_RESCALER_RFIX);
290	const int v = (int)MULT_FIX(J, wrk->fy_scale);
291	*dst++ = (v > `255`) ? `255u` : (uint8_t)v;
292	}
293	}
294	}
295
296	#undef MULT_FIX_FLOOR
297	#undef MULT_FIX
298	#undef ROUNDER
299
300	//------------------------------------------------------------------------------
301	// Entry point
302
// Prototype for the init routine defined right below, so the definition is
// preceded by a declaration.
303	extern void WebPRescalerDspInitMIPSdspR2(void);
304
// Points the rescaler's row-export function pointer(s) at the MIPS DSP R2
// implementations defined in this file.
305	WEBP_TSAN_IGNORE_FUNCTION void WebPRescalerDspInitMIPSdspR2(void) {
306	WebPRescalerExportRowExpand = ExportRowExpand_MIPSdspR2;
// The shrink variant is deliberately not installed: ExportRowShrink_MIPSdspR2
// is compiled out with '#if 0' in this file, so the pointer keeps whatever
// value it had before this call.
307	// WebPRescalerExportRowShrink = ExportRowShrink_MIPSdspR2;
308	}
309
310	#else // !WEBP_USE_MIPS_DSP_R2
311
312	WEBP_DSP_INIT_STUB(WebPRescalerDspInitMIPSdspR2)
313
314	#endif // WEBP_USE_MIPS_DSP_R2
315

Browse the source code of Skia/third_party/externals/libwebp/src/dsp/rescaler_mips_dsp_r2.c