1// Copyright 2014 Google Inc. All Rights Reserved.
2//
3// Use of this source code is governed by a BSD-style license
4// that can be found in the COPYING file in the root of the source
5// tree. An additional intellectual property rights grant can be found
6// in the file PATENTS. All contributing project authors may
7// be found in the AUTHORS file in the root of the source tree.
8// -----------------------------------------------------------------------------
9//
10// MIPS version of rescaling functions
11//
12// Author(s): Djordje Pesut (djordje.pesut@imgtec.com)
13
14#include "src/dsp/dsp.h"
15
16#if defined(WEBP_USE_MIPS_DSP_R2) && !defined(WEBP_REDUCE_SIZE)
17
18#include <assert.h>
19#include "src/utils/rescaler_utils.h"
20
// Half of WEBP_RESCALER_ONE. Added before the final right shift in MULT_FIX()
// so that the result is rounded rather than truncated (this assumes
// WEBP_RESCALER_ONE == 1 << WEBP_RESCALER_RFIX; both are defined outside this
// file).
#define ROUNDER (WEBP_RESCALER_ONE >> 1)

// Product of 'a' and 'b' evaluated in 64 bits, biased by ROUNDER and then
// shifted right by WEBP_RESCALER_RFIX.
#define MULT_FIX(a, b) \
  (((uint64_t)(a) * (b) + ROUNDER) >> WEBP_RESCALER_RFIX)

// Same 64-bit product as MULT_FIX(), but without the rounding bias.
#define MULT_FIX_FLOOR(a, b) \
  (((uint64_t)(a) * (b)) >> WEBP_RESCALER_RFIX)
24
25//------------------------------------------------------------------------------
26// Row export
27
28#if 0 // disabled for now. TODO(skal): make match the C-code
29static void ExportRowShrink_MIPSdspR2(WebPRescaler* const wrk) {
30 int i;
31 const int x_out_max = wrk->dst_width * wrk->num_channels;
32 uint8_t* dst = wrk->dst;
33 rescaler_t* irow = wrk->irow;
34 const rescaler_t* frow = wrk->frow;
35 const int yscale = wrk->fy_scale * (-wrk->y_accum);
36 int temp0, temp1, temp2, temp3, temp4, temp5, loop_end;
37 const int temp7 = (int)wrk->fxy_scale;
38 const int temp6 = (x_out_max & ~0x3) << 2;
39 assert(!WebPRescalerOutputDone(wrk));
40 assert(wrk->y_accum <= 0);
41 assert(!wrk->y_expand);
42 assert(wrk->fxy_scale != 0);
43 if (yscale) {
44 if (x_out_max >= 4) {
45 int temp8, temp9, temp10, temp11;
46 __asm__ volatile (
47 "li %[temp3], 0x10000 \n\t"
48 "li %[temp4], 0x8000 \n\t"
49 "addu %[loop_end], %[frow], %[temp6] \n\t"
50 "1: \n\t"
51 "lw %[temp0], 0(%[frow]) \n\t"
52 "lw %[temp1], 4(%[frow]) \n\t"
53 "lw %[temp2], 8(%[frow]) \n\t"
54 "lw %[temp5], 12(%[frow]) \n\t"
55 "mult $ac0, %[temp3], %[temp4] \n\t"
56 "maddu $ac0, %[temp0], %[yscale] \n\t"
57 "mult $ac1, %[temp3], %[temp4] \n\t"
58 "maddu $ac1, %[temp1], %[yscale] \n\t"
59 "mult $ac2, %[temp3], %[temp4] \n\t"
60 "maddu $ac2, %[temp2], %[yscale] \n\t"
61 "mult $ac3, %[temp3], %[temp4] \n\t"
62 "maddu $ac3, %[temp5], %[yscale] \n\t"
63 "addiu %[frow], %[frow], 16 \n\t"
64 "mfhi %[temp0], $ac0 \n\t"
65 "mfhi %[temp1], $ac1 \n\t"
66 "mfhi %[temp2], $ac2 \n\t"
67 "mfhi %[temp5], $ac3 \n\t"
68 "lw %[temp8], 0(%[irow]) \n\t"
69 "lw %[temp9], 4(%[irow]) \n\t"
70 "lw %[temp10], 8(%[irow]) \n\t"
71 "lw %[temp11], 12(%[irow]) \n\t"
72 "addiu %[dst], %[dst], 4 \n\t"
73 "addiu %[irow], %[irow], 16 \n\t"
74 "subu %[temp8], %[temp8], %[temp0] \n\t"
75 "subu %[temp9], %[temp9], %[temp1] \n\t"
76 "subu %[temp10], %[temp10], %[temp2] \n\t"
77 "subu %[temp11], %[temp11], %[temp5] \n\t"
78 "mult $ac0, %[temp3], %[temp4] \n\t"
79 "maddu $ac0, %[temp8], %[temp7] \n\t"
80 "mult $ac1, %[temp3], %[temp4] \n\t"
81 "maddu $ac1, %[temp9], %[temp7] \n\t"
82 "mult $ac2, %[temp3], %[temp4] \n\t"
83 "maddu $ac2, %[temp10], %[temp7] \n\t"
84 "mult $ac3, %[temp3], %[temp4] \n\t"
85 "maddu $ac3, %[temp11], %[temp7] \n\t"
86 "mfhi %[temp8], $ac0 \n\t"
87 "mfhi %[temp9], $ac1 \n\t"
88 "mfhi %[temp10], $ac2 \n\t"
89 "mfhi %[temp11], $ac3 \n\t"
90 "sw %[temp0], -16(%[irow]) \n\t"
91 "sw %[temp1], -12(%[irow]) \n\t"
92 "sw %[temp2], -8(%[irow]) \n\t"
93 "sw %[temp5], -4(%[irow]) \n\t"
94 "sb %[temp8], -4(%[dst]) \n\t"
95 "sb %[temp9], -3(%[dst]) \n\t"
96 "sb %[temp10], -2(%[dst]) \n\t"
97 "sb %[temp11], -1(%[dst]) \n\t"
98 "bne %[frow], %[loop_end], 1b \n\t"
99 : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp3]"=&r"(temp3),
100 [temp4]"=&r"(temp4), [temp5]"=&r"(temp5), [frow]"+r"(frow),
101 [irow]"+r"(irow), [dst]"+r"(dst), [loop_end]"=&r"(loop_end),
102 [temp8]"=&r"(temp8), [temp9]"=&r"(temp9), [temp10]"=&r"(temp10),
103 [temp11]"=&r"(temp11), [temp2]"=&r"(temp2)
104 : [temp7]"r"(temp7), [yscale]"r"(yscale), [temp6]"r"(temp6)
105 : "memory", "hi", "lo", "$ac1hi", "$ac1lo",
106 "$ac2hi", "$ac2lo", "$ac3hi", "$ac3lo"
107 );
108 }
109 for (i = 0; i < (x_out_max & 0x3); ++i) {
110 const uint32_t frac = (uint32_t)MULT_FIX_FLOOR(*frow++, yscale);
111 const int v = (int)MULT_FIX(*irow - frac, wrk->fxy_scale);
112 *dst++ = (v > 255) ? 255u : (uint8_t)v;
113 *irow++ = frac; // new fractional start
114 }
115 } else {
116 if (x_out_max >= 4) {
117 __asm__ volatile (
118 "li %[temp3], 0x10000 \n\t"
119 "li %[temp4], 0x8000 \n\t"
120 "addu %[loop_end], %[irow], %[temp6] \n\t"
121 "1: \n\t"
122 "lw %[temp0], 0(%[irow]) \n\t"
123 "lw %[temp1], 4(%[irow]) \n\t"
124 "lw %[temp2], 8(%[irow]) \n\t"
125 "lw %[temp5], 12(%[irow]) \n\t"
126 "addiu %[dst], %[dst], 4 \n\t"
127 "addiu %[irow], %[irow], 16 \n\t"
128 "mult $ac0, %[temp3], %[temp4] \n\t"
129 "maddu $ac0, %[temp0], %[temp7] \n\t"
130 "mult $ac1, %[temp3], %[temp4] \n\t"
131 "maddu $ac1, %[temp1], %[temp7] \n\t"
132 "mult $ac2, %[temp3], %[temp4] \n\t"
133 "maddu $ac2, %[temp2], %[temp7] \n\t"
134 "mult $ac3, %[temp3], %[temp4] \n\t"
135 "maddu $ac3, %[temp5], %[temp7] \n\t"
136 "mfhi %[temp0], $ac0 \n\t"
137 "mfhi %[temp1], $ac1 \n\t"
138 "mfhi %[temp2], $ac2 \n\t"
139 "mfhi %[temp5], $ac3 \n\t"
140 "sw $zero, -16(%[irow]) \n\t"
141 "sw $zero, -12(%[irow]) \n\t"
142 "sw $zero, -8(%[irow]) \n\t"
143 "sw $zero, -4(%[irow]) \n\t"
144 "sb %[temp0], -4(%[dst]) \n\t"
145 "sb %[temp1], -3(%[dst]) \n\t"
146 "sb %[temp2], -2(%[dst]) \n\t"
147 "sb %[temp5], -1(%[dst]) \n\t"
148 "bne %[irow], %[loop_end], 1b \n\t"
149 : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp3]"=&r"(temp3),
150 [temp4]"=&r"(temp4), [temp5]"=&r"(temp5), [irow]"+r"(irow),
151 [dst]"+r"(dst), [loop_end]"=&r"(loop_end), [temp2]"=&r"(temp2)
152 : [temp7]"r"(temp7), [temp6]"r"(temp6)
153 : "memory", "hi", "lo", "$ac1hi", "$ac1lo",
154 "$ac2hi", "$ac2lo", "$ac3hi", "$ac3lo"
155 );
156 }
157 for (i = 0; i < (x_out_max & 0x3); ++i) {
158 const int v = (int)MULT_FIX_FLOOR(*irow, wrk->fxy_scale);
159 *dst++ = (v > 255) ? 255u : (uint8_t)v;
160 *irow++ = 0;
161 }
162 }
163}
164#endif // 0
165
166static void ExportRowExpand_MIPSdspR2(WebPRescaler* const wrk) {
167 int i;
168 uint8_t* dst = wrk->dst;
169 rescaler_t* irow = wrk->irow;
170 const int x_out_max = wrk->dst_width * wrk->num_channels;
171 const rescaler_t* frow = wrk->frow;
172 int temp0, temp1, temp2, temp3, temp4, temp5, loop_end;
173 const int temp6 = (x_out_max & ~0x3) << 2;
174 const int temp7 = (int)wrk->fy_scale;
175 assert(!WebPRescalerOutputDone(wrk));
176 assert(wrk->y_accum <= 0);
177 assert(wrk->y_expand);
178 assert(wrk->y_sub != 0);
179 if (wrk->y_accum == 0) {
180 if (x_out_max >= 4) {
181 __asm__ volatile (
182 "li %[temp4], 0x10000 \n\t"
183 "li %[temp5], 0x8000 \n\t"
184 "addu %[loop_end], %[frow], %[temp6] \n\t"
185 "1: \n\t"
186 "lw %[temp0], 0(%[frow]) \n\t"
187 "lw %[temp1], 4(%[frow]) \n\t"
188 "lw %[temp2], 8(%[frow]) \n\t"
189 "lw %[temp3], 12(%[frow]) \n\t"
190 "addiu %[dst], %[dst], 4 \n\t"
191 "addiu %[frow], %[frow], 16 \n\t"
192 "mult $ac0, %[temp4], %[temp5] \n\t"
193 "maddu $ac0, %[temp0], %[temp7] \n\t"
194 "mult $ac1, %[temp4], %[temp5] \n\t"
195 "maddu $ac1, %[temp1], %[temp7] \n\t"
196 "mult $ac2, %[temp4], %[temp5] \n\t"
197 "maddu $ac2, %[temp2], %[temp7] \n\t"
198 "mult $ac3, %[temp4], %[temp5] \n\t"
199 "maddu $ac3, %[temp3], %[temp7] \n\t"
200 "mfhi %[temp0], $ac0 \n\t"
201 "mfhi %[temp1], $ac1 \n\t"
202 "mfhi %[temp2], $ac2 \n\t"
203 "mfhi %[temp3], $ac3 \n\t"
204 "sb %[temp0], -4(%[dst]) \n\t"
205 "sb %[temp1], -3(%[dst]) \n\t"
206 "sb %[temp2], -2(%[dst]) \n\t"
207 "sb %[temp3], -1(%[dst]) \n\t"
208 "bne %[frow], %[loop_end], 1b \n\t"
209 : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp3]"=&r"(temp3),
210 [temp4]"=&r"(temp4), [temp5]"=&r"(temp5), [frow]"+r"(frow),
211 [dst]"+r"(dst), [loop_end]"=&r"(loop_end), [temp2]"=&r"(temp2)
212 : [temp7]"r"(temp7), [temp6]"r"(temp6)
213 : "memory", "hi", "lo", "$ac1hi", "$ac1lo",
214 "$ac2hi", "$ac2lo", "$ac3hi", "$ac3lo"
215 );
216 }
217 for (i = 0; i < (x_out_max & 0x3); ++i) {
218 const uint32_t J = *frow++;
219 const int v = (int)MULT_FIX(J, wrk->fy_scale);
220 *dst++ = (v > 255) ? 255u : (uint8_t)v;
221 }
222 } else {
223 const uint32_t B = WEBP_RESCALER_FRAC(-wrk->y_accum, wrk->y_sub);
224 const uint32_t A = (uint32_t)(WEBP_RESCALER_ONE - B);
225 if (x_out_max >= 4) {
226 int temp8, temp9, temp10, temp11;
227 __asm__ volatile (
228 "li %[temp8], 0x10000 \n\t"
229 "li %[temp9], 0x8000 \n\t"
230 "addu %[loop_end], %[frow], %[temp6] \n\t"
231 "1: \n\t"
232 "lw %[temp0], 0(%[frow]) \n\t"
233 "lw %[temp1], 4(%[frow]) \n\t"
234 "lw %[temp2], 8(%[frow]) \n\t"
235 "lw %[temp3], 12(%[frow]) \n\t"
236 "lw %[temp4], 0(%[irow]) \n\t"
237 "lw %[temp5], 4(%[irow]) \n\t"
238 "lw %[temp10], 8(%[irow]) \n\t"
239 "lw %[temp11], 12(%[irow]) \n\t"
240 "addiu %[dst], %[dst], 4 \n\t"
241 "mult $ac0, %[temp8], %[temp9] \n\t"
242 "maddu $ac0, %[A], %[temp0] \n\t"
243 "maddu $ac0, %[B], %[temp4] \n\t"
244 "mult $ac1, %[temp8], %[temp9] \n\t"
245 "maddu $ac1, %[A], %[temp1] \n\t"
246 "maddu $ac1, %[B], %[temp5] \n\t"
247 "mult $ac2, %[temp8], %[temp9] \n\t"
248 "maddu $ac2, %[A], %[temp2] \n\t"
249 "maddu $ac2, %[B], %[temp10] \n\t"
250 "mult $ac3, %[temp8], %[temp9] \n\t"
251 "maddu $ac3, %[A], %[temp3] \n\t"
252 "maddu $ac3, %[B], %[temp11] \n\t"
253 "addiu %[frow], %[frow], 16 \n\t"
254 "addiu %[irow], %[irow], 16 \n\t"
255 "mfhi %[temp0], $ac0 \n\t"
256 "mfhi %[temp1], $ac1 \n\t"
257 "mfhi %[temp2], $ac2 \n\t"
258 "mfhi %[temp3], $ac3 \n\t"
259 "mult $ac0, %[temp8], %[temp9] \n\t"
260 "maddu $ac0, %[temp0], %[temp7] \n\t"
261 "mult $ac1, %[temp8], %[temp9] \n\t"
262 "maddu $ac1, %[temp1], %[temp7] \n\t"
263 "mult $ac2, %[temp8], %[temp9] \n\t"
264 "maddu $ac2, %[temp2], %[temp7] \n\t"
265 "mult $ac3, %[temp8], %[temp9] \n\t"
266 "maddu $ac3, %[temp3], %[temp7] \n\t"
267 "mfhi %[temp0], $ac0 \n\t"
268 "mfhi %[temp1], $ac1 \n\t"
269 "mfhi %[temp2], $ac2 \n\t"
270 "mfhi %[temp3], $ac3 \n\t"
271 "sb %[temp0], -4(%[dst]) \n\t"
272 "sb %[temp1], -3(%[dst]) \n\t"
273 "sb %[temp2], -2(%[dst]) \n\t"
274 "sb %[temp3], -1(%[dst]) \n\t"
275 "bne %[frow], %[loop_end], 1b \n\t"
276 : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp3]"=&r"(temp3),
277 [temp4]"=&r"(temp4), [temp5]"=&r"(temp5), [frow]"+r"(frow),
278 [irow]"+r"(irow), [dst]"+r"(dst), [loop_end]"=&r"(loop_end),
279 [temp8]"=&r"(temp8), [temp9]"=&r"(temp9), [temp10]"=&r"(temp10),
280 [temp11]"=&r"(temp11), [temp2]"=&r"(temp2)
281 : [temp7]"r"(temp7), [temp6]"r"(temp6), [A]"r"(A), [B]"r"(B)
282 : "memory", "hi", "lo", "$ac1hi", "$ac1lo",
283 "$ac2hi", "$ac2lo", "$ac3hi", "$ac3lo"
284 );
285 }
286 for (i = 0; i < (x_out_max & 0x3); ++i) {
287 const uint64_t I = (uint64_t)A * *frow++
288 + (uint64_t)B * *irow++;
289 const uint32_t J = (uint32_t)((I + ROUNDER) >> WEBP_RESCALER_RFIX);
290 const int v = (int)MULT_FIX(J, wrk->fy_scale);
291 *dst++ = (v > 255) ? 255u : (uint8_t)v;
292 }
293 }
294}
295
// The fixed-point helper macros are only needed by the row-export code above;
// drop them so they do not leak past this point.
#undef MULT_FIX_FLOOR
#undef MULT_FIX
#undef ROUNDER
299
300//------------------------------------------------------------------------------
301// Entry point
302
303extern void WebPRescalerDspInitMIPSdspR2(void);
304
305WEBP_TSAN_IGNORE_FUNCTION void WebPRescalerDspInitMIPSdspR2(void) {
306 WebPRescalerExportRowExpand = ExportRowExpand_MIPSdspR2;
307// WebPRescalerExportRowShrink = ExportRowShrink_MIPSdspR2;
308}
309
310#else // !WEBP_USE_MIPS_DSP_R2
311
// Used when WEBP_USE_MIPS_DSP_R2 is not defined or WEBP_REDUCE_SIZE is defined.
// WEBP_DSP_INIT_STUB is defined outside this file; presumably it provides a
// do-nothing WebPRescalerDspInitMIPSdspR2 so the symbol still exists -- TODO
// confirm against the macro's definition.
WEBP_DSP_INIT_STUB(WebPRescalerDspInitMIPSdspR2)
313
314#endif // WEBP_USE_MIPS_DSP_R2
315