1// Copyright 2014 Google Inc. All Rights Reserved.
2//
3// Use of this source code is governed by a BSD-style license
4// that can be found in the COPYING file in the root of the source
5// tree. An additional intellectual property rights grant can be found
6// in the file PATENTS. All contributing project authors may
7// be found in the AUTHORS file in the root of the source tree.
8// -----------------------------------------------------------------------------
9//
10// MIPS version of rescaling functions
11//
12// Author(s): Djordje Pesut (djordje.pesut@imgtec.com)
13
14#include "./dsp.h"
15
16#if defined(WEBP_USE_MIPS_DSP_R2)
17
18#include <assert.h>
19#include "../utils/rescaler_utils.h"
20
// Rounding term added before the final right shift in MULT_FIX(): half of
// WEBP_RESCALER_ONE.
#define ROUNDER (WEBP_RESCALER_ONE >> 1)
// Fixed-point multiply: widens 'x' to 64 bits, multiplies by 'y', adds ROUNDER
// and shifts right by WEBP_RESCALER_RFIX.
// NOTE: only 'x' is cast. A negative signed 'y' is sign-extended to 64 bits by
// the usual arithmetic conversions, so 'y' should be an unsigned value.
// NOTE(review): the assembly in this file hard-codes a 0x80000000 rounder and a
// 32-bit shift (mfhi), i.e. it presumably relies on WEBP_RESCALER_RFIX == 32
// as defined in rescaler_utils.h -- confirm.
#define MULT_FIX(x, y) (((uint64_t)(x) * (y) + ROUNDER) >> WEBP_RESCALER_RFIX)
23
24//------------------------------------------------------------------------------
25// Row export
26
27static void ExportRowShrink(WebPRescaler* const wrk) {
28 int i;
29 const int x_out_max = wrk->dst_width * wrk->num_channels;
30 uint8_t* dst = wrk->dst;
31 rescaler_t* irow = wrk->irow;
32 const rescaler_t* frow = wrk->frow;
33 const int yscale = wrk->fy_scale * (-wrk->y_accum);
34 int temp0, temp1, temp2, temp3, temp4, temp5, loop_end;
35 const int temp7 = (int)wrk->fxy_scale;
36 const int temp6 = (x_out_max & ~0x3) << 2;
37 assert(!WebPRescalerOutputDone(wrk));
38 assert(wrk->y_accum <= 0);
39 assert(!wrk->y_expand);
40 assert(wrk->fxy_scale != 0);
41 if (yscale) {
42 if (x_out_max >= 4) {
43 int temp8, temp9, temp10, temp11;
44 __asm__ volatile (
45 "li %[temp3], 0x10000 \n\t"
46 "li %[temp4], 0x8000 \n\t"
47 "addu %[loop_end], %[frow], %[temp6] \n\t"
48 "1: \n\t"
49 "lw %[temp0], 0(%[frow]) \n\t"
50 "lw %[temp1], 4(%[frow]) \n\t"
51 "lw %[temp2], 8(%[frow]) \n\t"
52 "lw %[temp5], 12(%[frow]) \n\t"
53 "mult $ac0, %[temp3], %[temp4] \n\t"
54 "maddu $ac0, %[temp0], %[yscale] \n\t"
55 "mult $ac1, %[temp3], %[temp4] \n\t"
56 "maddu $ac1, %[temp1], %[yscale] \n\t"
57 "mult $ac2, %[temp3], %[temp4] \n\t"
58 "maddu $ac2, %[temp2], %[yscale] \n\t"
59 "mult $ac3, %[temp3], %[temp4] \n\t"
60 "maddu $ac3, %[temp5], %[yscale] \n\t"
61 "addiu %[frow], %[frow], 16 \n\t"
62 "mfhi %[temp0], $ac0 \n\t"
63 "mfhi %[temp1], $ac1 \n\t"
64 "mfhi %[temp2], $ac2 \n\t"
65 "mfhi %[temp5], $ac3 \n\t"
66 "lw %[temp8], 0(%[irow]) \n\t"
67 "lw %[temp9], 4(%[irow]) \n\t"
68 "lw %[temp10], 8(%[irow]) \n\t"
69 "lw %[temp11], 12(%[irow]) \n\t"
70 "addiu %[dst], %[dst], 4 \n\t"
71 "addiu %[irow], %[irow], 16 \n\t"
72 "subu %[temp8], %[temp8], %[temp0] \n\t"
73 "subu %[temp9], %[temp9], %[temp1] \n\t"
74 "subu %[temp10], %[temp10], %[temp2] \n\t"
75 "subu %[temp11], %[temp11], %[temp5] \n\t"
76 "mult $ac0, %[temp3], %[temp4] \n\t"
77 "maddu $ac0, %[temp8], %[temp7] \n\t"
78 "mult $ac1, %[temp3], %[temp4] \n\t"
79 "maddu $ac1, %[temp9], %[temp7] \n\t"
80 "mult $ac2, %[temp3], %[temp4] \n\t"
81 "maddu $ac2, %[temp10], %[temp7] \n\t"
82 "mult $ac3, %[temp3], %[temp4] \n\t"
83 "maddu $ac3, %[temp11], %[temp7] \n\t"
84 "mfhi %[temp8], $ac0 \n\t"
85 "mfhi %[temp9], $ac1 \n\t"
86 "mfhi %[temp10], $ac2 \n\t"
87 "mfhi %[temp11], $ac3 \n\t"
88 "sw %[temp0], -16(%[irow]) \n\t"
89 "sw %[temp1], -12(%[irow]) \n\t"
90 "sw %[temp2], -8(%[irow]) \n\t"
91 "sw %[temp5], -4(%[irow]) \n\t"
92 "sb %[temp8], -4(%[dst]) \n\t"
93 "sb %[temp9], -3(%[dst]) \n\t"
94 "sb %[temp10], -2(%[dst]) \n\t"
95 "sb %[temp11], -1(%[dst]) \n\t"
96 "bne %[frow], %[loop_end], 1b \n\t"
97 : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp3]"=&r"(temp3),
98 [temp4]"=&r"(temp4), [temp5]"=&r"(temp5), [frow]"+r"(frow),
99 [irow]"+r"(irow), [dst]"+r"(dst), [loop_end]"=&r"(loop_end),
100 [temp8]"=&r"(temp8), [temp9]"=&r"(temp9), [temp10]"=&r"(temp10),
101 [temp11]"=&r"(temp11), [temp2]"=&r"(temp2)
102 : [temp7]"r"(temp7), [yscale]"r"(yscale), [temp6]"r"(temp6)
103 : "memory", "hi", "lo", "$ac1hi", "$ac1lo",
104 "$ac2hi", "$ac2lo", "$ac3hi", "$ac3lo"
105 );
106 }
107 for (i = 0; i < (x_out_max & 0x3); ++i) {
108 const uint32_t frac = (uint32_t)MULT_FIX(*frow++, yscale);
109 const int v = (int)MULT_FIX(*irow - frac, wrk->fxy_scale);
110 assert(v >= 0 && v <= 255);
111 *dst++ = v;
112 *irow++ = frac; // new fractional start
113 }
114 } else {
115 if (x_out_max >= 4) {
116 __asm__ volatile (
117 "li %[temp3], 0x10000 \n\t"
118 "li %[temp4], 0x8000 \n\t"
119 "addu %[loop_end], %[irow], %[temp6] \n\t"
120 "1: \n\t"
121 "lw %[temp0], 0(%[irow]) \n\t"
122 "lw %[temp1], 4(%[irow]) \n\t"
123 "lw %[temp2], 8(%[irow]) \n\t"
124 "lw %[temp5], 12(%[irow]) \n\t"
125 "addiu %[dst], %[dst], 4 \n\t"
126 "addiu %[irow], %[irow], 16 \n\t"
127 "mult $ac0, %[temp3], %[temp4] \n\t"
128 "maddu $ac0, %[temp0], %[temp7] \n\t"
129 "mult $ac1, %[temp3], %[temp4] \n\t"
130 "maddu $ac1, %[temp1], %[temp7] \n\t"
131 "mult $ac2, %[temp3], %[temp4] \n\t"
132 "maddu $ac2, %[temp2], %[temp7] \n\t"
133 "mult $ac3, %[temp3], %[temp4] \n\t"
134 "maddu $ac3, %[temp5], %[temp7] \n\t"
135 "mfhi %[temp0], $ac0 \n\t"
136 "mfhi %[temp1], $ac1 \n\t"
137 "mfhi %[temp2], $ac2 \n\t"
138 "mfhi %[temp5], $ac3 \n\t"
139 "sw $zero, -16(%[irow]) \n\t"
140 "sw $zero, -12(%[irow]) \n\t"
141 "sw $zero, -8(%[irow]) \n\t"
142 "sw $zero, -4(%[irow]) \n\t"
143 "sb %[temp0], -4(%[dst]) \n\t"
144 "sb %[temp1], -3(%[dst]) \n\t"
145 "sb %[temp2], -2(%[dst]) \n\t"
146 "sb %[temp5], -1(%[dst]) \n\t"
147 "bne %[irow], %[loop_end], 1b \n\t"
148 : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp3]"=&r"(temp3),
149 [temp4]"=&r"(temp4), [temp5]"=&r"(temp5), [irow]"+r"(irow),
150 [dst]"+r"(dst), [loop_end]"=&r"(loop_end), [temp2]"=&r"(temp2)
151 : [temp7]"r"(temp7), [temp6]"r"(temp6)
152 : "memory", "hi", "lo", "$ac1hi", "$ac1lo",
153 "$ac2hi", "$ac2lo", "$ac3hi", "$ac3lo"
154 );
155 }
156 for (i = 0; i < (x_out_max & 0x3); ++i) {
157 const int v = (int)MULT_FIX(*irow, wrk->fxy_scale);
158 assert(v >= 0 && v <= 255);
159 *dst++ = v;
160 *irow++ = 0;
161 }
162 }
163}
164
// Exports one output row while expanding vertically (asserts wrk->y_expand).
//
// With x_out_max = dst_width * num_channels, each sample x is produced as:
//   - y_accum == 0:
//       dst[x] = MULT_FIX(frow[x], fy_scale)
//   - y_accum != 0:
//       B = WEBP_RESCALER_FRAC(-y_accum, y_sub), A = WEBP_RESCALER_ONE - B
//       J = (A * frow[x] + B * irow[x] + ROUNDER) >> WEBP_RESCALER_RFIX
//       dst[x] = MULT_FIX(J, fy_scale)
// Neither frow nor irow is written by this function.
//
// Samples are handled four at a time by the DSP R2 assembly loops; the
// remaining (x_out_max & 3) samples go through the scalar tail loops, which
// continue from the pointers the assembly advanced.
//
// Assembly idiom: "mult $acN, 0x10000, 0x8000" preloads the accumulator with
// 0x80000000 (the rounding term), "maddu" adds an unsigned 32x32 product and
// "mfhi" reads back the upper 32 bits.
// NOTE(review): this matches MULT_FIX() only if WEBP_RESCALER_RFIX is 32
// (defined in rescaler_utils.h, not visible here) -- confirm.
static void ExportRowExpand(WebPRescaler* const wrk) {
  int i;
  uint8_t* dst = wrk->dst;
  rescaler_t* irow = wrk->irow;
  const int x_out_max = wrk->dst_width * wrk->num_channels;
  const rescaler_t* frow = wrk->frow;
  int temp0, temp1, temp2, temp3, temp4, temp5, loop_end;
  // Byte length of the part of the row handled by the assembly loops
  // (multiple of four samples; the lw offsets below step four bytes per sample).
  const int temp6 = (x_out_max & ~0x3) << 2;
  // Scale factor handed to the assembly as a plain register operand.
  const int temp7 = (int)wrk->fy_scale;
  assert(!WebPRescalerOutputDone(wrk));
  assert(wrk->y_accum <= 0);
  assert(wrk->y_expand);
  assert(wrk->y_sub != 0);
  if (wrk->y_accum == 0) {
    // No blending needed: only frow contributes.
    if (x_out_max >= 4) {
      __asm__ volatile (
        // temp4 * temp5 == 0x80000000: rounding term for every accumulator.
        "li %[temp4], 0x10000 \n\t"
        "li %[temp5], 0x8000 \n\t"
        "addu %[loop_end], %[frow], %[temp6] \n\t"
        "1: \n\t"
        "lw %[temp0], 0(%[frow]) \n\t"
        "lw %[temp1], 4(%[frow]) \n\t"
        "lw %[temp2], 8(%[frow]) \n\t"
        "lw %[temp3], 12(%[frow]) \n\t"
        // Pointers are bumped early; the stores below use negative offsets.
        "addiu %[dst], %[dst], 4 \n\t"
        "addiu %[frow], %[frow], 16 \n\t"
        // v[0..3] = (frow[0..3] * fy_scale + rounder) >> 32
        "mult $ac0, %[temp4], %[temp5] \n\t"
        "maddu $ac0, %[temp0], %[temp7] \n\t"
        "mult $ac1, %[temp4], %[temp5] \n\t"
        "maddu $ac1, %[temp1], %[temp7] \n\t"
        "mult $ac2, %[temp4], %[temp5] \n\t"
        "maddu $ac2, %[temp2], %[temp7] \n\t"
        "mult $ac3, %[temp4], %[temp5] \n\t"
        "maddu $ac3, %[temp3], %[temp7] \n\t"
        "mfhi %[temp0], $ac0 \n\t"
        "mfhi %[temp1], $ac1 \n\t"
        "mfhi %[temp2], $ac2 \n\t"
        "mfhi %[temp3], $ac3 \n\t"
        // dst[0..3] = low byte of v[0..3]
        "sb %[temp0], -4(%[dst]) \n\t"
        "sb %[temp1], -3(%[dst]) \n\t"
        "sb %[temp2], -2(%[dst]) \n\t"
        "sb %[temp3], -1(%[dst]) \n\t"
        "bne %[frow], %[loop_end], 1b \n\t"
        : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp3]"=&r"(temp3),
          [temp4]"=&r"(temp4), [temp5]"=&r"(temp5), [frow]"+r"(frow),
          [dst]"+r"(dst), [loop_end]"=&r"(loop_end), [temp2]"=&r"(temp2)
        : [temp7]"r"(temp7), [temp6]"r"(temp6)
        : "memory", "hi", "lo", "$ac1hi", "$ac1lo",
          "$ac2hi", "$ac2lo", "$ac3hi", "$ac3lo"
      );
    }
    // Scalar tail: the last (x_out_max & 3) samples.
    for (i = 0; i < (x_out_max & 0x3); ++i) {
      const uint32_t J = *frow++;
      const int v = (int)MULT_FIX(J, wrk->fy_scale);
      assert(v >= 0 && v <= 255);
      *dst++ = v;
    }
  } else {
    // Blend weights: B applies to irow, A (its complement to
    // WEBP_RESCALER_ONE) applies to frow.
    const uint32_t B = WEBP_RESCALER_FRAC(-wrk->y_accum, wrk->y_sub);
    const uint32_t A = (uint32_t)(WEBP_RESCALER_ONE - B);
    if (x_out_max >= 4) {
      int temp8, temp9, temp10, temp11;
      __asm__ volatile (
        // Here temp8 * temp9 == 0x80000000 is the rounding term; temp4 and
        // temp5 are used as data registers for irow samples instead.
        "li %[temp8], 0x10000 \n\t"
        "li %[temp9], 0x8000 \n\t"
        "addu %[loop_end], %[frow], %[temp6] \n\t"
        "1: \n\t"
        "lw %[temp0], 0(%[frow]) \n\t"
        "lw %[temp1], 4(%[frow]) \n\t"
        "lw %[temp2], 8(%[frow]) \n\t"
        "lw %[temp3], 12(%[frow]) \n\t"
        "lw %[temp4], 0(%[irow]) \n\t"
        "lw %[temp5], 4(%[irow]) \n\t"
        "lw %[temp10], 8(%[irow]) \n\t"
        "lw %[temp11], 12(%[irow]) \n\t"
        "addiu %[dst], %[dst], 4 \n\t"
        // J[0..3] = (A * frow[0..3] + B * irow[0..3] + rounder) >> 32
        "mult $ac0, %[temp8], %[temp9] \n\t"
        "maddu $ac0, %[A], %[temp0] \n\t"
        "maddu $ac0, %[B], %[temp4] \n\t"
        "mult $ac1, %[temp8], %[temp9] \n\t"
        "maddu $ac1, %[A], %[temp1] \n\t"
        "maddu $ac1, %[B], %[temp5] \n\t"
        "mult $ac2, %[temp8], %[temp9] \n\t"
        "maddu $ac2, %[A], %[temp2] \n\t"
        "maddu $ac2, %[B], %[temp10] \n\t"
        "mult $ac3, %[temp8], %[temp9] \n\t"
        "maddu $ac3, %[A], %[temp3] \n\t"
        "maddu $ac3, %[B], %[temp11] \n\t"
        "addiu %[frow], %[frow], 16 \n\t"
        "addiu %[irow], %[irow], 16 \n\t"
        "mfhi %[temp0], $ac0 \n\t"
        "mfhi %[temp1], $ac1 \n\t"
        "mfhi %[temp2], $ac2 \n\t"
        "mfhi %[temp3], $ac3 \n\t"
        // v[0..3] = (J[0..3] * fy_scale + rounder) >> 32
        "mult $ac0, %[temp8], %[temp9] \n\t"
        "maddu $ac0, %[temp0], %[temp7] \n\t"
        "mult $ac1, %[temp8], %[temp9] \n\t"
        "maddu $ac1, %[temp1], %[temp7] \n\t"
        "mult $ac2, %[temp8], %[temp9] \n\t"
        "maddu $ac2, %[temp2], %[temp7] \n\t"
        "mult $ac3, %[temp8], %[temp9] \n\t"
        "maddu $ac3, %[temp3], %[temp7] \n\t"
        "mfhi %[temp0], $ac0 \n\t"
        "mfhi %[temp1], $ac1 \n\t"
        "mfhi %[temp2], $ac2 \n\t"
        "mfhi %[temp3], $ac3 \n\t"
        // dst[0..3] = low byte of v[0..3] (dst was already advanced by 4)
        "sb %[temp0], -4(%[dst]) \n\t"
        "sb %[temp1], -3(%[dst]) \n\t"
        "sb %[temp2], -2(%[dst]) \n\t"
        "sb %[temp3], -1(%[dst]) \n\t"
        "bne %[frow], %[loop_end], 1b \n\t"
        : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp3]"=&r"(temp3),
          [temp4]"=&r"(temp4), [temp5]"=&r"(temp5), [frow]"+r"(frow),
          [irow]"+r"(irow), [dst]"+r"(dst), [loop_end]"=&r"(loop_end),
          [temp8]"=&r"(temp8), [temp9]"=&r"(temp9), [temp10]"=&r"(temp10),
          [temp11]"=&r"(temp11), [temp2]"=&r"(temp2)
        : [temp7]"r"(temp7), [temp6]"r"(temp6), [A]"r"(A), [B]"r"(B)
        : "memory", "hi", "lo", "$ac1hi", "$ac1lo",
          "$ac2hi", "$ac2lo", "$ac3hi", "$ac3lo"
      );
    }
    // Scalar tail: the last (x_out_max & 3) samples.
    for (i = 0; i < (x_out_max & 0x3); ++i) {
      const uint64_t I = (uint64_t)A * *frow++
                       + (uint64_t)B * *irow++;
      const uint32_t J = (uint32_t)((I + ROUNDER) >> WEBP_RESCALER_RFIX);
      const int v = (int)MULT_FIX(J, wrk->fy_scale);
      assert(v >= 0 && v <= 255);
      *dst++ = v;
    }
  }
}
296
297#undef MULT_FIX
298#undef ROUNDER
299
300//------------------------------------------------------------------------------
301// Entry point
302
303extern void WebPRescalerDspInitMIPSdspR2(void);
304
305WEBP_TSAN_IGNORE_FUNCTION void WebPRescalerDspInitMIPSdspR2(void) {
306 WebPRescalerExportRowExpand = ExportRowExpand;
307 WebPRescalerExportRowShrink = ExportRowShrink;
308}
309
310#else // !WEBP_USE_MIPS_DSP_R2
311
// Build without MIPS DSP R2 support: the init entry point is still provided
// through WEBP_DSP_INIT_STUB (presumably an empty function; the macro is
// defined outside this file -- confirm in dsp.h).
WEBP_DSP_INIT_STUB(WebPRescalerDspInitMIPSdspR2)
313
314#endif // WEBP_USE_MIPS_DSP_R2
315