1 | // Copyright 2014 Google Inc. All Rights Reserved. |
2 | // |
3 | // Use of this source code is governed by a BSD-style license |
4 | // that can be found in the COPYING file in the root of the source |
5 | // tree. An additional intellectual property rights grant can be found |
6 | // in the file PATENTS. All contributing project authors may |
7 | // be found in the AUTHORS file in the root of the source tree. |
8 | // ----------------------------------------------------------------------------- |
9 | // |
10 | // Image transforms and color space conversion methods for lossless decoder. |
11 | // |
12 | // Author(s): Djordje Pesut (djordje.pesut@imgtec.com) |
13 | // Jovan Zelincevic (jovan.zelincevic@imgtec.com) |
14 | |
15 | #include "src/dsp/dsp.h" |
16 | |
17 | #if defined(WEBP_USE_MIPS_DSP_R2) |
18 | |
19 | #include "src/dsp/lossless.h" |
20 | #include "src/dsp/lossless_common.h" |
21 | |
// Generates a static function FUNC_NAME that maps pixels through 'color_map'.
//   TYPE      : element type of 'src' and 'dst' (uint8_t or uint32_t); it is
//               also pasted into the assembler '.ifc' directives to select the
//               matching load/store sequence at assembly time.
//   GET_INDEX : C helper turning a source element into a 'color_map' index
//               (used for the scalar tail only).
//   GET_VALUE : C helper turning a 'color_map' entry into a destination
//               element (used for the scalar tail only).
// For every row in [y_start, y_end) the generated function consumes 'width'
// elements from 'src' and produces 'width' elements in 'dst'; both pointers
// simply keep advancing from one row to the next.
#define MAP_COLOR_FUNCS(FUNC_NAME, TYPE, GET_INDEX, GET_VALUE) \
static void FUNC_NAME(const TYPE* src, \
                      const uint32_t* const color_map, \
                      TYPE* dst, int y_start, int y_end, \
                      int width) { \
  const int quad_count = width >> 2;  /* groups of 4 pixels handled in asm */ \
  const int tail_count = width & 3;   /* remaining pixels handled in C */ \
  int row; \
  for (row = y_start; row < y_end; ++row) { \
    int i; \
    for (i = 0; i < quad_count; ++i) { \
      int idx0, idx1, idx2, idx3; \
      __asm__ volatile ( \
        /* uint8_t source: the four bytes are the indices themselves. */ \
        ".ifc " #TYPE ", uint8_t \n\t" \
        "lbu %[tmp1], 0(%[src]) \n\t" \
        "lbu %[tmp2], 1(%[src]) \n\t" \
        "lbu %[tmp3], 2(%[src]) \n\t" \
        "lbu %[tmp4], 3(%[src]) \n\t" \
        "addiu %[src], %[src], 4 \n\t" \
        ".endif \n\t" \
        /* uint32_t source: the index is bits 8..15 of each word. */ \
        ".ifc " #TYPE ", uint32_t \n\t" \
        "lw %[tmp1], 0(%[src]) \n\t" \
        "lw %[tmp2], 4(%[src]) \n\t" \
        "lw %[tmp3], 8(%[src]) \n\t" \
        "lw %[tmp4], 12(%[src]) \n\t" \
        "ext %[tmp1], %[tmp1], 8, 8 \n\t" \
        "ext %[tmp2], %[tmp2], 8, 8 \n\t" \
        "ext %[tmp3], %[tmp3], 8, 8 \n\t" \
        "ext %[tmp4], %[tmp4], 8, 8 \n\t" \
        "addiu %[src], %[src], 16 \n\t" \
        ".endif \n\t" \
        /* Scale the indices to byte offsets and fetch the map entries. */ \
        "sll %[tmp1], %[tmp1], 2 \n\t" \
        "sll %[tmp2], %[tmp2], 2 \n\t" \
        "sll %[tmp3], %[tmp3], 2 \n\t" \
        "sll %[tmp4], %[tmp4], 2 \n\t" \
        "lwx %[tmp1], %[tmp1](%[color_map]) \n\t" \
        "lwx %[tmp2], %[tmp2](%[color_map]) \n\t" \
        "lwx %[tmp3], %[tmp3](%[color_map]) \n\t" \
        "lwx %[tmp4], %[tmp4](%[color_map]) \n\t" \
        /* uint8_t destination: keep only bits 8..15 of each entry. */ \
        ".ifc " #TYPE ", uint8_t \n\t" \
        "ext %[tmp1], %[tmp1], 8, 8 \n\t" \
        "ext %[tmp2], %[tmp2], 8, 8 \n\t" \
        "ext %[tmp3], %[tmp3], 8, 8 \n\t" \
        "ext %[tmp4], %[tmp4], 8, 8 \n\t" \
        "sb %[tmp1], 0(%[dst]) \n\t" \
        "sb %[tmp2], 1(%[dst]) \n\t" \
        "sb %[tmp3], 2(%[dst]) \n\t" \
        "sb %[tmp4], 3(%[dst]) \n\t" \
        "addiu %[dst], %[dst], 4 \n\t" \
        ".endif \n\t" \
        /* uint32_t destination: store the whole entry. */ \
        ".ifc " #TYPE ", uint32_t \n\t" \
        "sw %[tmp1], 0(%[dst]) \n\t" \
        "sw %[tmp2], 4(%[dst]) \n\t" \
        "sw %[tmp3], 8(%[dst]) \n\t" \
        "sw %[tmp4], 12(%[dst]) \n\t" \
        "addiu %[dst], %[dst], 16 \n\t" \
        ".endif \n\t" \
        : [tmp1]"=&r"(idx0), [tmp2]"=&r"(idx1), [tmp3]"=&r"(idx2), \
          [tmp4]"=&r"(idx3), [src]"+&r"(src), [dst]"+r"(dst) \
        : [color_map]"r"(color_map) \
        : "memory" \
      ); \
    } \
    /* Scalar fallback for the last (width % 4) pixels of the row. */ \
    for (i = 0; i < tail_count; ++i) { \
      *dst++ = GET_VALUE(color_map[GET_INDEX(*src++)]); \
    } \
  } \
}

// 32-bit (ARGB) and 8-bit (alpha plane) instantiations.
MAP_COLOR_FUNCS(MapARGB_MIPSdspR2, uint32_t, VP8GetARGBIndex, VP8GetARGBValue)
MAP_COLOR_FUNCS(MapAlpha_MIPSdspR2, uint8_t, VP8GetAlphaIndex, VP8GetAlphaValue)

#undef MAP_COLOR_FUNCS
93 | |
94 | static WEBP_INLINE uint32_t ClampedAddSubtractFull(uint32_t c0, uint32_t c1, |
95 | uint32_t c2) { |
96 | int temp0, temp1, temp2, temp3, temp4, temp5; |
97 | __asm__ volatile ( |
98 | "preceu.ph.qbr %[temp1], %[c0] \n\t" |
99 | "preceu.ph.qbl %[temp2], %[c0] \n\t" |
100 | "preceu.ph.qbr %[temp3], %[c1] \n\t" |
101 | "preceu.ph.qbl %[temp4], %[c1] \n\t" |
102 | "preceu.ph.qbr %[temp5], %[c2] \n\t" |
103 | "preceu.ph.qbl %[temp0], %[c2] \n\t" |
104 | "subq.ph %[temp3], %[temp3], %[temp5] \n\t" |
105 | "subq.ph %[temp4], %[temp4], %[temp0] \n\t" |
106 | "addq.ph %[temp1], %[temp1], %[temp3] \n\t" |
107 | "addq.ph %[temp2], %[temp2], %[temp4] \n\t" |
108 | "shll_s.ph %[temp1], %[temp1], 7 \n\t" |
109 | "shll_s.ph %[temp2], %[temp2], 7 \n\t" |
110 | "precrqu_s.qb.ph %[temp2], %[temp2], %[temp1] \n\t" |
111 | : [temp0]"=r" (temp0), [temp1]"=&r" (temp1), [temp2]"=&r" (temp2), |
112 | [temp3]"=&r" (temp3), [temp4]"=&r" (temp4), [temp5]"=&r" (temp5) |
113 | : [c0]"r" (c0), [c1]"r" (c1), [c2]"r" (c2) |
114 | : "memory" |
115 | ); |
116 | return temp2; |
117 | } |
118 | |
119 | static WEBP_INLINE uint32_t ClampedAddSubtractHalf(uint32_t c0, uint32_t c1, |
120 | uint32_t c2) { |
121 | int temp0, temp1, temp2, temp3, temp4, temp5; |
122 | __asm__ volatile ( |
123 | "adduh.qb %[temp5], %[c0], %[c1] \n\t" |
124 | "preceu.ph.qbr %[temp3], %[c2] \n\t" |
125 | "preceu.ph.qbr %[temp1], %[temp5] \n\t" |
126 | "preceu.ph.qbl %[temp2], %[temp5] \n\t" |
127 | "preceu.ph.qbl %[temp4], %[c2] \n\t" |
128 | "subq.ph %[temp3], %[temp1], %[temp3] \n\t" |
129 | "subq.ph %[temp4], %[temp2], %[temp4] \n\t" |
130 | "shrl.ph %[temp5], %[temp3], 15 \n\t" |
131 | "shrl.ph %[temp0], %[temp4], 15 \n\t" |
132 | "addq.ph %[temp3], %[temp3], %[temp5] \n\t" |
133 | "addq.ph %[temp4], %[temp0], %[temp4] \n\t" |
134 | "shra.ph %[temp3], %[temp3], 1 \n\t" |
135 | "shra.ph %[temp4], %[temp4], 1 \n\t" |
136 | "addq.ph %[temp1], %[temp1], %[temp3] \n\t" |
137 | "addq.ph %[temp2], %[temp2], %[temp4] \n\t" |
138 | "shll_s.ph %[temp1], %[temp1], 7 \n\t" |
139 | "shll_s.ph %[temp2], %[temp2], 7 \n\t" |
140 | "precrqu_s.qb.ph %[temp1], %[temp2], %[temp1] \n\t" |
141 | : [temp0]"=r" (temp0), [temp1]"=&r" (temp1), [temp2]"=&r" (temp2), |
142 | [temp3]"=&r" (temp3), [temp4]"=r" (temp4), [temp5]"=&r" (temp5) |
143 | : [c0]"r" (c0), [c1]"r" (c1), [c2]"r" (c2) |
144 | : "memory" |
145 | ); |
146 | return temp1; |
147 | } |
148 | |
149 | static WEBP_INLINE uint32_t Select(uint32_t a, uint32_t b, uint32_t c) { |
150 | int temp0, temp1, temp2, temp3, temp4, temp5; |
151 | __asm__ volatile ( |
152 | "cmpgdu.lt.qb %[temp1], %[c], %[b] \n\t" |
153 | "pick.qb %[temp1], %[b], %[c] \n\t" |
154 | "pick.qb %[temp2], %[c], %[b] \n\t" |
155 | "cmpgdu.lt.qb %[temp4], %[c], %[a] \n\t" |
156 | "pick.qb %[temp4], %[a], %[c] \n\t" |
157 | "pick.qb %[temp5], %[c], %[a] \n\t" |
158 | "subu.qb %[temp3], %[temp1], %[temp2] \n\t" |
159 | "subu.qb %[temp0], %[temp4], %[temp5] \n\t" |
160 | "raddu.w.qb %[temp3], %[temp3] \n\t" |
161 | "raddu.w.qb %[temp0], %[temp0] \n\t" |
162 | "subu %[temp3], %[temp3], %[temp0] \n\t" |
163 | "slti %[temp0], %[temp3], 0x1 \n\t" |
164 | "movz %[a], %[b], %[temp0] \n\t" |
165 | : [temp1]"=&r" (temp1), [temp2]"=&r" (temp2), [temp3]"=&r" (temp3), |
166 | [temp4]"=&r" (temp4), [temp5]"=&r" (temp5), [temp0]"=&r" (temp0), |
167 | [a]"+&r" (a) |
168 | : [b]"r" (b), [c]"r" (c) |
169 | ); |
170 | return a; |
171 | } |
172 | |
173 | static WEBP_INLINE uint32_t Average2(uint32_t a0, uint32_t a1) { |
174 | __asm__ volatile ( |
175 | "adduh.qb %[a0], %[a0], %[a1] \n\t" |
176 | : [a0]"+r" (a0) |
177 | : [a1]"r" (a1) |
178 | ); |
179 | return a0; |
180 | } |
181 | |
182 | static WEBP_INLINE uint32_t Average3(uint32_t a0, uint32_t a1, uint32_t a2) { |
183 | return Average2(Average2(a0, a2), a1); |
184 | } |
185 | |
186 | static WEBP_INLINE uint32_t Average4(uint32_t a0, uint32_t a1, |
187 | uint32_t a2, uint32_t a3) { |
188 | return Average2(Average2(a0, a1), Average2(a2, a3)); |
189 | } |
190 | |
// Predictor 5: Average3 of *left, top[0] and top[1].
static uint32_t Predictor5_MIPSdspR2(const uint32_t* const left,
                                     const uint32_t* const top) {
  const uint32_t l = *left;
  const uint32_t t = top[0];
  const uint32_t tr = top[1];
  return Average3(l, t, tr);
}
195 | |
// Predictor 6: byte-wise average of *left and top[-1].
static uint32_t Predictor6_MIPSdspR2(const uint32_t* const left,
                                     const uint32_t* const top) {
  const uint32_t l = *left;
  const uint32_t tl = top[-1];
  return Average2(l, tl);
}
200 | |
// Predictor 7: byte-wise average of *left and top[0].
static uint32_t Predictor7_MIPSdspR2(const uint32_t* const left,
                                     const uint32_t* const top) {
  const uint32_t l = *left;
  const uint32_t t = top[0];
  return Average2(l, t);
}
205 | |
// Predictor 8: byte-wise average of top[-1] and top[0]. 'left' is unused.
static uint32_t Predictor8_MIPSdspR2(const uint32_t* const left,
                                     const uint32_t* const top) {
  const uint32_t tl = top[-1];
  const uint32_t t = top[0];
  (void)left;
  return Average2(tl, t);
}
211 | |
// Predictor 9: byte-wise average of top[0] and top[1]. 'left' is unused.
static uint32_t Predictor9_MIPSdspR2(const uint32_t* const left,
                                     const uint32_t* const top) {
  const uint32_t t = top[0];
  const uint32_t tr = top[1];
  (void)left;
  return Average2(t, tr);
}
217 | |
// Predictor 10: Average4 of *left, top[-1], top[0] and top[1].
static uint32_t Predictor10_MIPSdspR2(const uint32_t* const left,
                                      const uint32_t* const top) {
  const uint32_t l = *left;
  const uint32_t tl = top[-1];
  const uint32_t t = top[0];
  const uint32_t tr = top[1];
  return Average4(l, tl, t, tr);
}
222 | |
// Predictor 11: Select() between top[0] and *left, with top[-1] as the
// reference pixel.
static uint32_t Predictor11_MIPSdspR2(const uint32_t* const left,
                                      const uint32_t* const top) {
  const uint32_t t = top[0];
  const uint32_t l = *left;
  const uint32_t tl = top[-1];
  return Select(t, l, tl);
}
227 | |
// Predictor 12: per-channel clamp(*left + top[0] - top[-1]).
static uint32_t Predictor12_MIPSdspR2(const uint32_t* const left,
                                      const uint32_t* const top) {
  const uint32_t l = *left;
  const uint32_t t = top[0];
  const uint32_t tl = top[-1];
  return ClampedAddSubtractFull(l, t, tl);
}
232 | |
// Predictor 13: ClampedAddSubtractHalf() applied to *left, top[0] and
// top[-1].
static uint32_t Predictor13_MIPSdspR2(const uint32_t* const left,
                                      const uint32_t* const top) {
  const uint32_t l = *left;
  const uint32_t t = top[0];
  const uint32_t tl = top[-1];
  return ClampedAddSubtractHalf(l, t, tl);
}
237 | |
238 | // Add green to blue and red channels (i.e. perform the inverse transform of |
239 | // 'subtract green'). |
240 | static void AddGreenToBlueAndRed_MIPSdspR2(const uint32_t* src, int num_pixels, |
241 | uint32_t* dst) { |
242 | uint32_t temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7; |
243 | const uint32_t* const p_loop1_end = src + (num_pixels & ~3); |
244 | const uint32_t* const p_loop2_end = src + num_pixels; |
245 | __asm__ volatile ( |
246 | ".set push \n\t" |
247 | ".set noreorder \n\t" |
248 | "beq %[src], %[p_loop1_end], 3f \n\t" |
249 | " nop \n\t" |
250 | "0: \n\t" |
251 | "lw %[temp0], 0(%[src]) \n\t" |
252 | "lw %[temp1], 4(%[src]) \n\t" |
253 | "lw %[temp2], 8(%[src]) \n\t" |
254 | "lw %[temp3], 12(%[src]) \n\t" |
255 | "ext %[temp4], %[temp0], 8, 8 \n\t" |
256 | "ext %[temp5], %[temp1], 8, 8 \n\t" |
257 | "ext %[temp6], %[temp2], 8, 8 \n\t" |
258 | "ext %[temp7], %[temp3], 8, 8 \n\t" |
259 | "addiu %[src], %[src], 16 \n\t" |
260 | "addiu %[dst], %[dst], 16 \n\t" |
261 | "replv.ph %[temp4], %[temp4] \n\t" |
262 | "replv.ph %[temp5], %[temp5] \n\t" |
263 | "replv.ph %[temp6], %[temp6] \n\t" |
264 | "replv.ph %[temp7], %[temp7] \n\t" |
265 | "addu.qb %[temp0], %[temp0], %[temp4] \n\t" |
266 | "addu.qb %[temp1], %[temp1], %[temp5] \n\t" |
267 | "addu.qb %[temp2], %[temp2], %[temp6] \n\t" |
268 | "addu.qb %[temp3], %[temp3], %[temp7] \n\t" |
269 | "sw %[temp0], -16(%[dst]) \n\t" |
270 | "sw %[temp1], -12(%[dst]) \n\t" |
271 | "sw %[temp2], -8(%[dst]) \n\t" |
272 | "bne %[src], %[p_loop1_end], 0b \n\t" |
273 | " sw %[temp3], -4(%[dst]) \n\t" |
274 | "3: \n\t" |
275 | "beq %[src], %[p_loop2_end], 2f \n\t" |
276 | " nop \n\t" |
277 | "1: \n\t" |
278 | "lw %[temp0], 0(%[src]) \n\t" |
279 | "addiu %[src], %[src], 4 \n\t" |
280 | "addiu %[dst], %[dst], 4 \n\t" |
281 | "ext %[temp4], %[temp0], 8, 8 \n\t" |
282 | "replv.ph %[temp4], %[temp4] \n\t" |
283 | "addu.qb %[temp0], %[temp0], %[temp4] \n\t" |
284 | "bne %[src], %[p_loop2_end], 1b \n\t" |
285 | " sw %[temp0], -4(%[dst]) \n\t" |
286 | "2: \n\t" |
287 | ".set pop \n\t" |
288 | : [dst]"+&r" (dst), [src]"+&r" (src), [temp0]"=&r" (temp0), |
289 | [temp1]"=&r" (temp1), [temp2]"=&r" (temp2), [temp3]"=&r" (temp3), |
290 | [temp4]"=&r" (temp4), [temp5]"=&r" (temp5), [temp6]"=&r" (temp6), |
291 | [temp7]"=&r" (temp7) |
292 | : [p_loop1_end]"r" (p_loop1_end), [p_loop2_end]"r" (p_loop2_end) |
293 | : "memory" |
294 | ); |
295 | } |
296 | |
297 | static void TransformColorInverse_MIPSdspR2(const VP8LMultipliers* const m, |
298 | const uint32_t* src, int num_pixels, |
299 | uint32_t* dst) { |
300 | int temp0, temp1, temp2, temp3, temp4, temp5; |
301 | uint32_t argb, argb1, new_red; |
302 | const uint32_t G_to_R = m->green_to_red_; |
303 | const uint32_t G_to_B = m->green_to_blue_; |
304 | const uint32_t R_to_B = m->red_to_blue_; |
305 | const uint32_t* const p_loop_end = src + (num_pixels & ~1); |
306 | __asm__ volatile ( |
307 | ".set push \n\t" |
308 | ".set noreorder \n\t" |
309 | "beq %[src], %[p_loop_end], 1f \n\t" |
310 | " nop \n\t" |
311 | "replv.ph %[temp0], %[G_to_R] \n\t" |
312 | "replv.ph %[temp1], %[G_to_B] \n\t" |
313 | "replv.ph %[temp2], %[R_to_B] \n\t" |
314 | "shll.ph %[temp0], %[temp0], 8 \n\t" |
315 | "shll.ph %[temp1], %[temp1], 8 \n\t" |
316 | "shll.ph %[temp2], %[temp2], 8 \n\t" |
317 | "shra.ph %[temp0], %[temp0], 8 \n\t" |
318 | "shra.ph %[temp1], %[temp1], 8 \n\t" |
319 | "shra.ph %[temp2], %[temp2], 8 \n\t" |
320 | "0: \n\t" |
321 | "lw %[argb], 0(%[src]) \n\t" |
322 | "lw %[argb1], 4(%[src]) \n\t" |
323 | "sw %[argb], 0(%[dst]) \n\t" |
324 | "sw %[argb1], 4(%[dst]) \n\t" |
325 | "addiu %[src], %[src], 8 \n\t" |
326 | "addiu %[dst], %[dst], 8 \n\t" |
327 | "precrq.qb.ph %[temp3], %[argb], %[argb1] \n\t" |
328 | "preceu.ph.qbra %[temp3], %[temp3] \n\t" |
329 | "shll.ph %[temp3], %[temp3], 8 \n\t" |
330 | "shra.ph %[temp3], %[temp3], 8 \n\t" |
331 | "mul.ph %[temp5], %[temp3], %[temp0] \n\t" |
332 | "mul.ph %[temp3], %[temp3], %[temp1] \n\t" |
333 | "precrq.ph.w %[new_red], %[argb], %[argb1] \n\t" |
334 | "ins %[argb1], %[argb], 16, 16 \n\t" |
335 | "shra.ph %[temp5], %[temp5], 5 \n\t" |
336 | "shra.ph %[temp3], %[temp3], 5 \n\t" |
337 | "addu.ph %[new_red], %[new_red], %[temp5] \n\t" |
338 | "addu.ph %[argb1], %[argb1], %[temp3] \n\t" |
339 | "preceu.ph.qbra %[temp5], %[new_red] \n\t" |
340 | "shll.ph %[temp4], %[temp5], 8 \n\t" |
341 | "shra.ph %[temp4], %[temp4], 8 \n\t" |
342 | "mul.ph %[temp4], %[temp4], %[temp2] \n\t" |
343 | "sb %[temp5], -2(%[dst]) \n\t" |
344 | "sra %[temp5], %[temp5], 16 \n\t" |
345 | "shra.ph %[temp4], %[temp4], 5 \n\t" |
346 | "addu.ph %[argb1], %[argb1], %[temp4] \n\t" |
347 | "preceu.ph.qbra %[temp3], %[argb1] \n\t" |
348 | "sb %[temp5], -6(%[dst]) \n\t" |
349 | "sb %[temp3], -4(%[dst]) \n\t" |
350 | "sra %[temp3], %[temp3], 16 \n\t" |
351 | "bne %[src], %[p_loop_end], 0b \n\t" |
352 | " sb %[temp3], -8(%[dst]) \n\t" |
353 | "1: \n\t" |
354 | ".set pop \n\t" |
355 | : [temp0]"=&r" (temp0), [temp1]"=&r" (temp1), [temp2]"=&r" (temp2), |
356 | [temp3]"=&r" (temp3), [temp4]"=&r" (temp4), [temp5]"=&r" (temp5), |
357 | [new_red]"=&r" (new_red), [argb]"=&r" (argb), |
358 | [argb1]"=&r" (argb1), [dst]"+&r" (dst), [src]"+&r" (src) |
359 | : [G_to_R]"r" (G_to_R), [R_to_B]"r" (R_to_B), |
360 | [G_to_B]"r" (G_to_B), [p_loop_end]"r" (p_loop_end) |
361 | : "memory" , "hi" , "lo" |
362 | ); |
363 | |
364 | // Fall-back to C-version for left-overs. |
365 | if (num_pixels & 1) VP8LTransformColorInverse_C(m, src, 1, dst); |
366 | } |
367 | |
368 | static void ConvertBGRAToRGB_MIPSdspR2(const uint32_t* src, |
369 | int num_pixels, uint8_t* dst) { |
370 | int temp0, temp1, temp2, temp3; |
371 | const uint32_t* const p_loop1_end = src + (num_pixels & ~3); |
372 | const uint32_t* const p_loop2_end = src + num_pixels; |
373 | __asm__ volatile ( |
374 | ".set push \n\t" |
375 | ".set noreorder \n\t" |
376 | "beq %[src], %[p_loop1_end], 3f \n\t" |
377 | " nop \n\t" |
378 | "0: \n\t" |
379 | "lw %[temp3], 12(%[src]) \n\t" |
380 | "lw %[temp2], 8(%[src]) \n\t" |
381 | "lw %[temp1], 4(%[src]) \n\t" |
382 | "lw %[temp0], 0(%[src]) \n\t" |
383 | "ins %[temp3], %[temp2], 24, 8 \n\t" |
384 | "sll %[temp2], %[temp2], 8 \n\t" |
385 | "rotr %[temp3], %[temp3], 16 \n\t" |
386 | "ins %[temp2], %[temp1], 0, 16 \n\t" |
387 | "sll %[temp1], %[temp1], 8 \n\t" |
388 | "wsbh %[temp3], %[temp3] \n\t" |
389 | "balign %[temp0], %[temp1], 1 \n\t" |
390 | "wsbh %[temp2], %[temp2] \n\t" |
391 | "wsbh %[temp0], %[temp0] \n\t" |
392 | "usw %[temp3], 8(%[dst]) \n\t" |
393 | "rotr %[temp0], %[temp0], 16 \n\t" |
394 | "usw %[temp2], 4(%[dst]) \n\t" |
395 | "addiu %[src], %[src], 16 \n\t" |
396 | "usw %[temp0], 0(%[dst]) \n\t" |
397 | "bne %[src], %[p_loop1_end], 0b \n\t" |
398 | " addiu %[dst], %[dst], 12 \n\t" |
399 | "3: \n\t" |
400 | "beq %[src], %[p_loop2_end], 2f \n\t" |
401 | " nop \n\t" |
402 | "1: \n\t" |
403 | "lw %[temp0], 0(%[src]) \n\t" |
404 | "addiu %[src], %[src], 4 \n\t" |
405 | "wsbh %[temp1], %[temp0] \n\t" |
406 | "addiu %[dst], %[dst], 3 \n\t" |
407 | "ush %[temp1], -2(%[dst]) \n\t" |
408 | "sra %[temp0], %[temp0], 16 \n\t" |
409 | "bne %[src], %[p_loop2_end], 1b \n\t" |
410 | " sb %[temp0], -3(%[dst]) \n\t" |
411 | "2: \n\t" |
412 | ".set pop \n\t" |
413 | : [temp0]"=&r" (temp0), [temp1]"=&r" (temp1), [temp2]"=&r" (temp2), |
414 | [temp3]"=&r" (temp3), [dst]"+&r" (dst), [src]"+&r" (src) |
415 | : [p_loop1_end]"r" (p_loop1_end), [p_loop2_end]"r" (p_loop2_end) |
416 | : "memory" |
417 | ); |
418 | } |
419 | |
420 | static void ConvertBGRAToRGBA_MIPSdspR2(const uint32_t* src, |
421 | int num_pixels, uint8_t* dst) { |
422 | int temp0, temp1, temp2, temp3; |
423 | const uint32_t* const p_loop1_end = src + (num_pixels & ~3); |
424 | const uint32_t* const p_loop2_end = src + num_pixels; |
425 | __asm__ volatile ( |
426 | ".set push \n\t" |
427 | ".set noreorder \n\t" |
428 | "beq %[src], %[p_loop1_end], 3f \n\t" |
429 | " nop \n\t" |
430 | "0: \n\t" |
431 | "lw %[temp0], 0(%[src]) \n\t" |
432 | "lw %[temp1], 4(%[src]) \n\t" |
433 | "lw %[temp2], 8(%[src]) \n\t" |
434 | "lw %[temp3], 12(%[src]) \n\t" |
435 | "wsbh %[temp0], %[temp0] \n\t" |
436 | "wsbh %[temp1], %[temp1] \n\t" |
437 | "wsbh %[temp2], %[temp2] \n\t" |
438 | "wsbh %[temp3], %[temp3] \n\t" |
439 | "addiu %[src], %[src], 16 \n\t" |
440 | "balign %[temp0], %[temp0], 1 \n\t" |
441 | "balign %[temp1], %[temp1], 1 \n\t" |
442 | "balign %[temp2], %[temp2], 1 \n\t" |
443 | "balign %[temp3], %[temp3], 1 \n\t" |
444 | "usw %[temp0], 0(%[dst]) \n\t" |
445 | "usw %[temp1], 4(%[dst]) \n\t" |
446 | "usw %[temp2], 8(%[dst]) \n\t" |
447 | "usw %[temp3], 12(%[dst]) \n\t" |
448 | "bne %[src], %[p_loop1_end], 0b \n\t" |
449 | " addiu %[dst], %[dst], 16 \n\t" |
450 | "3: \n\t" |
451 | "beq %[src], %[p_loop2_end], 2f \n\t" |
452 | " nop \n\t" |
453 | "1: \n\t" |
454 | "lw %[temp0], 0(%[src]) \n\t" |
455 | "wsbh %[temp0], %[temp0] \n\t" |
456 | "addiu %[src], %[src], 4 \n\t" |
457 | "balign %[temp0], %[temp0], 1 \n\t" |
458 | "usw %[temp0], 0(%[dst]) \n\t" |
459 | "bne %[src], %[p_loop2_end], 1b \n\t" |
460 | " addiu %[dst], %[dst], 4 \n\t" |
461 | "2: \n\t" |
462 | ".set pop \n\t" |
463 | : [temp0]"=&r" (temp0), [temp1]"=&r" (temp1), [temp2]"=&r" (temp2), |
464 | [temp3]"=&r" (temp3), [dst]"+&r" (dst), [src]"+&r" (src) |
465 | : [p_loop1_end]"r" (p_loop1_end), [p_loop2_end]"r" (p_loop2_end) |
466 | : "memory" |
467 | ); |
468 | } |
469 | |
470 | static void ConvertBGRAToRGBA4444_MIPSdspR2(const uint32_t* src, |
471 | int num_pixels, uint8_t* dst) { |
472 | int temp0, temp1, temp2, temp3, temp4, temp5; |
473 | const uint32_t* const p_loop1_end = src + (num_pixels & ~3); |
474 | const uint32_t* const p_loop2_end = src + num_pixels; |
475 | __asm__ volatile ( |
476 | ".set push \n\t" |
477 | ".set noreorder \n\t" |
478 | "beq %[src], %[p_loop1_end], 3f \n\t" |
479 | " nop \n\t" |
480 | "0: \n\t" |
481 | "lw %[temp0], 0(%[src]) \n\t" |
482 | "lw %[temp1], 4(%[src]) \n\t" |
483 | "lw %[temp2], 8(%[src]) \n\t" |
484 | "lw %[temp3], 12(%[src]) \n\t" |
485 | "ext %[temp4], %[temp0], 28, 4 \n\t" |
486 | "ext %[temp5], %[temp0], 12, 4 \n\t" |
487 | "ins %[temp0], %[temp4], 0, 4 \n\t" |
488 | "ext %[temp4], %[temp1], 28, 4 \n\t" |
489 | "ins %[temp0], %[temp5], 16, 4 \n\t" |
490 | "ext %[temp5], %[temp1], 12, 4 \n\t" |
491 | "ins %[temp1], %[temp4], 0, 4 \n\t" |
492 | "ext %[temp4], %[temp2], 28, 4 \n\t" |
493 | "ins %[temp1], %[temp5], 16, 4 \n\t" |
494 | "ext %[temp5], %[temp2], 12, 4 \n\t" |
495 | "ins %[temp2], %[temp4], 0, 4 \n\t" |
496 | "ext %[temp4], %[temp3], 28, 4 \n\t" |
497 | "ins %[temp2], %[temp5], 16, 4 \n\t" |
498 | "ext %[temp5], %[temp3], 12, 4 \n\t" |
499 | "ins %[temp3], %[temp4], 0, 4 \n\t" |
500 | "precr.qb.ph %[temp1], %[temp1], %[temp0] \n\t" |
501 | "ins %[temp3], %[temp5], 16, 4 \n\t" |
502 | "addiu %[src], %[src], 16 \n\t" |
503 | "precr.qb.ph %[temp3], %[temp3], %[temp2] \n\t" |
504 | #if (WEBP_SWAP_16BIT_CSP == 1) |
505 | "usw %[temp1], 0(%[dst]) \n\t" |
506 | "usw %[temp3], 4(%[dst]) \n\t" |
507 | #else |
508 | "wsbh %[temp1], %[temp1] \n\t" |
509 | "wsbh %[temp3], %[temp3] \n\t" |
510 | "usw %[temp1], 0(%[dst]) \n\t" |
511 | "usw %[temp3], 4(%[dst]) \n\t" |
512 | #endif |
513 | "bne %[src], %[p_loop1_end], 0b \n\t" |
514 | " addiu %[dst], %[dst], 8 \n\t" |
515 | "3: \n\t" |
516 | "beq %[src], %[p_loop2_end], 2f \n\t" |
517 | " nop \n\t" |
518 | "1: \n\t" |
519 | "lw %[temp0], 0(%[src]) \n\t" |
520 | "ext %[temp4], %[temp0], 28, 4 \n\t" |
521 | "ext %[temp5], %[temp0], 12, 4 \n\t" |
522 | "ins %[temp0], %[temp4], 0, 4 \n\t" |
523 | "ins %[temp0], %[temp5], 16, 4 \n\t" |
524 | "addiu %[src], %[src], 4 \n\t" |
525 | "precr.qb.ph %[temp0], %[temp0], %[temp0] \n\t" |
526 | #if (WEBP_SWAP_16BIT_CSP == 1) |
527 | "ush %[temp0], 0(%[dst]) \n\t" |
528 | #else |
529 | "wsbh %[temp0], %[temp0] \n\t" |
530 | "ush %[temp0], 0(%[dst]) \n\t" |
531 | #endif |
532 | "bne %[src], %[p_loop2_end], 1b \n\t" |
533 | " addiu %[dst], %[dst], 2 \n\t" |
534 | "2: \n\t" |
535 | ".set pop \n\t" |
536 | : [temp0]"=&r" (temp0), [temp1]"=&r" (temp1), [temp2]"=&r" (temp2), |
537 | [temp3]"=&r" (temp3), [temp4]"=&r" (temp4), [temp5]"=&r" (temp5), |
538 | [dst]"+&r" (dst), [src]"+&r" (src) |
539 | : [p_loop1_end]"r" (p_loop1_end), [p_loop2_end]"r" (p_loop2_end) |
540 | : "memory" |
541 | ); |
542 | } |
543 | |
544 | static void ConvertBGRAToRGB565_MIPSdspR2(const uint32_t* src, |
545 | int num_pixels, uint8_t* dst) { |
546 | int temp0, temp1, temp2, temp3, temp4, temp5; |
547 | const uint32_t* const p_loop1_end = src + (num_pixels & ~3); |
548 | const uint32_t* const p_loop2_end = src + num_pixels; |
549 | __asm__ volatile ( |
550 | ".set push \n\t" |
551 | ".set noreorder \n\t" |
552 | "beq %[src], %[p_loop1_end], 3f \n\t" |
553 | " nop \n\t" |
554 | "0: \n\t" |
555 | "lw %[temp0], 0(%[src]) \n\t" |
556 | "lw %[temp1], 4(%[src]) \n\t" |
557 | "lw %[temp2], 8(%[src]) \n\t" |
558 | "lw %[temp3], 12(%[src]) \n\t" |
559 | "ext %[temp4], %[temp0], 8, 16 \n\t" |
560 | "ext %[temp5], %[temp0], 5, 11 \n\t" |
561 | "ext %[temp0], %[temp0], 3, 5 \n\t" |
562 | "ins %[temp4], %[temp5], 0, 11 \n\t" |
563 | "ext %[temp5], %[temp1], 5, 11 \n\t" |
564 | "ins %[temp4], %[temp0], 0, 5 \n\t" |
565 | "ext %[temp0], %[temp1], 8, 16 \n\t" |
566 | "ext %[temp1], %[temp1], 3, 5 \n\t" |
567 | "ins %[temp0], %[temp5], 0, 11 \n\t" |
568 | "ext %[temp5], %[temp2], 5, 11 \n\t" |
569 | "ins %[temp0], %[temp1], 0, 5 \n\t" |
570 | "ext %[temp1], %[temp2], 8, 16 \n\t" |
571 | "ext %[temp2], %[temp2], 3, 5 \n\t" |
572 | "ins %[temp1], %[temp5], 0, 11 \n\t" |
573 | "ext %[temp5], %[temp3], 5, 11 \n\t" |
574 | "ins %[temp1], %[temp2], 0, 5 \n\t" |
575 | "ext %[temp2], %[temp3], 8, 16 \n\t" |
576 | "ext %[temp3], %[temp3], 3, 5 \n\t" |
577 | "ins %[temp2], %[temp5], 0, 11 \n\t" |
578 | "append %[temp0], %[temp4], 16 \n\t" |
579 | "ins %[temp2], %[temp3], 0, 5 \n\t" |
580 | "addiu %[src], %[src], 16 \n\t" |
581 | "append %[temp2], %[temp1], 16 \n\t" |
582 | #if (WEBP_SWAP_16BIT_CSP == 1) |
583 | "usw %[temp0], 0(%[dst]) \n\t" |
584 | "usw %[temp2], 4(%[dst]) \n\t" |
585 | #else |
586 | "wsbh %[temp0], %[temp0] \n\t" |
587 | "wsbh %[temp2], %[temp2] \n\t" |
588 | "usw %[temp0], 0(%[dst]) \n\t" |
589 | "usw %[temp2], 4(%[dst]) \n\t" |
590 | #endif |
591 | "bne %[src], %[p_loop1_end], 0b \n\t" |
592 | " addiu %[dst], %[dst], 8 \n\t" |
593 | "3: \n\t" |
594 | "beq %[src], %[p_loop2_end], 2f \n\t" |
595 | " nop \n\t" |
596 | "1: \n\t" |
597 | "lw %[temp0], 0(%[src]) \n\t" |
598 | "ext %[temp4], %[temp0], 8, 16 \n\t" |
599 | "ext %[temp5], %[temp0], 5, 11 \n\t" |
600 | "ext %[temp0], %[temp0], 3, 5 \n\t" |
601 | "ins %[temp4], %[temp5], 0, 11 \n\t" |
602 | "addiu %[src], %[src], 4 \n\t" |
603 | "ins %[temp4], %[temp0], 0, 5 \n\t" |
604 | #if (WEBP_SWAP_16BIT_CSP == 1) |
605 | "ush %[temp4], 0(%[dst]) \n\t" |
606 | #else |
607 | "wsbh %[temp4], %[temp4] \n\t" |
608 | "ush %[temp4], 0(%[dst]) \n\t" |
609 | #endif |
610 | "bne %[src], %[p_loop2_end], 1b \n\t" |
611 | " addiu %[dst], %[dst], 2 \n\t" |
612 | "2: \n\t" |
613 | ".set pop \n\t" |
614 | : [temp0]"=&r" (temp0), [temp1]"=&r" (temp1), [temp2]"=&r" (temp2), |
615 | [temp3]"=&r" (temp3), [temp4]"=&r" (temp4), [temp5]"=&r" (temp5), |
616 | [dst]"+&r" (dst), [src]"+&r" (src) |
617 | : [p_loop1_end]"r" (p_loop1_end), [p_loop2_end]"r" (p_loop2_end) |
618 | : "memory" |
619 | ); |
620 | } |
621 | |
622 | static void ConvertBGRAToBGR_MIPSdspR2(const uint32_t* src, |
623 | int num_pixels, uint8_t* dst) { |
624 | int temp0, temp1, temp2, temp3; |
625 | const uint32_t* const p_loop1_end = src + (num_pixels & ~3); |
626 | const uint32_t* const p_loop2_end = src + num_pixels; |
627 | __asm__ volatile ( |
628 | ".set push \n\t" |
629 | ".set noreorder \n\t" |
630 | "beq %[src], %[p_loop1_end], 3f \n\t" |
631 | " nop \n\t" |
632 | "0: \n\t" |
633 | "lw %[temp0], 0(%[src]) \n\t" |
634 | "lw %[temp1], 4(%[src]) \n\t" |
635 | "lw %[temp2], 8(%[src]) \n\t" |
636 | "lw %[temp3], 12(%[src]) \n\t" |
637 | "ins %[temp0], %[temp1], 24, 8 \n\t" |
638 | "sra %[temp1], %[temp1], 8 \n\t" |
639 | "ins %[temp1], %[temp2], 16, 16 \n\t" |
640 | "sll %[temp2], %[temp2], 8 \n\t" |
641 | "balign %[temp3], %[temp2], 1 \n\t" |
642 | "addiu %[src], %[src], 16 \n\t" |
643 | "usw %[temp0], 0(%[dst]) \n\t" |
644 | "usw %[temp1], 4(%[dst]) \n\t" |
645 | "usw %[temp3], 8(%[dst]) \n\t" |
646 | "bne %[src], %[p_loop1_end], 0b \n\t" |
647 | " addiu %[dst], %[dst], 12 \n\t" |
648 | "3: \n\t" |
649 | "beq %[src], %[p_loop2_end], 2f \n\t" |
650 | " nop \n\t" |
651 | "1: \n\t" |
652 | "lw %[temp0], 0(%[src]) \n\t" |
653 | "addiu %[src], %[src], 4 \n\t" |
654 | "addiu %[dst], %[dst], 3 \n\t" |
655 | "ush %[temp0], -3(%[dst]) \n\t" |
656 | "sra %[temp0], %[temp0], 16 \n\t" |
657 | "bne %[src], %[p_loop2_end], 1b \n\t" |
658 | " sb %[temp0], -1(%[dst]) \n\t" |
659 | "2: \n\t" |
660 | ".set pop \n\t" |
661 | : [temp0]"=&r" (temp0), [temp1]"=&r" (temp1), [temp2]"=&r" (temp2), |
662 | [temp3]"=&r" (temp3), [dst]"+&r" (dst), [src]"+&r" (src) |
663 | : [p_loop1_end]"r" (p_loop1_end), [p_loop2_end]"r" (p_loop2_end) |
664 | : "memory" |
665 | ); |
666 | } |
667 | |
668 | //------------------------------------------------------------------------------ |
669 | // Entry point |
670 | |
671 | extern void VP8LDspInitMIPSdspR2(void); |
672 | |
673 | WEBP_TSAN_IGNORE_FUNCTION void VP8LDspInitMIPSdspR2(void) { |
674 | VP8LMapColor32b = MapARGB_MIPSdspR2; |
675 | VP8LMapColor8b = MapAlpha_MIPSdspR2; |
676 | |
677 | VP8LPredictors[5] = Predictor5_MIPSdspR2; |
678 | VP8LPredictors[6] = Predictor6_MIPSdspR2; |
679 | VP8LPredictors[7] = Predictor7_MIPSdspR2; |
680 | VP8LPredictors[8] = Predictor8_MIPSdspR2; |
681 | VP8LPredictors[9] = Predictor9_MIPSdspR2; |
682 | VP8LPredictors[10] = Predictor10_MIPSdspR2; |
683 | VP8LPredictors[11] = Predictor11_MIPSdspR2; |
684 | VP8LPredictors[12] = Predictor12_MIPSdspR2; |
685 | VP8LPredictors[13] = Predictor13_MIPSdspR2; |
686 | |
687 | VP8LAddGreenToBlueAndRed = AddGreenToBlueAndRed_MIPSdspR2; |
688 | VP8LTransformColorInverse = TransformColorInverse_MIPSdspR2; |
689 | |
690 | VP8LConvertBGRAToRGB = ConvertBGRAToRGB_MIPSdspR2; |
691 | VP8LConvertBGRAToRGBA = ConvertBGRAToRGBA_MIPSdspR2; |
692 | VP8LConvertBGRAToRGBA4444 = ConvertBGRAToRGBA4444_MIPSdspR2; |
693 | VP8LConvertBGRAToRGB565 = ConvertBGRAToRGB565_MIPSdspR2; |
694 | VP8LConvertBGRAToBGR = ConvertBGRAToBGR_MIPSdspR2; |
695 | } |
696 | |
697 | #else // !WEBP_USE_MIPS_DSP_R2 |
698 | |
699 | WEBP_DSP_INIT_STUB(VP8LDspInitMIPSdspR2) |
700 | |
701 | #endif // WEBP_USE_MIPS_DSP_R2 |
702 | |