// Copyright 2014 Google Inc. All Rights Reserved.
//
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// Image transforms and color space conversion methods for lossless decoder.
//
// Author(s):  Djordje Pesut    (djordje.pesut@imgtec.com)
//             Jovan Zelincevic (jovan.zelincevic@imgtec.com)

#include "src/dsp/dsp.h"

#if defined(WEBP_USE_MIPS_DSP_R2)

#include "src/dsp/lossless.h"
#include "src/dsp/lossless_common.h"

// Generates a palette-lookup routine named FUNC_NAME for pixels of type TYPE.
//   src            - input pixels/indices; advanced continuously across rows.
//   color_map      - lookup table of 32-bit entries.
//   dst            - output; advanced continuously across rows.
//   y_start, y_end - half-open range of rows to process.
//   width          - number of pixels per row.
// Each row is handled in two steps: (width >> 2) groups of four pixels go
// through the inline assembly, then the remaining (width & 3) pixels use the
// C expression GET_VALUE(color_map[GET_INDEX(pixel)]).
// The assembler directive '.ifc' compares the stringified TYPE at assembly
// time so that only the matching load/store sequence is emitted:
//   uint8_t : the source byte is the index, and bits 8..15 of the looked-up
//             entry are stored as a single byte.
//   uint32_t: bits 8..15 of the source word are the index, and the whole
//             looked-up entry is stored.
// In both cases the index is multiplied by 4 ('sll ..., 2') to form the byte
// offset used by the indexed load 'lwx'.
#define MAP_COLOR_FUNCS(FUNC_NAME, TYPE, GET_INDEX, GET_VALUE)                 \
static void FUNC_NAME(const TYPE* src,                                         \
                      const uint32_t* const color_map,                         \
                      TYPE* dst, int y_start, int y_end,                       \
                      int width) {                                             \
  int y;                                                                       \
  for (y = y_start; y < y_end; ++y) {                                          \
    int x;                                                                     \
    /* Four pixels per iteration. */                                           \
    for (x = 0; x < (width >> 2); ++x) {                                       \
      int tmp1, tmp2, tmp3, tmp4;                                              \
      __asm__ volatile (                                                       \
      ".ifc " #TYPE ", uint8_t \n\t"                                           \
        "lbu %[tmp1], 0(%[src]) \n\t"                                          \
        "lbu %[tmp2], 1(%[src]) \n\t"                                          \
        "lbu %[tmp3], 2(%[src]) \n\t"                                          \
        "lbu %[tmp4], 3(%[src]) \n\t"                                          \
        "addiu %[src], %[src], 4 \n\t"                                         \
      ".endif \n\t"                                                            \
      ".ifc " #TYPE ", uint32_t \n\t"                                          \
        "lw %[tmp1], 0(%[src]) \n\t"                                           \
        "lw %[tmp2], 4(%[src]) \n\t"                                           \
        "lw %[tmp3], 8(%[src]) \n\t"                                           \
        "lw %[tmp4], 12(%[src]) \n\t"                                          \
        "ext %[tmp1], %[tmp1], 8, 8 \n\t"                                      \
        "ext %[tmp2], %[tmp2], 8, 8 \n\t"                                      \
        "ext %[tmp3], %[tmp3], 8, 8 \n\t"                                      \
        "ext %[tmp4], %[tmp4], 8, 8 \n\t"                                      \
        "addiu %[src], %[src], 16 \n\t"                                        \
      ".endif \n\t"                                                            \
        "sll %[tmp1], %[tmp1], 2 \n\t"                                         \
        "sll %[tmp2], %[tmp2], 2 \n\t"                                         \
        "sll %[tmp3], %[tmp3], 2 \n\t"                                         \
        "sll %[tmp4], %[tmp4], 2 \n\t"                                         \
        "lwx %[tmp1], %[tmp1](%[color_map]) \n\t"                              \
        "lwx %[tmp2], %[tmp2](%[color_map]) \n\t"                              \
        "lwx %[tmp3], %[tmp3](%[color_map]) \n\t"                              \
        "lwx %[tmp4], %[tmp4](%[color_map]) \n\t"                              \
      ".ifc " #TYPE ", uint8_t \n\t"                                           \
        "ext %[tmp1], %[tmp1], 8, 8 \n\t"                                      \
        "ext %[tmp2], %[tmp2], 8, 8 \n\t"                                      \
        "ext %[tmp3], %[tmp3], 8, 8 \n\t"                                      \
        "ext %[tmp4], %[tmp4], 8, 8 \n\t"                                      \
        "sb %[tmp1], 0(%[dst]) \n\t"                                           \
        "sb %[tmp2], 1(%[dst]) \n\t"                                           \
        "sb %[tmp3], 2(%[dst]) \n\t"                                           \
        "sb %[tmp4], 3(%[dst]) \n\t"                                           \
        "addiu %[dst], %[dst], 4 \n\t"                                         \
      ".endif \n\t"                                                            \
      ".ifc " #TYPE ", uint32_t \n\t"                                          \
        "sw %[tmp1], 0(%[dst]) \n\t"                                           \
        "sw %[tmp2], 4(%[dst]) \n\t"                                           \
        "sw %[tmp3], 8(%[dst]) \n\t"                                           \
        "sw %[tmp4], 12(%[dst]) \n\t"                                          \
        "addiu %[dst], %[dst], 16 \n\t"                                        \
      ".endif \n\t"                                                            \
        : [tmp1]"=&r"(tmp1), [tmp2]"=&r"(tmp2), [tmp3]"=&r"(tmp3),             \
          [tmp4]"=&r"(tmp4), [src]"+&r"(src), [dst]"+r"(dst)                   \
        : [color_map]"r"(color_map)                                            \
        : "memory"                                                             \
      );                                                                       \
    }                                                                          \
    /* Left-over pixels of the row, one at a time. */                          \
    for (x = 0; x < (width & 3); ++x) {                                        \
      *dst++ = GET_VALUE(color_map[GET_INDEX(*src++)]);                        \
    }                                                                          \
  }                                                                            \
}

MAP_COLOR_FUNCS(MapARGB_MIPSdspR2, uint32_t, VP8GetARGBIndex, VP8GetARGBValue)
MAP_COLOR_FUNCS(MapAlpha_MIPSdspR2, uint8_t, VP8GetAlphaIndex, VP8GetAlphaValue)

#undef MAP_COLOR_FUNCS

| 94 | static WEBP_INLINE uint32_t ClampedAddSubtractFull(uint32_t c0, uint32_t c1, |
| 95 | uint32_t c2) { |
| 96 | int temp0, temp1, temp2, temp3, temp4, temp5; |
| 97 | __asm__ volatile ( |
| 98 | "preceu.ph.qbr %[temp1], %[c0] \n\t" |
| 99 | "preceu.ph.qbl %[temp2], %[c0] \n\t" |
| 100 | "preceu.ph.qbr %[temp3], %[c1] \n\t" |
| 101 | "preceu.ph.qbl %[temp4], %[c1] \n\t" |
| 102 | "preceu.ph.qbr %[temp5], %[c2] \n\t" |
| 103 | "preceu.ph.qbl %[temp0], %[c2] \n\t" |
| 104 | "subq.ph %[temp3], %[temp3], %[temp5] \n\t" |
| 105 | "subq.ph %[temp4], %[temp4], %[temp0] \n\t" |
| 106 | "addq.ph %[temp1], %[temp1], %[temp3] \n\t" |
| 107 | "addq.ph %[temp2], %[temp2], %[temp4] \n\t" |
| 108 | "shll_s.ph %[temp1], %[temp1], 7 \n\t" |
| 109 | "shll_s.ph %[temp2], %[temp2], 7 \n\t" |
| 110 | "precrqu_s.qb.ph %[temp2], %[temp2], %[temp1] \n\t" |
| 111 | : [temp0]"=r" (temp0), [temp1]"=&r" (temp1), [temp2]"=&r" (temp2), |
| 112 | [temp3]"=&r" (temp3), [temp4]"=&r" (temp4), [temp5]"=&r" (temp5) |
| 113 | : [c0]"r" (c0), [c1]"r" (c1), [c2]"r" (c2) |
| 114 | : "memory" |
| 115 | ); |
| 116 | return temp2; |
| 117 | } |
| 118 | |
| 119 | static WEBP_INLINE uint32_t ClampedAddSubtractHalf(uint32_t c0, uint32_t c1, |
| 120 | uint32_t c2) { |
| 121 | int temp0, temp1, temp2, temp3, temp4, temp5; |
| 122 | __asm__ volatile ( |
| 123 | "adduh.qb %[temp5], %[c0], %[c1] \n\t" |
| 124 | "preceu.ph.qbr %[temp3], %[c2] \n\t" |
| 125 | "preceu.ph.qbr %[temp1], %[temp5] \n\t" |
| 126 | "preceu.ph.qbl %[temp2], %[temp5] \n\t" |
| 127 | "preceu.ph.qbl %[temp4], %[c2] \n\t" |
| 128 | "subq.ph %[temp3], %[temp1], %[temp3] \n\t" |
| 129 | "subq.ph %[temp4], %[temp2], %[temp4] \n\t" |
| 130 | "shrl.ph %[temp5], %[temp3], 15 \n\t" |
| 131 | "shrl.ph %[temp0], %[temp4], 15 \n\t" |
| 132 | "addq.ph %[temp3], %[temp3], %[temp5] \n\t" |
| 133 | "addq.ph %[temp4], %[temp0], %[temp4] \n\t" |
| 134 | "shra.ph %[temp3], %[temp3], 1 \n\t" |
| 135 | "shra.ph %[temp4], %[temp4], 1 \n\t" |
| 136 | "addq.ph %[temp1], %[temp1], %[temp3] \n\t" |
| 137 | "addq.ph %[temp2], %[temp2], %[temp4] \n\t" |
| 138 | "shll_s.ph %[temp1], %[temp1], 7 \n\t" |
| 139 | "shll_s.ph %[temp2], %[temp2], 7 \n\t" |
| 140 | "precrqu_s.qb.ph %[temp1], %[temp2], %[temp1] \n\t" |
| 141 | : [temp0]"=r" (temp0), [temp1]"=&r" (temp1), [temp2]"=&r" (temp2), |
| 142 | [temp3]"=&r" (temp3), [temp4]"=r" (temp4), [temp5]"=&r" (temp5) |
| 143 | : [c0]"r" (c0), [c1]"r" (c1), [c2]"r" (c2) |
| 144 | : "memory" |
| 145 | ); |
| 146 | return temp1; |
| 147 | } |
| 148 | |
| 149 | static WEBP_INLINE uint32_t Select(uint32_t a, uint32_t b, uint32_t c) { |
| 150 | int temp0, temp1, temp2, temp3, temp4, temp5; |
| 151 | __asm__ volatile ( |
| 152 | "cmpgdu.lt.qb %[temp1], %[c], %[b] \n\t" |
| 153 | "pick.qb %[temp1], %[b], %[c] \n\t" |
| 154 | "pick.qb %[temp2], %[c], %[b] \n\t" |
| 155 | "cmpgdu.lt.qb %[temp4], %[c], %[a] \n\t" |
| 156 | "pick.qb %[temp4], %[a], %[c] \n\t" |
| 157 | "pick.qb %[temp5], %[c], %[a] \n\t" |
| 158 | "subu.qb %[temp3], %[temp1], %[temp2] \n\t" |
| 159 | "subu.qb %[temp0], %[temp4], %[temp5] \n\t" |
| 160 | "raddu.w.qb %[temp3], %[temp3] \n\t" |
| 161 | "raddu.w.qb %[temp0], %[temp0] \n\t" |
| 162 | "subu %[temp3], %[temp3], %[temp0] \n\t" |
| 163 | "slti %[temp0], %[temp3], 0x1 \n\t" |
| 164 | "movz %[a], %[b], %[temp0] \n\t" |
| 165 | : [temp1]"=&r" (temp1), [temp2]"=&r" (temp2), [temp3]"=&r" (temp3), |
| 166 | [temp4]"=&r" (temp4), [temp5]"=&r" (temp5), [temp0]"=&r" (temp0), |
| 167 | [a]"+&r" (a) |
| 168 | : [b]"r" (b), [c]"r" (c) |
| 169 | ); |
| 170 | return a; |
| 171 | } |
| 172 | |
| 173 | static WEBP_INLINE uint32_t Average2(uint32_t a0, uint32_t a1) { |
| 174 | __asm__ volatile ( |
| 175 | "adduh.qb %[a0], %[a0], %[a1] \n\t" |
| 176 | : [a0]"+r" (a0) |
| 177 | : [a1]"r" (a1) |
| 178 | ); |
| 179 | return a0; |
| 180 | } |
| 181 | |
| 182 | static WEBP_INLINE uint32_t Average3(uint32_t a0, uint32_t a1, uint32_t a2) { |
| 183 | return Average2(Average2(a0, a2), a1); |
| 184 | } |
| 185 | |
| 186 | static WEBP_INLINE uint32_t Average4(uint32_t a0, uint32_t a1, |
| 187 | uint32_t a2, uint32_t a3) { |
| 188 | return Average2(Average2(a0, a1), Average2(a2, a3)); |
| 189 | } |
| 190 | |
// Predictor 5: Average3 of *left, top[0] and top[1].
static uint32_t Predictor5_MIPSdspR2(const uint32_t* const left,
                                     const uint32_t* const top) {
  return Average3(*left, top[0], top[1]);
}

// Predictor 6: Average2 of *left and top[-1] (the entry just before top[0]).
static uint32_t Predictor6_MIPSdspR2(const uint32_t* const left,
                                     const uint32_t* const top) {
  return Average2(*left, top[-1]);
}

// Predictor 7: Average2 of *left and top[0].
static uint32_t Predictor7_MIPSdspR2(const uint32_t* const left,
                                     const uint32_t* const top) {
  return Average2(*left, top[0]);
}

// Predictor 8: Average2 of top[-1] and top[0]. 'left' is not used; the cast
// to void silences the unused-parameter warning.
static uint32_t Predictor8_MIPSdspR2(const uint32_t* const left,
                                     const uint32_t* const top) {
  (void)left;
  return Average2(top[-1], top[0]);
}

// Predictor 9: Average2 of top[0] and top[1]. 'left' is not used; the cast
// to void silences the unused-parameter warning.
static uint32_t Predictor9_MIPSdspR2(const uint32_t* const left,
                                     const uint32_t* const top) {
  (void)left;
  return Average2(top[0], top[1]);
}

// Predictor 10: Average4 of *left, top[-1], top[0] and top[1].
static uint32_t Predictor10_MIPSdspR2(const uint32_t* const left,
                                      const uint32_t* const top) {
  return Average4(*left, top[-1], top[0], top[1]);
}

// Predictor 11: Select() between top[0] and *left, using top[-1] as the
// reference pixel (third argument).
static uint32_t Predictor11_MIPSdspR2(const uint32_t* const left,
                                      const uint32_t* const top) {
  return Select(top[0], *left, top[-1]);
}

// Predictor 12: per channel, *left + top[0] - top[-1], clamped by
// ClampedAddSubtractFull().
static uint32_t Predictor12_MIPSdspR2(const uint32_t* const left,
                                      const uint32_t* const top) {
  return ClampedAddSubtractFull(*left, top[0], top[-1]);
}

// Predictor 13: ClampedAddSubtractHalf() of *left, top[0] and top[-1], i.e.
// the average of the first two pushed away from the third and clamped.
static uint32_t Predictor13_MIPSdspR2(const uint32_t* const left,
                                      const uint32_t* const top) {
  return ClampedAddSubtractHalf(*left, top[0], top[-1]);
}

| 238 | // Add green to blue and red channels (i.e. perform the inverse transform of |
| 239 | // 'subtract green'). |
| 240 | static void AddGreenToBlueAndRed_MIPSdspR2(const uint32_t* src, int num_pixels, |
| 241 | uint32_t* dst) { |
| 242 | uint32_t temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7; |
| 243 | const uint32_t* const p_loop1_end = src + (num_pixels & ~3); |
| 244 | const uint32_t* const p_loop2_end = src + num_pixels; |
| 245 | __asm__ volatile ( |
| 246 | ".set push \n\t" |
| 247 | ".set noreorder \n\t" |
| 248 | "beq %[src], %[p_loop1_end], 3f \n\t" |
| 249 | " nop \n\t" |
| 250 | "0: \n\t" |
| 251 | "lw %[temp0], 0(%[src]) \n\t" |
| 252 | "lw %[temp1], 4(%[src]) \n\t" |
| 253 | "lw %[temp2], 8(%[src]) \n\t" |
| 254 | "lw %[temp3], 12(%[src]) \n\t" |
| 255 | "ext %[temp4], %[temp0], 8, 8 \n\t" |
| 256 | "ext %[temp5], %[temp1], 8, 8 \n\t" |
| 257 | "ext %[temp6], %[temp2], 8, 8 \n\t" |
| 258 | "ext %[temp7], %[temp3], 8, 8 \n\t" |
| 259 | "addiu %[src], %[src], 16 \n\t" |
| 260 | "addiu %[dst], %[dst], 16 \n\t" |
| 261 | "replv.ph %[temp4], %[temp4] \n\t" |
| 262 | "replv.ph %[temp5], %[temp5] \n\t" |
| 263 | "replv.ph %[temp6], %[temp6] \n\t" |
| 264 | "replv.ph %[temp7], %[temp7] \n\t" |
| 265 | "addu.qb %[temp0], %[temp0], %[temp4] \n\t" |
| 266 | "addu.qb %[temp1], %[temp1], %[temp5] \n\t" |
| 267 | "addu.qb %[temp2], %[temp2], %[temp6] \n\t" |
| 268 | "addu.qb %[temp3], %[temp3], %[temp7] \n\t" |
| 269 | "sw %[temp0], -16(%[dst]) \n\t" |
| 270 | "sw %[temp1], -12(%[dst]) \n\t" |
| 271 | "sw %[temp2], -8(%[dst]) \n\t" |
| 272 | "bne %[src], %[p_loop1_end], 0b \n\t" |
| 273 | " sw %[temp3], -4(%[dst]) \n\t" |
| 274 | "3: \n\t" |
| 275 | "beq %[src], %[p_loop2_end], 2f \n\t" |
| 276 | " nop \n\t" |
| 277 | "1: \n\t" |
| 278 | "lw %[temp0], 0(%[src]) \n\t" |
| 279 | "addiu %[src], %[src], 4 \n\t" |
| 280 | "addiu %[dst], %[dst], 4 \n\t" |
| 281 | "ext %[temp4], %[temp0], 8, 8 \n\t" |
| 282 | "replv.ph %[temp4], %[temp4] \n\t" |
| 283 | "addu.qb %[temp0], %[temp0], %[temp4] \n\t" |
| 284 | "bne %[src], %[p_loop2_end], 1b \n\t" |
| 285 | " sw %[temp0], -4(%[dst]) \n\t" |
| 286 | "2: \n\t" |
| 287 | ".set pop \n\t" |
| 288 | : [dst]"+&r" (dst), [src]"+&r" (src), [temp0]"=&r" (temp0), |
| 289 | [temp1]"=&r" (temp1), [temp2]"=&r" (temp2), [temp3]"=&r" (temp3), |
| 290 | [temp4]"=&r" (temp4), [temp5]"=&r" (temp5), [temp6]"=&r" (temp6), |
| 291 | [temp7]"=&r" (temp7) |
| 292 | : [p_loop1_end]"r" (p_loop1_end), [p_loop2_end]"r" (p_loop2_end) |
| 293 | : "memory" |
| 294 | ); |
| 295 | } |
| 296 | |
| 297 | static void TransformColorInverse_MIPSdspR2(const VP8LMultipliers* const m, |
| 298 | const uint32_t* src, int num_pixels, |
| 299 | uint32_t* dst) { |
| 300 | int temp0, temp1, temp2, temp3, temp4, temp5; |
| 301 | uint32_t argb, argb1, new_red; |
| 302 | const uint32_t G_to_R = m->green_to_red_; |
| 303 | const uint32_t G_to_B = m->green_to_blue_; |
| 304 | const uint32_t R_to_B = m->red_to_blue_; |
| 305 | const uint32_t* const p_loop_end = src + (num_pixels & ~1); |
| 306 | __asm__ volatile ( |
| 307 | ".set push \n\t" |
| 308 | ".set noreorder \n\t" |
| 309 | "beq %[src], %[p_loop_end], 1f \n\t" |
| 310 | " nop \n\t" |
| 311 | "replv.ph %[temp0], %[G_to_R] \n\t" |
| 312 | "replv.ph %[temp1], %[G_to_B] \n\t" |
| 313 | "replv.ph %[temp2], %[R_to_B] \n\t" |
| 314 | "shll.ph %[temp0], %[temp0], 8 \n\t" |
| 315 | "shll.ph %[temp1], %[temp1], 8 \n\t" |
| 316 | "shll.ph %[temp2], %[temp2], 8 \n\t" |
| 317 | "shra.ph %[temp0], %[temp0], 8 \n\t" |
| 318 | "shra.ph %[temp1], %[temp1], 8 \n\t" |
| 319 | "shra.ph %[temp2], %[temp2], 8 \n\t" |
| 320 | "0: \n\t" |
| 321 | "lw %[argb], 0(%[src]) \n\t" |
| 322 | "lw %[argb1], 4(%[src]) \n\t" |
| 323 | "sw %[argb], 0(%[dst]) \n\t" |
| 324 | "sw %[argb1], 4(%[dst]) \n\t" |
| 325 | "addiu %[src], %[src], 8 \n\t" |
| 326 | "addiu %[dst], %[dst], 8 \n\t" |
| 327 | "precrq.qb.ph %[temp3], %[argb], %[argb1] \n\t" |
| 328 | "preceu.ph.qbra %[temp3], %[temp3] \n\t" |
| 329 | "shll.ph %[temp3], %[temp3], 8 \n\t" |
| 330 | "shra.ph %[temp3], %[temp3], 8 \n\t" |
| 331 | "mul.ph %[temp5], %[temp3], %[temp0] \n\t" |
| 332 | "mul.ph %[temp3], %[temp3], %[temp1] \n\t" |
| 333 | "precrq.ph.w %[new_red], %[argb], %[argb1] \n\t" |
| 334 | "ins %[argb1], %[argb], 16, 16 \n\t" |
| 335 | "shra.ph %[temp5], %[temp5], 5 \n\t" |
| 336 | "shra.ph %[temp3], %[temp3], 5 \n\t" |
| 337 | "addu.ph %[new_red], %[new_red], %[temp5] \n\t" |
| 338 | "addu.ph %[argb1], %[argb1], %[temp3] \n\t" |
| 339 | "preceu.ph.qbra %[temp5], %[new_red] \n\t" |
| 340 | "shll.ph %[temp4], %[temp5], 8 \n\t" |
| 341 | "shra.ph %[temp4], %[temp4], 8 \n\t" |
| 342 | "mul.ph %[temp4], %[temp4], %[temp2] \n\t" |
| 343 | "sb %[temp5], -2(%[dst]) \n\t" |
| 344 | "sra %[temp5], %[temp5], 16 \n\t" |
| 345 | "shra.ph %[temp4], %[temp4], 5 \n\t" |
| 346 | "addu.ph %[argb1], %[argb1], %[temp4] \n\t" |
| 347 | "preceu.ph.qbra %[temp3], %[argb1] \n\t" |
| 348 | "sb %[temp5], -6(%[dst]) \n\t" |
| 349 | "sb %[temp3], -4(%[dst]) \n\t" |
| 350 | "sra %[temp3], %[temp3], 16 \n\t" |
| 351 | "bne %[src], %[p_loop_end], 0b \n\t" |
| 352 | " sb %[temp3], -8(%[dst]) \n\t" |
| 353 | "1: \n\t" |
| 354 | ".set pop \n\t" |
| 355 | : [temp0]"=&r" (temp0), [temp1]"=&r" (temp1), [temp2]"=&r" (temp2), |
| 356 | [temp3]"=&r" (temp3), [temp4]"=&r" (temp4), [temp5]"=&r" (temp5), |
| 357 | [new_red]"=&r" (new_red), [argb]"=&r" (argb), |
| 358 | [argb1]"=&r" (argb1), [dst]"+&r" (dst), [src]"+&r" (src) |
| 359 | : [G_to_R]"r" (G_to_R), [R_to_B]"r" (R_to_B), |
| 360 | [G_to_B]"r" (G_to_B), [p_loop_end]"r" (p_loop_end) |
| 361 | : "memory" , "hi" , "lo" |
| 362 | ); |
| 363 | |
| 364 | // Fall-back to C-version for left-overs. |
| 365 | if (num_pixels & 1) VP8LTransformColorInverse_C(m, src, 1, dst); |
| 366 | } |
| 367 | |
| 368 | static void ConvertBGRAToRGB_MIPSdspR2(const uint32_t* src, |
| 369 | int num_pixels, uint8_t* dst) { |
| 370 | int temp0, temp1, temp2, temp3; |
| 371 | const uint32_t* const p_loop1_end = src + (num_pixels & ~3); |
| 372 | const uint32_t* const p_loop2_end = src + num_pixels; |
| 373 | __asm__ volatile ( |
| 374 | ".set push \n\t" |
| 375 | ".set noreorder \n\t" |
| 376 | "beq %[src], %[p_loop1_end], 3f \n\t" |
| 377 | " nop \n\t" |
| 378 | "0: \n\t" |
| 379 | "lw %[temp3], 12(%[src]) \n\t" |
| 380 | "lw %[temp2], 8(%[src]) \n\t" |
| 381 | "lw %[temp1], 4(%[src]) \n\t" |
| 382 | "lw %[temp0], 0(%[src]) \n\t" |
| 383 | "ins %[temp3], %[temp2], 24, 8 \n\t" |
| 384 | "sll %[temp2], %[temp2], 8 \n\t" |
| 385 | "rotr %[temp3], %[temp3], 16 \n\t" |
| 386 | "ins %[temp2], %[temp1], 0, 16 \n\t" |
| 387 | "sll %[temp1], %[temp1], 8 \n\t" |
| 388 | "wsbh %[temp3], %[temp3] \n\t" |
| 389 | "balign %[temp0], %[temp1], 1 \n\t" |
| 390 | "wsbh %[temp2], %[temp2] \n\t" |
| 391 | "wsbh %[temp0], %[temp0] \n\t" |
| 392 | "usw %[temp3], 8(%[dst]) \n\t" |
| 393 | "rotr %[temp0], %[temp0], 16 \n\t" |
| 394 | "usw %[temp2], 4(%[dst]) \n\t" |
| 395 | "addiu %[src], %[src], 16 \n\t" |
| 396 | "usw %[temp0], 0(%[dst]) \n\t" |
| 397 | "bne %[src], %[p_loop1_end], 0b \n\t" |
| 398 | " addiu %[dst], %[dst], 12 \n\t" |
| 399 | "3: \n\t" |
| 400 | "beq %[src], %[p_loop2_end], 2f \n\t" |
| 401 | " nop \n\t" |
| 402 | "1: \n\t" |
| 403 | "lw %[temp0], 0(%[src]) \n\t" |
| 404 | "addiu %[src], %[src], 4 \n\t" |
| 405 | "wsbh %[temp1], %[temp0] \n\t" |
| 406 | "addiu %[dst], %[dst], 3 \n\t" |
| 407 | "ush %[temp1], -2(%[dst]) \n\t" |
| 408 | "sra %[temp0], %[temp0], 16 \n\t" |
| 409 | "bne %[src], %[p_loop2_end], 1b \n\t" |
| 410 | " sb %[temp0], -3(%[dst]) \n\t" |
| 411 | "2: \n\t" |
| 412 | ".set pop \n\t" |
| 413 | : [temp0]"=&r" (temp0), [temp1]"=&r" (temp1), [temp2]"=&r" (temp2), |
| 414 | [temp3]"=&r" (temp3), [dst]"+&r" (dst), [src]"+&r" (src) |
| 415 | : [p_loop1_end]"r" (p_loop1_end), [p_loop2_end]"r" (p_loop2_end) |
| 416 | : "memory" |
| 417 | ); |
| 418 | } |
| 419 | |
| 420 | static void ConvertBGRAToRGBA_MIPSdspR2(const uint32_t* src, |
| 421 | int num_pixels, uint8_t* dst) { |
| 422 | int temp0, temp1, temp2, temp3; |
| 423 | const uint32_t* const p_loop1_end = src + (num_pixels & ~3); |
| 424 | const uint32_t* const p_loop2_end = src + num_pixels; |
| 425 | __asm__ volatile ( |
| 426 | ".set push \n\t" |
| 427 | ".set noreorder \n\t" |
| 428 | "beq %[src], %[p_loop1_end], 3f \n\t" |
| 429 | " nop \n\t" |
| 430 | "0: \n\t" |
| 431 | "lw %[temp0], 0(%[src]) \n\t" |
| 432 | "lw %[temp1], 4(%[src]) \n\t" |
| 433 | "lw %[temp2], 8(%[src]) \n\t" |
| 434 | "lw %[temp3], 12(%[src]) \n\t" |
| 435 | "wsbh %[temp0], %[temp0] \n\t" |
| 436 | "wsbh %[temp1], %[temp1] \n\t" |
| 437 | "wsbh %[temp2], %[temp2] \n\t" |
| 438 | "wsbh %[temp3], %[temp3] \n\t" |
| 439 | "addiu %[src], %[src], 16 \n\t" |
| 440 | "balign %[temp0], %[temp0], 1 \n\t" |
| 441 | "balign %[temp1], %[temp1], 1 \n\t" |
| 442 | "balign %[temp2], %[temp2], 1 \n\t" |
| 443 | "balign %[temp3], %[temp3], 1 \n\t" |
| 444 | "usw %[temp0], 0(%[dst]) \n\t" |
| 445 | "usw %[temp1], 4(%[dst]) \n\t" |
| 446 | "usw %[temp2], 8(%[dst]) \n\t" |
| 447 | "usw %[temp3], 12(%[dst]) \n\t" |
| 448 | "bne %[src], %[p_loop1_end], 0b \n\t" |
| 449 | " addiu %[dst], %[dst], 16 \n\t" |
| 450 | "3: \n\t" |
| 451 | "beq %[src], %[p_loop2_end], 2f \n\t" |
| 452 | " nop \n\t" |
| 453 | "1: \n\t" |
| 454 | "lw %[temp0], 0(%[src]) \n\t" |
| 455 | "wsbh %[temp0], %[temp0] \n\t" |
| 456 | "addiu %[src], %[src], 4 \n\t" |
| 457 | "balign %[temp0], %[temp0], 1 \n\t" |
| 458 | "usw %[temp0], 0(%[dst]) \n\t" |
| 459 | "bne %[src], %[p_loop2_end], 1b \n\t" |
| 460 | " addiu %[dst], %[dst], 4 \n\t" |
| 461 | "2: \n\t" |
| 462 | ".set pop \n\t" |
| 463 | : [temp0]"=&r" (temp0), [temp1]"=&r" (temp1), [temp2]"=&r" (temp2), |
| 464 | [temp3]"=&r" (temp3), [dst]"+&r" (dst), [src]"+&r" (src) |
| 465 | : [p_loop1_end]"r" (p_loop1_end), [p_loop2_end]"r" (p_loop2_end) |
| 466 | : "memory" |
| 467 | ); |
| 468 | } |
| 469 | |
| 470 | static void ConvertBGRAToRGBA4444_MIPSdspR2(const uint32_t* src, |
| 471 | int num_pixels, uint8_t* dst) { |
| 472 | int temp0, temp1, temp2, temp3, temp4, temp5; |
| 473 | const uint32_t* const p_loop1_end = src + (num_pixels & ~3); |
| 474 | const uint32_t* const p_loop2_end = src + num_pixels; |
| 475 | __asm__ volatile ( |
| 476 | ".set push \n\t" |
| 477 | ".set noreorder \n\t" |
| 478 | "beq %[src], %[p_loop1_end], 3f \n\t" |
| 479 | " nop \n\t" |
| 480 | "0: \n\t" |
| 481 | "lw %[temp0], 0(%[src]) \n\t" |
| 482 | "lw %[temp1], 4(%[src]) \n\t" |
| 483 | "lw %[temp2], 8(%[src]) \n\t" |
| 484 | "lw %[temp3], 12(%[src]) \n\t" |
| 485 | "ext %[temp4], %[temp0], 28, 4 \n\t" |
| 486 | "ext %[temp5], %[temp0], 12, 4 \n\t" |
| 487 | "ins %[temp0], %[temp4], 0, 4 \n\t" |
| 488 | "ext %[temp4], %[temp1], 28, 4 \n\t" |
| 489 | "ins %[temp0], %[temp5], 16, 4 \n\t" |
| 490 | "ext %[temp5], %[temp1], 12, 4 \n\t" |
| 491 | "ins %[temp1], %[temp4], 0, 4 \n\t" |
| 492 | "ext %[temp4], %[temp2], 28, 4 \n\t" |
| 493 | "ins %[temp1], %[temp5], 16, 4 \n\t" |
| 494 | "ext %[temp5], %[temp2], 12, 4 \n\t" |
| 495 | "ins %[temp2], %[temp4], 0, 4 \n\t" |
| 496 | "ext %[temp4], %[temp3], 28, 4 \n\t" |
| 497 | "ins %[temp2], %[temp5], 16, 4 \n\t" |
| 498 | "ext %[temp5], %[temp3], 12, 4 \n\t" |
| 499 | "ins %[temp3], %[temp4], 0, 4 \n\t" |
| 500 | "precr.qb.ph %[temp1], %[temp1], %[temp0] \n\t" |
| 501 | "ins %[temp3], %[temp5], 16, 4 \n\t" |
| 502 | "addiu %[src], %[src], 16 \n\t" |
| 503 | "precr.qb.ph %[temp3], %[temp3], %[temp2] \n\t" |
| 504 | #if (WEBP_SWAP_16BIT_CSP == 1) |
| 505 | "usw %[temp1], 0(%[dst]) \n\t" |
| 506 | "usw %[temp3], 4(%[dst]) \n\t" |
| 507 | #else |
| 508 | "wsbh %[temp1], %[temp1] \n\t" |
| 509 | "wsbh %[temp3], %[temp3] \n\t" |
| 510 | "usw %[temp1], 0(%[dst]) \n\t" |
| 511 | "usw %[temp3], 4(%[dst]) \n\t" |
| 512 | #endif |
| 513 | "bne %[src], %[p_loop1_end], 0b \n\t" |
| 514 | " addiu %[dst], %[dst], 8 \n\t" |
| 515 | "3: \n\t" |
| 516 | "beq %[src], %[p_loop2_end], 2f \n\t" |
| 517 | " nop \n\t" |
| 518 | "1: \n\t" |
| 519 | "lw %[temp0], 0(%[src]) \n\t" |
| 520 | "ext %[temp4], %[temp0], 28, 4 \n\t" |
| 521 | "ext %[temp5], %[temp0], 12, 4 \n\t" |
| 522 | "ins %[temp0], %[temp4], 0, 4 \n\t" |
| 523 | "ins %[temp0], %[temp5], 16, 4 \n\t" |
| 524 | "addiu %[src], %[src], 4 \n\t" |
| 525 | "precr.qb.ph %[temp0], %[temp0], %[temp0] \n\t" |
| 526 | #if (WEBP_SWAP_16BIT_CSP == 1) |
| 527 | "ush %[temp0], 0(%[dst]) \n\t" |
| 528 | #else |
| 529 | "wsbh %[temp0], %[temp0] \n\t" |
| 530 | "ush %[temp0], 0(%[dst]) \n\t" |
| 531 | #endif |
| 532 | "bne %[src], %[p_loop2_end], 1b \n\t" |
| 533 | " addiu %[dst], %[dst], 2 \n\t" |
| 534 | "2: \n\t" |
| 535 | ".set pop \n\t" |
| 536 | : [temp0]"=&r" (temp0), [temp1]"=&r" (temp1), [temp2]"=&r" (temp2), |
| 537 | [temp3]"=&r" (temp3), [temp4]"=&r" (temp4), [temp5]"=&r" (temp5), |
| 538 | [dst]"+&r" (dst), [src]"+&r" (src) |
| 539 | : [p_loop1_end]"r" (p_loop1_end), [p_loop2_end]"r" (p_loop2_end) |
| 540 | : "memory" |
| 541 | ); |
| 542 | } |
| 543 | |
| 544 | static void ConvertBGRAToRGB565_MIPSdspR2(const uint32_t* src, |
| 545 | int num_pixels, uint8_t* dst) { |
| 546 | int temp0, temp1, temp2, temp3, temp4, temp5; |
| 547 | const uint32_t* const p_loop1_end = src + (num_pixels & ~3); |
| 548 | const uint32_t* const p_loop2_end = src + num_pixels; |
| 549 | __asm__ volatile ( |
| 550 | ".set push \n\t" |
| 551 | ".set noreorder \n\t" |
| 552 | "beq %[src], %[p_loop1_end], 3f \n\t" |
| 553 | " nop \n\t" |
| 554 | "0: \n\t" |
| 555 | "lw %[temp0], 0(%[src]) \n\t" |
| 556 | "lw %[temp1], 4(%[src]) \n\t" |
| 557 | "lw %[temp2], 8(%[src]) \n\t" |
| 558 | "lw %[temp3], 12(%[src]) \n\t" |
| 559 | "ext %[temp4], %[temp0], 8, 16 \n\t" |
| 560 | "ext %[temp5], %[temp0], 5, 11 \n\t" |
| 561 | "ext %[temp0], %[temp0], 3, 5 \n\t" |
| 562 | "ins %[temp4], %[temp5], 0, 11 \n\t" |
| 563 | "ext %[temp5], %[temp1], 5, 11 \n\t" |
| 564 | "ins %[temp4], %[temp0], 0, 5 \n\t" |
| 565 | "ext %[temp0], %[temp1], 8, 16 \n\t" |
| 566 | "ext %[temp1], %[temp1], 3, 5 \n\t" |
| 567 | "ins %[temp0], %[temp5], 0, 11 \n\t" |
| 568 | "ext %[temp5], %[temp2], 5, 11 \n\t" |
| 569 | "ins %[temp0], %[temp1], 0, 5 \n\t" |
| 570 | "ext %[temp1], %[temp2], 8, 16 \n\t" |
| 571 | "ext %[temp2], %[temp2], 3, 5 \n\t" |
| 572 | "ins %[temp1], %[temp5], 0, 11 \n\t" |
| 573 | "ext %[temp5], %[temp3], 5, 11 \n\t" |
| 574 | "ins %[temp1], %[temp2], 0, 5 \n\t" |
| 575 | "ext %[temp2], %[temp3], 8, 16 \n\t" |
| 576 | "ext %[temp3], %[temp3], 3, 5 \n\t" |
| 577 | "ins %[temp2], %[temp5], 0, 11 \n\t" |
| 578 | "append %[temp0], %[temp4], 16 \n\t" |
| 579 | "ins %[temp2], %[temp3], 0, 5 \n\t" |
| 580 | "addiu %[src], %[src], 16 \n\t" |
| 581 | "append %[temp2], %[temp1], 16 \n\t" |
| 582 | #if (WEBP_SWAP_16BIT_CSP == 1) |
| 583 | "usw %[temp0], 0(%[dst]) \n\t" |
| 584 | "usw %[temp2], 4(%[dst]) \n\t" |
| 585 | #else |
| 586 | "wsbh %[temp0], %[temp0] \n\t" |
| 587 | "wsbh %[temp2], %[temp2] \n\t" |
| 588 | "usw %[temp0], 0(%[dst]) \n\t" |
| 589 | "usw %[temp2], 4(%[dst]) \n\t" |
| 590 | #endif |
| 591 | "bne %[src], %[p_loop1_end], 0b \n\t" |
| 592 | " addiu %[dst], %[dst], 8 \n\t" |
| 593 | "3: \n\t" |
| 594 | "beq %[src], %[p_loop2_end], 2f \n\t" |
| 595 | " nop \n\t" |
| 596 | "1: \n\t" |
| 597 | "lw %[temp0], 0(%[src]) \n\t" |
| 598 | "ext %[temp4], %[temp0], 8, 16 \n\t" |
| 599 | "ext %[temp5], %[temp0], 5, 11 \n\t" |
| 600 | "ext %[temp0], %[temp0], 3, 5 \n\t" |
| 601 | "ins %[temp4], %[temp5], 0, 11 \n\t" |
| 602 | "addiu %[src], %[src], 4 \n\t" |
| 603 | "ins %[temp4], %[temp0], 0, 5 \n\t" |
| 604 | #if (WEBP_SWAP_16BIT_CSP == 1) |
| 605 | "ush %[temp4], 0(%[dst]) \n\t" |
| 606 | #else |
| 607 | "wsbh %[temp4], %[temp4] \n\t" |
| 608 | "ush %[temp4], 0(%[dst]) \n\t" |
| 609 | #endif |
| 610 | "bne %[src], %[p_loop2_end], 1b \n\t" |
| 611 | " addiu %[dst], %[dst], 2 \n\t" |
| 612 | "2: \n\t" |
| 613 | ".set pop \n\t" |
| 614 | : [temp0]"=&r" (temp0), [temp1]"=&r" (temp1), [temp2]"=&r" (temp2), |
| 615 | [temp3]"=&r" (temp3), [temp4]"=&r" (temp4), [temp5]"=&r" (temp5), |
| 616 | [dst]"+&r" (dst), [src]"+&r" (src) |
| 617 | : [p_loop1_end]"r" (p_loop1_end), [p_loop2_end]"r" (p_loop2_end) |
| 618 | : "memory" |
| 619 | ); |
| 620 | } |
| 621 | |
| 622 | static void ConvertBGRAToBGR_MIPSdspR2(const uint32_t* src, |
| 623 | int num_pixels, uint8_t* dst) { |
| 624 | int temp0, temp1, temp2, temp3; |
| 625 | const uint32_t* const p_loop1_end = src + (num_pixels & ~3); |
| 626 | const uint32_t* const p_loop2_end = src + num_pixels; |
| 627 | __asm__ volatile ( |
| 628 | ".set push \n\t" |
| 629 | ".set noreorder \n\t" |
| 630 | "beq %[src], %[p_loop1_end], 3f \n\t" |
| 631 | " nop \n\t" |
| 632 | "0: \n\t" |
| 633 | "lw %[temp0], 0(%[src]) \n\t" |
| 634 | "lw %[temp1], 4(%[src]) \n\t" |
| 635 | "lw %[temp2], 8(%[src]) \n\t" |
| 636 | "lw %[temp3], 12(%[src]) \n\t" |
| 637 | "ins %[temp0], %[temp1], 24, 8 \n\t" |
| 638 | "sra %[temp1], %[temp1], 8 \n\t" |
| 639 | "ins %[temp1], %[temp2], 16, 16 \n\t" |
| 640 | "sll %[temp2], %[temp2], 8 \n\t" |
| 641 | "balign %[temp3], %[temp2], 1 \n\t" |
| 642 | "addiu %[src], %[src], 16 \n\t" |
| 643 | "usw %[temp0], 0(%[dst]) \n\t" |
| 644 | "usw %[temp1], 4(%[dst]) \n\t" |
| 645 | "usw %[temp3], 8(%[dst]) \n\t" |
| 646 | "bne %[src], %[p_loop1_end], 0b \n\t" |
| 647 | " addiu %[dst], %[dst], 12 \n\t" |
| 648 | "3: \n\t" |
| 649 | "beq %[src], %[p_loop2_end], 2f \n\t" |
| 650 | " nop \n\t" |
| 651 | "1: \n\t" |
| 652 | "lw %[temp0], 0(%[src]) \n\t" |
| 653 | "addiu %[src], %[src], 4 \n\t" |
| 654 | "addiu %[dst], %[dst], 3 \n\t" |
| 655 | "ush %[temp0], -3(%[dst]) \n\t" |
| 656 | "sra %[temp0], %[temp0], 16 \n\t" |
| 657 | "bne %[src], %[p_loop2_end], 1b \n\t" |
| 658 | " sb %[temp0], -1(%[dst]) \n\t" |
| 659 | "2: \n\t" |
| 660 | ".set pop \n\t" |
| 661 | : [temp0]"=&r" (temp0), [temp1]"=&r" (temp1), [temp2]"=&r" (temp2), |
| 662 | [temp3]"=&r" (temp3), [dst]"+&r" (dst), [src]"+&r" (src) |
| 663 | : [p_loop1_end]"r" (p_loop1_end), [p_loop2_end]"r" (p_loop2_end) |
| 664 | : "memory" |
| 665 | ); |
| 666 | } |
| 667 | |
| 668 | //------------------------------------------------------------------------------ |
| 669 | // Entry point |
| 670 | |
| 671 | extern void VP8LDspInitMIPSdspR2(void); |
| 672 | |
| 673 | WEBP_TSAN_IGNORE_FUNCTION void VP8LDspInitMIPSdspR2(void) { |
| 674 | VP8LMapColor32b = MapARGB_MIPSdspR2; |
| 675 | VP8LMapColor8b = MapAlpha_MIPSdspR2; |
| 676 | |
| 677 | VP8LPredictors[5] = Predictor5_MIPSdspR2; |
| 678 | VP8LPredictors[6] = Predictor6_MIPSdspR2; |
| 679 | VP8LPredictors[7] = Predictor7_MIPSdspR2; |
| 680 | VP8LPredictors[8] = Predictor8_MIPSdspR2; |
| 681 | VP8LPredictors[9] = Predictor9_MIPSdspR2; |
| 682 | VP8LPredictors[10] = Predictor10_MIPSdspR2; |
| 683 | VP8LPredictors[11] = Predictor11_MIPSdspR2; |
| 684 | VP8LPredictors[12] = Predictor12_MIPSdspR2; |
| 685 | VP8LPredictors[13] = Predictor13_MIPSdspR2; |
| 686 | |
| 687 | VP8LAddGreenToBlueAndRed = AddGreenToBlueAndRed_MIPSdspR2; |
| 688 | VP8LTransformColorInverse = TransformColorInverse_MIPSdspR2; |
| 689 | |
| 690 | VP8LConvertBGRAToRGB = ConvertBGRAToRGB_MIPSdspR2; |
| 691 | VP8LConvertBGRAToRGBA = ConvertBGRAToRGBA_MIPSdspR2; |
| 692 | VP8LConvertBGRAToRGBA4444 = ConvertBGRAToRGBA4444_MIPSdspR2; |
| 693 | VP8LConvertBGRAToRGB565 = ConvertBGRAToRGB565_MIPSdspR2; |
| 694 | VP8LConvertBGRAToBGR = ConvertBGRAToBGR_MIPSdspR2; |
| 695 | } |
| 696 | |
| 697 | #else // !WEBP_USE_MIPS_DSP_R2 |
| 698 | |
| 699 | WEBP_DSP_INIT_STUB(VP8LDspInitMIPSdspR2) |
| 700 | |
| 701 | #endif // WEBP_USE_MIPS_DSP_R2 |
| 702 | |