| 1 | // Copyright 2016 Google Inc. All Rights Reserved. |
| 2 | // |
| 3 | // Use of this source code is governed by a BSD-style license |
| 4 | // that can be found in the COPYING file in the root of the source |
| 5 | // tree. An additional intellectual property rights grant can be found |
| 6 | // in the file PATENTS. All contributing project authors may |
| 7 | // be found in the AUTHORS file in the root of the source tree. |
| 8 | // ----------------------------------------------------------------------------- |
| 9 | // |
| 10 | // MSA version of rescaling functions |
| 11 | // |
| 12 | // Author: Prashant Patil (prashant.patil@imgtec.com) |
| 13 | |
| 14 | #include "src/dsp/dsp.h" |
| 15 | |
| 16 | #if defined(WEBP_USE_MSA) && !defined(WEBP_REDUCE_SIZE) |
| 17 | |
| 18 | #include <assert.h> |
| 19 | |
| 20 | #include "src/utils/rescaler_utils.h" |
| 21 | #include "src/dsp/msa_macro.h" |
| 22 | |
// Fixed-point helpers shared by the scalar tail loops below.
// ROUNDER is half of WEBP_RESCALER_ONE; adding it before the shift rounds to
// nearest (assuming WEBP_RESCALER_ONE == 1 << WEBP_RESCALER_RFIX -- defined in
// a header not shown here).
#define ROUNDER (WEBP_RESCALER_ONE >> 1)
// 64-bit product of x and y, rounded, then shifted down by WEBP_RESCALER_RFIX.
#define MULT_FIX(x, y) \
  ((ROUNDER + (uint64_t)(x) * (y)) >> WEBP_RESCALER_RFIX)
// Same product without the rounding term (truncating shift).
#define MULT_FIX_FLOOR(x, y) \
  (((uint64_t)(x) * (y)) >> WEBP_RESCALER_RFIX)
| 26 | |
// Vector counterpart of MULT_FIX() for 16 samples: the sixteen 32-bit values
// in in0..in3 are multiplied by 'scale', shifted right by 'shift' with SRAR,
// and packed down into the 16 bytes of 'dst'.
// The macro is not hygienic: it expects a vector variable named 'zero' to be
// visible at the expansion site.
// NOTE(review): the pckev_b packing keeps the low byte of each result; unlike
// the scalar tail loops there is no explicit clamp to 255 here.
#define CALC_MULT_FIX_16(in0, in1, in2, in3, scale, shift, dst) do {  \
  v4u32 w0_m, w1_m, w2_m, w3_m;                                       \
  v16u8 b0_m, b1_m, b2_m, b3_m, b4_m, b5_m;                           \
  v2u64 d0_m, d1_m, d2_m, d3_m;                                       \
  /* Samples from in0 and in1. */                                     \
  ILVRL_W2_UW(zero, in0, w0_m, w1_m);                                 \
  ILVRL_W2_UW(zero, in1, w2_m, w3_m);                                 \
  DOTP_UW2_UD(w0_m, w1_m, scale, scale, d0_m, d1_m);                  \
  DOTP_UW2_UD(w2_m, w3_m, scale, scale, d2_m, d3_m);                  \
  SRAR_D4_UD(d0_m, d1_m, d2_m, d3_m, shift);                          \
  PCKEV_B2_UB(d1_m, d0_m, d3_m, d2_m, b0_m, b1_m);                    \
  /* Samples from in2 and in3 (64-bit temporaries are reused). */     \
  ILVRL_W2_UW(zero, in2, w0_m, w1_m);                                 \
  ILVRL_W2_UW(zero, in3, w2_m, w3_m);                                 \
  DOTP_UW2_UD(w0_m, w1_m, scale, scale, d0_m, d1_m);                  \
  DOTP_UW2_UD(w2_m, w3_m, scale, scale, d2_m, d3_m);                  \
  SRAR_D4_UD(d0_m, d1_m, d2_m, d3_m, shift);                          \
  PCKEV_B2_UB(d1_m, d0_m, d3_m, d2_m, b2_m, b3_m);                    \
  /* Two more packing rounds bring all 16 results into one vector. */ \
  PCKEV_B2_UB(b1_m, b0_m, b3_m, b2_m, b4_m, b5_m);                    \
  dst = (v16u8)__msa_pckev_b((v16i8)b5_m, (v16i8)b4_m);               \
} while (0)
| 46 | |
// Four-sample variant of CALC_MULT_FIX_16: scales the four 32-bit values of
// in0 by 'scale', applies the SRAR shift and returns the four resulting bytes
// packed into the 32-bit scalar 'dst'.
// Expects a vector variable named 'zero' at the expansion site.
#define CALC_MULT_FIX_4(in0, scale, shift, dst) do {    \
  v4u32 w0_m, w1_m;                                     \
  v16i8 pk0_m, pk1_m;                                   \
  v2u64 d0_m, d1_m;                                     \
  ILVRL_W2_UW(zero, in0, w0_m, w1_m);                   \
  DOTP_UW2_UD(w0_m, w1_m, scale, scale, d0_m, d1_m);    \
  SRAR_D2_UD(d0_m, d1_m, shift);                        \
  /* Three pckev_b rounds: 64-bit lanes -> bytes. */    \
  pk0_m = __msa_pckev_b((v16i8)d1_m, (v16i8)d0_m);      \
  pk1_m = __msa_pckev_b(pk0_m, pk0_m);                  \
  pk0_m = __msa_pckev_b(pk1_m, pk1_m);                  \
  /* Word 0 now carries the four output bytes. */       \
  dst = __msa_copy_s_w((v4i32)pk0_m, 0);                \
} while (0)
| 59 | |
// Scales the sixteen 32-bit values in in0..in3 by 'fyscale', applies the SRAR
// shift, and keeps the results as 32-bit lanes in dst0..dst3 (no narrowing to
// bytes, unlike CALC_MULT_FIX_16).
// Expects a vector variable named 'zero' at the expansion site.
#define CALC_MULT_FIX1_16(in0, in1, in2, in3, fyscale, shift,  \
                          dst0, dst1, dst2, dst3) do {         \
  v4u32 w0_m, w1_m, w2_m, w3_m;                                \
  v2u64 d0_m, d1_m, d2_m, d3_m;                                \
  /* in0, in1 -> dst0, dst1 */                                 \
  ILVRL_W2_UW(zero, in0, w0_m, w1_m);                          \
  ILVRL_W2_UW(zero, in1, w2_m, w3_m);                          \
  DOTP_UW2_UD(w0_m, w1_m, fyscale, fyscale, d0_m, d1_m);       \
  DOTP_UW2_UD(w2_m, w3_m, fyscale, fyscale, d2_m, d3_m);       \
  SRAR_D4_UD(d0_m, d1_m, d2_m, d3_m, shift);                   \
  PCKEV_W2_UW(d1_m, d0_m, d3_m, d2_m, dst0, dst1);             \
  /* in2, in3 -> dst2, dst3 */                                 \
  ILVRL_W2_UW(zero, in2, w0_m, w1_m);                          \
  ILVRL_W2_UW(zero, in3, w2_m, w3_m);                          \
  DOTP_UW2_UD(w0_m, w1_m, fyscale, fyscale, d0_m, d1_m);       \
  DOTP_UW2_UD(w2_m, w3_m, fyscale, fyscale, d2_m, d3_m);       \
  SRAR_D4_UD(d0_m, d1_m, d2_m, d3_m, shift);                   \
  PCKEV_W2_UW(d1_m, d0_m, d3_m, d2_m, dst2, dst3);             \
} while (0)
| 77 | |
// Four-sample variant of CALC_MULT_FIX1_16: scales the four 32-bit values of
// in0 by 'scale', applies the SRAR shift and stores the four 32-bit results
// in the vector 'dst'.
// Expects a vector variable named 'zero' at the expansion site.
#define CALC_MULT_FIX1_4(in0, scale, shift, dst) do {     \
  v4u32 w0_m, w1_m;                                       \
  v2u64 d0_m, d1_m;                                       \
  ILVRL_W2_UW(zero, in0, w0_m, w1_m);                     \
  DOTP_UW2_UD(w0_m, w1_m, scale, scale, d0_m, d1_m);      \
  SRAR_D2_UD(d0_m, d1_m, shift);                          \
  /* Keep the low 32 bits of every 64-bit lane. */        \
  dst = (v4u32)__msa_pckev_w((v4i32)d1_m, (v4i32)d0_m);   \
} while (0)
| 86 | |
// Two-stage fixed-point computation on eight sample pairs.
// Stage 1 interleaves (in0, in2) and (in1, in3) and takes the dot product with
// the weight pair in 'mult', followed by the SRAR shift. Stage 2 multiplies
// that intermediate by 'scale' and shifts again. The results are partially
// packed (one pckev_b round) into dst0/dst1; the caller finishes the packing.
// Intended to mirror the scalar "A * frow + B * irow, then MULT_FIX" tail loop
// of ExportRowExpand_1 when mult holds the interleaved A/B weights.
#define CALC_MULT_FIX2_16(in0, in1, in2, in3, mult, scale, shift,  \
                          dst0, dst1) do {                         \
  v4u32 w0_m, w1_m, w2_m, w3_m;                                    \
  v2u64 d0_m, d1_m, d2_m, d3_m;                                    \
  /* Stage 1: weighted sum of the two rows. */                     \
  ILVRL_W2_UW(in0, in2, w0_m, w1_m);                               \
  ILVRL_W2_UW(in1, in3, w2_m, w3_m);                               \
  DOTP_UW2_UD(w0_m, w1_m, mult, mult, d0_m, d1_m);                 \
  DOTP_UW2_UD(w2_m, w3_m, mult, mult, d2_m, d3_m);                 \
  SRAR_D4_UD(d0_m, d1_m, d2_m, d3_m, shift);                       \
  /* Stage 2: apply 'scale' to the intermediate, in place. */      \
  DOTP_UW2_UD(d0_m, d1_m, scale, scale, d0_m, d1_m);               \
  DOTP_UW2_UD(d2_m, d3_m, scale, scale, d2_m, d3_m);               \
  SRAR_D4_UD(d0_m, d1_m, d2_m, d3_m, shift);                       \
  PCKEV_B2_UB(d1_m, d0_m, d3_m, d2_m, dst0, dst1);                 \
} while (0)
| 101 | |
// Four-sample variant of CALC_MULT_FIX2_16: interleaves in0 with in1, takes
// the dot product with the weight pair 'mult', shifts, multiplies by 'scale',
// shifts again and returns the four resulting bytes packed into the 32-bit
// scalar 'dst'.
#define CALC_MULT_FIX2_4(in0, in1, mult, scale, shift, dst) do {  \
  v4u32 w0_m, w1_m;                                               \
  v2u64 d0_m, d1_m;                                               \
  v16i8 pk0_m, pk1_m;                                             \
  /* Stage 1: weighted sum of the two inputs. */                  \
  ILVRL_W2_UW(in0, in1, w0_m, w1_m);                              \
  DOTP_UW2_UD(w0_m, w1_m, mult, mult, d0_m, d1_m);                \
  SRAR_D2_UD(d0_m, d1_m, shift);                                  \
  /* Stage 2: apply 'scale' to the intermediate, in place. */     \
  DOTP_UW2_UD(d0_m, d1_m, scale, scale, d0_m, d1_m);              \
  SRAR_D2_UD(d0_m, d1_m, shift);                                  \
  /* Three pckev_b rounds: 64-bit lanes -> bytes in word 0. */    \
  pk0_m = __msa_pckev_b((v16i8)d1_m, (v16i8)d0_m);                \
  pk1_m = __msa_pckev_b(pk0_m, pk0_m);                            \
  pk0_m = __msa_pckev_b(pk1_m, pk1_m);                            \
  dst = __msa_copy_s_w((v4i32)pk0_m, 0);                          \
} while (0)
| 116 | |
| 117 | static WEBP_INLINE void ExportRowExpand_0(const uint32_t* frow, uint8_t* dst, |
| 118 | int length, |
| 119 | WebPRescaler* const wrk) { |
| 120 | const v4u32 scale = (v4u32)__msa_fill_w(wrk->fy_scale); |
| 121 | const v4u32 shift = (v4u32)__msa_fill_w(WEBP_RESCALER_RFIX); |
| 122 | const v4i32 zero = { 0 }; |
| 123 | |
| 124 | while (length >= 16) { |
| 125 | v4u32 src0, src1, src2, src3; |
| 126 | v16u8 out; |
| 127 | LD_UW4(frow, 4, src0, src1, src2, src3); |
| 128 | CALC_MULT_FIX_16(src0, src1, src2, src3, scale, shift, out); |
| 129 | ST_UB(out, dst); |
| 130 | length -= 16; |
| 131 | frow += 16; |
| 132 | dst += 16; |
| 133 | } |
| 134 | if (length > 0) { |
| 135 | int x_out; |
| 136 | if (length >= 12) { |
| 137 | uint32_t val0_m, val1_m, val2_m; |
| 138 | v4u32 src0, src1, src2; |
| 139 | LD_UW3(frow, 4, src0, src1, src2); |
| 140 | CALC_MULT_FIX_4(src0, scale, shift, val0_m); |
| 141 | CALC_MULT_FIX_4(src1, scale, shift, val1_m); |
| 142 | CALC_MULT_FIX_4(src2, scale, shift, val2_m); |
| 143 | SW3(val0_m, val1_m, val2_m, dst, 4); |
| 144 | length -= 12; |
| 145 | frow += 12; |
| 146 | dst += 12; |
| 147 | } else if (length >= 8) { |
| 148 | uint32_t val0_m, val1_m; |
| 149 | v4u32 src0, src1; |
| 150 | LD_UW2(frow, 4, src0, src1); |
| 151 | CALC_MULT_FIX_4(src0, scale, shift, val0_m); |
| 152 | CALC_MULT_FIX_4(src1, scale, shift, val1_m); |
| 153 | SW2(val0_m, val1_m, dst, 4); |
| 154 | length -= 8; |
| 155 | frow += 8; |
| 156 | dst += 8; |
| 157 | } else if (length >= 4) { |
| 158 | uint32_t val0_m; |
| 159 | const v4u32 src0 = LD_UW(frow); |
| 160 | CALC_MULT_FIX_4(src0, scale, shift, val0_m); |
| 161 | SW(val0_m, dst); |
| 162 | length -= 4; |
| 163 | frow += 4; |
| 164 | dst += 4; |
| 165 | } |
| 166 | for (x_out = 0; x_out < length; ++x_out) { |
| 167 | const uint32_t J = frow[x_out]; |
| 168 | const int v = (int)MULT_FIX(J, wrk->fy_scale); |
| 169 | dst[x_out] = (v > 255) ? 255u : (uint8_t)v; |
| 170 | } |
| 171 | } |
| 172 | } |
| 173 | |
| 174 | static WEBP_INLINE void ExportRowExpand_1(const uint32_t* frow, uint32_t* irow, |
| 175 | uint8_t* dst, int length, |
| 176 | WebPRescaler* const wrk) { |
| 177 | const uint32_t B = WEBP_RESCALER_FRAC(-wrk->y_accum, wrk->y_sub); |
| 178 | const uint32_t A = (uint32_t)(WEBP_RESCALER_ONE - B); |
| 179 | const v4i32 B1 = __msa_fill_w(B); |
| 180 | const v4i32 A1 = __msa_fill_w(A); |
| 181 | const v4i32 AB = __msa_ilvr_w(A1, B1); |
| 182 | const v4u32 scale = (v4u32)__msa_fill_w(wrk->fy_scale); |
| 183 | const v4u32 shift = (v4u32)__msa_fill_w(WEBP_RESCALER_RFIX); |
| 184 | |
| 185 | while (length >= 16) { |
| 186 | v4u32 frow0, frow1, frow2, frow3, irow0, irow1, irow2, irow3; |
| 187 | v16u8 t0, t1, t2, t3, t4, t5; |
| 188 | LD_UW4(frow, 4, frow0, frow1, frow2, frow3); |
| 189 | LD_UW4(irow, 4, irow0, irow1, irow2, irow3); |
| 190 | CALC_MULT_FIX2_16(frow0, frow1, irow0, irow1, AB, scale, shift, t0, t1); |
| 191 | CALC_MULT_FIX2_16(frow2, frow3, irow2, irow3, AB, scale, shift, t2, t3); |
| 192 | PCKEV_B2_UB(t1, t0, t3, t2, t4, t5); |
| 193 | t0 = (v16u8)__msa_pckev_b((v16i8)t5, (v16i8)t4); |
| 194 | ST_UB(t0, dst); |
| 195 | frow += 16; |
| 196 | irow += 16; |
| 197 | dst += 16; |
| 198 | length -= 16; |
| 199 | } |
| 200 | if (length > 0) { |
| 201 | int x_out; |
| 202 | if (length >= 12) { |
| 203 | uint32_t val0_m, val1_m, val2_m; |
| 204 | v4u32 frow0, frow1, frow2, irow0, irow1, irow2; |
| 205 | LD_UW3(frow, 4, frow0, frow1, frow2); |
| 206 | LD_UW3(irow, 4, irow0, irow1, irow2); |
| 207 | CALC_MULT_FIX2_4(frow0, irow0, AB, scale, shift, val0_m); |
| 208 | CALC_MULT_FIX2_4(frow1, irow1, AB, scale, shift, val1_m); |
| 209 | CALC_MULT_FIX2_4(frow2, irow2, AB, scale, shift, val2_m); |
| 210 | SW3(val0_m, val1_m, val2_m, dst, 4); |
| 211 | frow += 12; |
| 212 | irow += 12; |
| 213 | dst += 12; |
| 214 | length -= 12; |
| 215 | } else if (length >= 8) { |
| 216 | uint32_t val0_m, val1_m; |
| 217 | v4u32 frow0, frow1, irow0, irow1; |
| 218 | LD_UW2(frow, 4, frow0, frow1); |
| 219 | LD_UW2(irow, 4, irow0, irow1); |
| 220 | CALC_MULT_FIX2_4(frow0, irow0, AB, scale, shift, val0_m); |
| 221 | CALC_MULT_FIX2_4(frow1, irow1, AB, scale, shift, val1_m); |
| 222 | SW2(val0_m, val1_m, dst, 4); |
| 223 | frow += 4; |
| 224 | irow += 4; |
| 225 | dst += 4; |
| 226 | length -= 4; |
| 227 | } else if (length >= 4) { |
| 228 | uint32_t val0_m; |
| 229 | const v4u32 frow0 = LD_UW(frow + 0); |
| 230 | const v4u32 irow0 = LD_UW(irow + 0); |
| 231 | CALC_MULT_FIX2_4(frow0, irow0, AB, scale, shift, val0_m); |
| 232 | SW(val0_m, dst); |
| 233 | frow += 4; |
| 234 | irow += 4; |
| 235 | dst += 4; |
| 236 | length -= 4; |
| 237 | } |
| 238 | for (x_out = 0; x_out < length; ++x_out) { |
| 239 | const uint64_t I = (uint64_t)A * frow[x_out] |
| 240 | + (uint64_t)B * irow[x_out]; |
| 241 | const uint32_t J = (uint32_t)((I + ROUNDER) >> WEBP_RESCALER_RFIX); |
| 242 | const int v = (int)MULT_FIX(J, wrk->fy_scale); |
| 243 | dst[x_out] = (v > 255) ? 255u : (uint8_t)v; |
| 244 | } |
| 245 | } |
| 246 | } |
| 247 | |
| 248 | static void RescalerExportRowExpand_MIPSdspR2(WebPRescaler* const wrk) { |
| 249 | uint8_t* dst = wrk->dst; |
| 250 | rescaler_t* irow = wrk->irow; |
| 251 | const int x_out_max = wrk->dst_width * wrk->num_channels; |
| 252 | const rescaler_t* frow = wrk->frow; |
| 253 | assert(!WebPRescalerOutputDone(wrk)); |
| 254 | assert(wrk->y_accum <= 0); |
| 255 | assert(wrk->y_expand); |
| 256 | assert(wrk->y_sub != 0); |
| 257 | if (wrk->y_accum == 0) { |
| 258 | ExportRowExpand_0(frow, dst, x_out_max, wrk); |
| 259 | } else { |
| 260 | ExportRowExpand_1(frow, irow, dst, x_out_max, wrk); |
| 261 | } |
| 262 | } |
| 263 | |
| 264 | #if 0 // disabled for now. TODO(skal): make match the C-code |
| 265 | static WEBP_INLINE void ExportRowShrink_0(const uint32_t* frow, uint32_t* irow, |
| 266 | uint8_t* dst, int length, |
| 267 | const uint32_t yscale, |
| 268 | WebPRescaler* const wrk) { |
| 269 | const v4u32 y_scale = (v4u32)__msa_fill_w(yscale); |
| 270 | const v4u32 fxyscale = (v4u32)__msa_fill_w(wrk->fxy_scale); |
| 271 | const v4u32 shiftval = (v4u32)__msa_fill_w(WEBP_RESCALER_RFIX); |
| 272 | const v4i32 zero = { 0 }; |
| 273 | |
| 274 | while (length >= 16) { |
| 275 | v4u32 src0, src1, src2, src3, frac0, frac1, frac2, frac3; |
| 276 | v16u8 out; |
| 277 | LD_UW4(frow, 4, src0, src1, src2, src3); |
| 278 | CALC_MULT_FIX1_16(src0, src1, src2, src3, y_scale, shiftval, |
| 279 | frac0, frac1, frac2, frac3); |
| 280 | LD_UW4(irow, 4, src0, src1, src2, src3); |
| 281 | SUB4(src0, frac0, src1, frac1, src2, frac2, src3, frac3, |
| 282 | src0, src1, src2, src3); |
| 283 | CALC_MULT_FIX_16(src0, src1, src2, src3, fxyscale, shiftval, out); |
| 284 | ST_UB(out, dst); |
| 285 | ST_UW4(frac0, frac1, frac2, frac3, irow, 4); |
| 286 | frow += 16; |
| 287 | irow += 16; |
| 288 | dst += 16; |
| 289 | length -= 16; |
| 290 | } |
| 291 | if (length > 0) { |
| 292 | int x_out; |
| 293 | if (length >= 12) { |
| 294 | uint32_t val0_m, val1_m, val2_m; |
| 295 | v4u32 src0, src1, src2, frac0, frac1, frac2; |
| 296 | LD_UW3(frow, 4, src0, src1, src2); |
| 297 | CALC_MULT_FIX1_4(src0, y_scale, shiftval, frac0); |
| 298 | CALC_MULT_FIX1_4(src1, y_scale, shiftval, frac1); |
| 299 | CALC_MULT_FIX1_4(src2, y_scale, shiftval, frac2); |
| 300 | LD_UW3(irow, 4, src0, src1, src2); |
| 301 | SUB3(src0, frac0, src1, frac1, src2, frac2, src0, src1, src2); |
| 302 | CALC_MULT_FIX_4(src0, fxyscale, shiftval, val0_m); |
| 303 | CALC_MULT_FIX_4(src1, fxyscale, shiftval, val1_m); |
| 304 | CALC_MULT_FIX_4(src2, fxyscale, shiftval, val2_m); |
| 305 | SW3(val0_m, val1_m, val2_m, dst, 4); |
| 306 | ST_UW3(frac0, frac1, frac2, irow, 4); |
| 307 | frow += 12; |
| 308 | irow += 12; |
| 309 | dst += 12; |
| 310 | length -= 12; |
| 311 | } else if (length >= 8) { |
| 312 | uint32_t val0_m, val1_m; |
| 313 | v4u32 src0, src1, frac0, frac1; |
| 314 | LD_UW2(frow, 4, src0, src1); |
| 315 | CALC_MULT_FIX1_4(src0, y_scale, shiftval, frac0); |
| 316 | CALC_MULT_FIX1_4(src1, y_scale, shiftval, frac1); |
| 317 | LD_UW2(irow, 4, src0, src1); |
| 318 | SUB2(src0, frac0, src1, frac1, src0, src1); |
| 319 | CALC_MULT_FIX_4(src0, fxyscale, shiftval, val0_m); |
| 320 | CALC_MULT_FIX_4(src1, fxyscale, shiftval, val1_m); |
| 321 | SW2(val0_m, val1_m, dst, 4); |
| 322 | ST_UW2(frac0, frac1, irow, 4); |
| 323 | frow += 8; |
| 324 | irow += 8; |
| 325 | dst += 8; |
| 326 | length -= 8; |
| 327 | } else if (length >= 4) { |
| 328 | uint32_t val0_m; |
| 329 | v4u32 frac0; |
| 330 | v4u32 src0 = LD_UW(frow); |
| 331 | CALC_MULT_FIX1_4(src0, y_scale, shiftval, frac0); |
| 332 | src0 = LD_UW(irow); |
| 333 | src0 = src0 - frac0; |
| 334 | CALC_MULT_FIX_4(src0, fxyscale, shiftval, val0_m); |
| 335 | SW(val0_m, dst); |
| 336 | ST_UW(frac0, irow); |
| 337 | frow += 4; |
| 338 | irow += 4; |
| 339 | dst += 4; |
| 340 | length -= 4; |
| 341 | } |
| 342 | for (x_out = 0; x_out < length; ++x_out) { |
| 343 | const uint32_t frac = (uint32_t)MULT_FIX_FLOOR(frow[x_out], yscale); |
| 344 | const int v = (int)MULT_FIX(irow[x_out] - frac, wrk->fxy_scale); |
| 345 | dst[x_out] = (v > 255) ? 255u : (uint8_t)v; |
| 346 | irow[x_out] = frac; |
| 347 | } |
| 348 | } |
| 349 | } |
| 350 | |
| 351 | static WEBP_INLINE void ExportRowShrink_1(uint32_t* irow, uint8_t* dst, |
| 352 | int length, |
| 353 | WebPRescaler* const wrk) { |
| 354 | const v4u32 scale = (v4u32)__msa_fill_w(wrk->fxy_scale); |
| 355 | const v4u32 shift = (v4u32)__msa_fill_w(WEBP_RESCALER_RFIX); |
| 356 | const v4i32 zero = { 0 }; |
| 357 | |
| 358 | while (length >= 16) { |
| 359 | v4u32 src0, src1, src2, src3; |
| 360 | v16u8 dst0; |
| 361 | LD_UW4(irow, 4, src0, src1, src2, src3); |
| 362 | CALC_MULT_FIX_16(src0, src1, src2, src3, scale, shift, dst0); |
| 363 | ST_UB(dst0, dst); |
| 364 | ST_SW4(zero, zero, zero, zero, irow, 4); |
| 365 | length -= 16; |
| 366 | irow += 16; |
| 367 | dst += 16; |
| 368 | } |
| 369 | if (length > 0) { |
| 370 | int x_out; |
| 371 | if (length >= 12) { |
| 372 | uint32_t val0_m, val1_m, val2_m; |
| 373 | v4u32 src0, src1, src2; |
| 374 | LD_UW3(irow, 4, src0, src1, src2); |
| 375 | CALC_MULT_FIX_4(src0, scale, shift, val0_m); |
| 376 | CALC_MULT_FIX_4(src1, scale, shift, val1_m); |
| 377 | CALC_MULT_FIX_4(src2, scale, shift, val2_m); |
| 378 | SW3(val0_m, val1_m, val2_m, dst, 4); |
| 379 | ST_SW3(zero, zero, zero, irow, 4); |
| 380 | length -= 12; |
| 381 | irow += 12; |
| 382 | dst += 12; |
| 383 | } else if (length >= 8) { |
| 384 | uint32_t val0_m, val1_m; |
| 385 | v4u32 src0, src1; |
| 386 | LD_UW2(irow, 4, src0, src1); |
| 387 | CALC_MULT_FIX_4(src0, scale, shift, val0_m); |
| 388 | CALC_MULT_FIX_4(src1, scale, shift, val1_m); |
| 389 | SW2(val0_m, val1_m, dst, 4); |
| 390 | ST_SW2(zero, zero, irow, 4); |
| 391 | length -= 8; |
| 392 | irow += 8; |
| 393 | dst += 8; |
| 394 | } else if (length >= 4) { |
| 395 | uint32_t val0_m; |
| 396 | const v4u32 src0 = LD_UW(irow + 0); |
| 397 | CALC_MULT_FIX_4(src0, scale, shift, val0_m); |
| 398 | SW(val0_m, dst); |
| 399 | ST_SW(zero, irow); |
| 400 | length -= 4; |
| 401 | irow += 4; |
| 402 | dst += 4; |
| 403 | } |
| 404 | for (x_out = 0; x_out < length; ++x_out) { |
| 405 | const int v = (int)MULT_FIX(irow[x_out], wrk->fxy_scale); |
| 406 | dst[x_out] = (v > 255) ? 255u : (uint8_t)v; |
| 407 | irow[x_out] = 0; |
| 408 | } |
| 409 | } |
| 410 | } |
| 411 | |
| 412 | static void RescalerExportRowShrink_MIPSdspR2(WebPRescaler* const wrk) { |
| 413 | uint8_t* dst = wrk->dst; |
| 414 | rescaler_t* irow = wrk->irow; |
| 415 | const int x_out_max = wrk->dst_width * wrk->num_channels; |
| 416 | const rescaler_t* frow = wrk->frow; |
| 417 | const uint32_t yscale = wrk->fy_scale * (-wrk->y_accum); |
| 418 | assert(!WebPRescalerOutputDone(wrk)); |
| 419 | assert(wrk->y_accum <= 0); |
| 420 | assert(!wrk->y_expand); |
| 421 | if (yscale) { |
| 422 | ExportRowShrink_0(frow, irow, dst, x_out_max, yscale, wrk); |
| 423 | } else { |
| 424 | ExportRowShrink_1(irow, dst, x_out_max, wrk); |
| 425 | } |
| 426 | } |
| 427 | #endif // 0 |
| 428 | |
| 429 | //------------------------------------------------------------------------------ |
| 430 | // Entry point |
| 431 | |
| 432 | extern void WebPRescalerDspInitMSA(void); |
| 433 | |
| 434 | WEBP_TSAN_IGNORE_FUNCTION void WebPRescalerDspInitMSA(void) { |
| 435 | WebPRescalerExportRowExpand = RescalerExportRowExpand_MIPSdspR2; |
| 436 | // WebPRescalerExportRowShrink = RescalerExportRowShrink_MIPSdspR2; |
| 437 | } |
| 438 | |
| 439 | #else // !WEBP_USE_MSA |
| 440 | |
// Fallback when the MSA code above is not compiled (WEBP_USE_MSA undefined or
// WEBP_REDUCE_SIZE defined): WebPRescalerDspInitMSA is still provided through
// WEBP_DSP_INIT_STUB -- presumably an empty definition; confirm against the
// macro in src/dsp/dsp.h.
WEBP_DSP_INIT_STUB(WebPRescalerDspInitMSA)
| 442 | |
| 443 | #endif // WEBP_USE_MSA |
| 444 | |