| 1 | // Copyright 2014 Google Inc. All Rights Reserved. | 
|---|
| 2 | // | 
|---|
| 3 | // Use of this source code is governed by a BSD-style license | 
|---|
| 4 | // that can be found in the COPYING file in the root of the source | 
|---|
| 5 | // tree. An additional intellectual property rights grant can be found | 
|---|
| 6 | // in the file PATENTS. All contributing project authors may | 
|---|
| 7 | // be found in the AUTHORS file in the root of the source tree. | 
|---|
| 8 | // ----------------------------------------------------------------------------- | 
|---|
| 9 | // | 
|---|
| 10 | // MIPS version of rescaling functions | 
|---|
| 11 | // | 
|---|
| 12 | // Author(s): Djordje Pesut (djordje.pesut@imgtec.com) | 
|---|
| 13 |  | 
|---|
| 14 | #include "./dsp.h" | 
|---|
| 15 |  | 
|---|
| 16 | #if defined(WEBP_USE_MIPS32) | 
|---|
| 17 |  | 
|---|
| 18 | #include <assert.h> | 
|---|
| 19 | #include "../utils/rescaler_utils.h" | 
|---|
| 20 |  | 
|---|
| 21 | //------------------------------------------------------------------------------ | 
|---|
| 22 | // Row import | 
|---|
| 23 |  | 
|---|
| 24 | static void ImportRowShrink(WebPRescaler* const wrk, const uint8_t* src) { | 
|---|
| 25 | const int x_stride = wrk->num_channels; | 
|---|
| 26 | const int x_out_max = wrk->dst_width * wrk->num_channels; | 
|---|
| 27 | const int fx_scale = wrk->fx_scale; | 
|---|
| 28 | const int x_add = wrk->x_add; | 
|---|
| 29 | const int x_sub = wrk->x_sub; | 
|---|
| 30 | const int x_stride1 = x_stride << 2; | 
|---|
| 31 | int channel; | 
|---|
| 32 | assert(!wrk->x_expand); | 
|---|
| 33 | assert(!WebPRescalerInputDone(wrk)); | 
|---|
| 34 |  | 
|---|
| 35 | for (channel = 0; channel < x_stride; ++channel) { | 
|---|
| 36 | const uint8_t* src1 = src + channel; | 
|---|
| 37 | rescaler_t* frow = wrk->frow + channel; | 
|---|
| 38 | int temp1, temp2, temp3; | 
|---|
| 39 | int base, frac, sum; | 
|---|
| 40 | int accum, accum1; | 
|---|
| 41 | int loop_c = x_out_max - channel; | 
|---|
| 42 |  | 
|---|
| 43 | __asm__ volatile ( | 
|---|
| 44 | "li     %[temp1],   0x8000                    \n\t" | 
|---|
| 45 | "li     %[temp2],   0x10000                   \n\t" | 
|---|
| 46 | "li     %[sum],     0                         \n\t" | 
|---|
| 47 | "li     %[accum],   0                         \n\t" | 
|---|
| 48 | "1:                                             \n\t" | 
|---|
| 49 | "addu   %[accum],   %[accum],   %[x_add]      \n\t" | 
|---|
| 50 | "li     %[base],    0                         \n\t" | 
|---|
| 51 | "blez   %[accum],   3f                        \n\t" | 
|---|
| 52 | "2:                                             \n\t" | 
|---|
| 53 | "lbu    %[base],    0(%[src1])                \n\t" | 
|---|
| 54 | "subu   %[accum],   %[accum],   %[x_sub]      \n\t" | 
|---|
| 55 | "addu   %[src1],    %[src1],    %[x_stride]   \n\t" | 
|---|
| 56 | "addu   %[sum],     %[sum],     %[base]       \n\t" | 
|---|
| 57 | "bgtz   %[accum],   2b                        \n\t" | 
|---|
| 58 | "3:                                             \n\t" | 
|---|
| 59 | "negu   %[accum1],  %[accum]                  \n\t" | 
|---|
| 60 | "mul    %[frac],    %[base],    %[accum1]     \n\t" | 
|---|
| 61 | "mul    %[temp3],   %[sum],     %[x_sub]      \n\t" | 
|---|
| 62 | "subu   %[loop_c],  %[loop_c],  %[x_stride]   \n\t" | 
|---|
| 63 | "mult   %[temp1],   %[temp2]                  \n\t" | 
|---|
| 64 | "maddu  %[frac],    %[fx_scale]               \n\t" | 
|---|
| 65 | "mfhi   %[sum]                                \n\t" | 
|---|
| 66 | "subu   %[temp3],   %[temp3],   %[frac]       \n\t" | 
|---|
| 67 | "sw     %[temp3],   0(%[frow])                \n\t" | 
|---|
| 68 | "addu   %[frow],    %[frow],    %[x_stride1]  \n\t" | 
|---|
| 69 | "bgtz   %[loop_c],  1b                        \n\t" | 
|---|
| 70 | : [accum] "=&r"(accum), [src1] "+r"(src1), [temp3] "=&r"(temp3), | 
|---|
| 71 | [sum] "=&r"(sum), [base] "=&r"(base), [frac] "=&r"(frac), | 
|---|
| 72 | [frow] "+r"(frow), [accum1] "=&r"(accum1), | 
|---|
| 73 | [temp2] "=&r"(temp2), [temp1] "=&r"(temp1) | 
|---|
| 74 | : [x_stride] "r"(x_stride), [fx_scale] "r"(fx_scale), | 
|---|
| 75 | [x_sub] "r"(x_sub), [x_add] "r"(x_add), | 
|---|
| 76 | [loop_c] "r"(loop_c), [x_stride1] "r"(x_stride1) | 
|---|
| 77 | : "memory", "hi", "lo" | 
|---|
| 78 | ); | 
|---|
| 79 | assert(accum == 0); | 
|---|
| 80 | } | 
|---|
| 81 | } | 
|---|
| 82 |  | 
|---|
| 83 | static void ImportRowExpand(WebPRescaler* const wrk, const uint8_t* src) { | 
|---|
| 84 | const int x_stride = wrk->num_channels; | 
|---|
| 85 | const int x_out_max = wrk->dst_width * wrk->num_channels; | 
|---|
| 86 | const int x_add = wrk->x_add; | 
|---|
| 87 | const int x_sub = wrk->x_sub; | 
|---|
| 88 | const int src_width = wrk->src_width; | 
|---|
| 89 | const int x_stride1 = x_stride << 2; | 
|---|
| 90 | int channel; | 
|---|
| 91 | assert(wrk->x_expand); | 
|---|
| 92 | assert(!WebPRescalerInputDone(wrk)); | 
|---|
| 93 |  | 
|---|
| 94 | for (channel = 0; channel < x_stride; ++channel) { | 
|---|
| 95 | const uint8_t* src1 = src + channel; | 
|---|
| 96 | rescaler_t* frow = wrk->frow + channel; | 
|---|
| 97 | int temp1, temp2, temp3, temp4; | 
|---|
| 98 | int frac; | 
|---|
| 99 | int accum; | 
|---|
| 100 | int x_out = channel; | 
|---|
| 101 |  | 
|---|
| 102 | __asm__ volatile ( | 
|---|
| 103 | "addiu  %[temp3],   %[src_width], -1            \n\t" | 
|---|
| 104 | "lbu    %[temp2],   0(%[src1])                  \n\t" | 
|---|
| 105 | "addu   %[src1],    %[src1],      %[x_stride]   \n\t" | 
|---|
| 106 | "bgtz   %[temp3],   0f                          \n\t" | 
|---|
| 107 | "addiu  %[temp1],   %[temp2],     0             \n\t" | 
|---|
| 108 | "b      3f                                      \n\t" | 
|---|
| 109 | "0:                                               \n\t" | 
|---|
| 110 | "lbu    %[temp1],   0(%[src1])                  \n\t" | 
|---|
| 111 | "3:                                               \n\t" | 
|---|
| 112 | "addiu  %[accum],   %[x_add],     0             \n\t" | 
|---|
| 113 | "1:                                               \n\t" | 
|---|
| 114 | "subu   %[temp3],   %[temp2],     %[temp1]      \n\t" | 
|---|
| 115 | "mul    %[temp3],   %[temp3],     %[accum]      \n\t" | 
|---|
| 116 | "mul    %[temp4],   %[temp1],     %[x_add]      \n\t" | 
|---|
| 117 | "addu   %[temp3],   %[temp4],     %[temp3]      \n\t" | 
|---|
| 118 | "sw     %[temp3],   0(%[frow])                  \n\t" | 
|---|
| 119 | "addu   %[frow],    %[frow],      %[x_stride1]  \n\t" | 
|---|
| 120 | "addu   %[x_out],   %[x_out],     %[x_stride]   \n\t" | 
|---|
| 121 | "subu   %[temp3],   %[x_out],     %[x_out_max]  \n\t" | 
|---|
| 122 | "bgez   %[temp3],   2f                          \n\t" | 
|---|
| 123 | "subu   %[accum],   %[accum],     %[x_sub]      \n\t" | 
|---|
| 124 | "bgez   %[accum],   4f                          \n\t" | 
|---|
| 125 | "addiu  %[temp2],   %[temp1],     0             \n\t" | 
|---|
| 126 | "addu   %[src1],    %[src1],      %[x_stride]   \n\t" | 
|---|
| 127 | "lbu    %[temp1],   0(%[src1])                  \n\t" | 
|---|
| 128 | "addu   %[accum],   %[accum],     %[x_add]      \n\t" | 
|---|
| 129 | "4:                                               \n\t" | 
|---|
| 130 | "b      1b                                      \n\t" | 
|---|
| 131 | "2:                                               \n\t" | 
|---|
| 132 | : [src1] "+r"(src1), [accum] "=&r"(accum), [temp1] "=&r"(temp1), | 
|---|
| 133 | [temp2] "=&r"(temp2), [temp3] "=&r"(temp3), [temp4] "=&r"(temp4), | 
|---|
| 134 | [x_out] "+r"(x_out), [frac] "=&r"(frac), [frow] "+r"(frow) | 
|---|
| 135 | : [x_stride] "r"(x_stride), [x_add] "r"(x_add), [x_sub] "r"(x_sub), | 
|---|
| 136 | [x_stride1] "r"(x_stride1), [src_width] "r"(src_width), | 
|---|
| 137 | [x_out_max] "r"(x_out_max) | 
|---|
| 138 | : "memory", "hi", "lo" | 
|---|
| 139 | ); | 
|---|
| 140 | assert(wrk->x_sub == 0 /* <- special case for src_width=1 */ || accum == 0); | 
|---|
| 141 | } | 
|---|
| 142 | } | 
|---|
| 143 |  | 
|---|
| 144 | //------------------------------------------------------------------------------ | 
|---|
| 145 | // Row export | 
|---|
| 146 |  | 
|---|
| 147 | static void ExportRowExpand(WebPRescaler* const wrk) { | 
|---|
| 148 | uint8_t* dst = wrk->dst; | 
|---|
| 149 | rescaler_t* irow = wrk->irow; | 
|---|
| 150 | const int x_out_max = wrk->dst_width * wrk->num_channels; | 
|---|
| 151 | const rescaler_t* frow = wrk->frow; | 
|---|
| 152 | int temp0, temp1, temp3, temp4, temp5, loop_end; | 
|---|
| 153 | const int temp2 = (int)wrk->fy_scale; | 
|---|
| 154 | const int temp6 = x_out_max << 2; | 
|---|
| 155 | assert(!WebPRescalerOutputDone(wrk)); | 
|---|
| 156 | assert(wrk->y_accum <= 0); | 
|---|
| 157 | assert(wrk->y_expand); | 
|---|
| 158 | assert(wrk->y_sub != 0); | 
|---|
| 159 | if (wrk->y_accum == 0) { | 
|---|
| 160 | __asm__ volatile ( | 
|---|
| 161 | "li       %[temp3],    0x10000                    \n\t" | 
|---|
| 162 | "li       %[temp4],    0x8000                     \n\t" | 
|---|
| 163 | "addu     %[loop_end], %[frow],     %[temp6]      \n\t" | 
|---|
| 164 | "1:                                                 \n\t" | 
|---|
| 165 | "lw       %[temp0],    0(%[frow])                 \n\t" | 
|---|
| 166 | "addiu    %[dst],      %[dst],      1             \n\t" | 
|---|
| 167 | "addiu    %[frow],     %[frow],     4             \n\t" | 
|---|
| 168 | "mult     %[temp3],    %[temp4]                   \n\t" | 
|---|
| 169 | "maddu    %[temp0],    %[temp2]                   \n\t" | 
|---|
| 170 | "mfhi     %[temp5]                                \n\t" | 
|---|
| 171 | "sb       %[temp5],    -1(%[dst])                 \n\t" | 
|---|
| 172 | "bne      %[frow],     %[loop_end], 1b            \n\t" | 
|---|
| 173 | : [temp0] "=&r"(temp0), [temp1] "=&r"(temp1), [temp3] "=&r"(temp3), | 
|---|
| 174 | [temp4] "=&r"(temp4), [temp5] "=&r"(temp5), [frow] "+r"(frow), | 
|---|
| 175 | [dst] "+r"(dst), [loop_end] "=&r"(loop_end) | 
|---|
| 176 | : [temp2] "r"(temp2), [temp6] "r"(temp6) | 
|---|
| 177 | : "memory", "hi", "lo" | 
|---|
| 178 | ); | 
|---|
| 179 | } else { | 
|---|
| 180 | const uint32_t B = WEBP_RESCALER_FRAC(-wrk->y_accum, wrk->y_sub); | 
|---|
| 181 | const uint32_t A = (uint32_t)(WEBP_RESCALER_ONE - B); | 
|---|
| 182 | __asm__ volatile ( | 
|---|
| 183 | "li       %[temp3],    0x10000                    \n\t" | 
|---|
| 184 | "li       %[temp4],    0x8000                     \n\t" | 
|---|
| 185 | "addu     %[loop_end], %[frow],     %[temp6]      \n\t" | 
|---|
| 186 | "1:                                                 \n\t" | 
|---|
| 187 | "lw       %[temp0],    0(%[frow])                 \n\t" | 
|---|
| 188 | "lw       %[temp1],    0(%[irow])                 \n\t" | 
|---|
| 189 | "addiu    %[dst],      %[dst],      1             \n\t" | 
|---|
| 190 | "mult     %[temp3],    %[temp4]                   \n\t" | 
|---|
| 191 | "maddu    %[A],        %[temp0]                   \n\t" | 
|---|
| 192 | "maddu    %[B],        %[temp1]                   \n\t" | 
|---|
| 193 | "addiu    %[frow],     %[frow],     4             \n\t" | 
|---|
| 194 | "addiu    %[irow],     %[irow],     4             \n\t" | 
|---|
| 195 | "mfhi     %[temp5]                                \n\t" | 
|---|
| 196 | "mult     %[temp3],    %[temp4]                   \n\t" | 
|---|
| 197 | "maddu    %[temp5],    %[temp2]                   \n\t" | 
|---|
| 198 | "mfhi     %[temp5]                                \n\t" | 
|---|
| 199 | "sb       %[temp5],    -1(%[dst])                 \n\t" | 
|---|
| 200 | "bne      %[frow],     %[loop_end], 1b            \n\t" | 
|---|
| 201 | : [temp0] "=&r"(temp0), [temp1] "=&r"(temp1), [temp3] "=&r"(temp3), | 
|---|
| 202 | [temp4] "=&r"(temp4), [temp5] "=&r"(temp5), [frow] "+r"(frow), | 
|---|
| 203 | [irow] "+r"(irow), [dst] "+r"(dst), [loop_end] "=&r"(loop_end) | 
|---|
| 204 | : [temp2] "r"(temp2), [temp6] "r"(temp6), [A] "r"(A), [B] "r"(B) | 
|---|
| 205 | : "memory", "hi", "lo" | 
|---|
| 206 | ); | 
|---|
| 207 | } | 
|---|
| 208 | } | 
|---|
| 209 |  | 
|---|
| 210 | static void ExportRowShrink(WebPRescaler* const wrk) { | 
|---|
| 211 | const int x_out_max = wrk->dst_width * wrk->num_channels; | 
|---|
| 212 | uint8_t* dst = wrk->dst; | 
|---|
| 213 | rescaler_t* irow = wrk->irow; | 
|---|
| 214 | const rescaler_t* frow = wrk->frow; | 
|---|
| 215 | const int yscale = wrk->fy_scale * (-wrk->y_accum); | 
|---|
| 216 | int temp0, temp1, temp3, temp4, temp5, loop_end; | 
|---|
| 217 | const int temp2 = (int)wrk->fxy_scale; | 
|---|
| 218 | const int temp6 = x_out_max << 2; | 
|---|
| 219 |  | 
|---|
| 220 | assert(!WebPRescalerOutputDone(wrk)); | 
|---|
| 221 | assert(wrk->y_accum <= 0); | 
|---|
| 222 | assert(!wrk->y_expand); | 
|---|
| 223 | assert(wrk->fxy_scale != 0); | 
|---|
| 224 | if (yscale) { | 
|---|
| 225 | __asm__ volatile ( | 
|---|
| 226 | "li       %[temp3],    0x10000                    \n\t" | 
|---|
| 227 | "li       %[temp4],    0x8000                     \n\t" | 
|---|
| 228 | "addu     %[loop_end], %[frow],     %[temp6]      \n\t" | 
|---|
| 229 | "1:                                                 \n\t" | 
|---|
| 230 | "lw       %[temp0],    0(%[frow])                 \n\t" | 
|---|
| 231 | "mult     %[temp3],    %[temp4]                   \n\t" | 
|---|
| 232 | "addiu    %[frow],     %[frow],     4             \n\t" | 
|---|
| 233 | "maddu    %[temp0],    %[yscale]                  \n\t" | 
|---|
| 234 | "mfhi     %[temp1]                                \n\t" | 
|---|
| 235 | "lw       %[temp0],    0(%[irow])                 \n\t" | 
|---|
| 236 | "addiu    %[dst],      %[dst],      1             \n\t" | 
|---|
| 237 | "addiu    %[irow],     %[irow],     4             \n\t" | 
|---|
| 238 | "subu     %[temp0],    %[temp0],    %[temp1]      \n\t" | 
|---|
| 239 | "mult     %[temp3],    %[temp4]                   \n\t" | 
|---|
| 240 | "maddu    %[temp0],    %[temp2]                   \n\t" | 
|---|
| 241 | "mfhi     %[temp5]                                \n\t" | 
|---|
| 242 | "sw       %[temp1],    -4(%[irow])                \n\t" | 
|---|
| 243 | "sb       %[temp5],    -1(%[dst])                 \n\t" | 
|---|
| 244 | "bne      %[frow],     %[loop_end], 1b            \n\t" | 
|---|
| 245 | : [temp0] "=&r"(temp0), [temp1] "=&r"(temp1), [temp3] "=&r"(temp3), | 
|---|
| 246 | [temp4] "=&r"(temp4), [temp5] "=&r"(temp5), [frow] "+r"(frow), | 
|---|
| 247 | [irow] "+r"(irow), [dst] "+r"(dst), [loop_end] "=&r"(loop_end) | 
|---|
| 248 | : [temp2] "r"(temp2), [yscale] "r"(yscale), [temp6] "r"(temp6) | 
|---|
| 249 | : "memory", "hi", "lo" | 
|---|
| 250 | ); | 
|---|
| 251 | } else { | 
|---|
| 252 | __asm__ volatile ( | 
|---|
| 253 | "li       %[temp3],    0x10000                    \n\t" | 
|---|
| 254 | "li       %[temp4],    0x8000                     \n\t" | 
|---|
| 255 | "addu     %[loop_end], %[irow],     %[temp6]      \n\t" | 
|---|
| 256 | "1:                                                 \n\t" | 
|---|
| 257 | "lw       %[temp0],    0(%[irow])                 \n\t" | 
|---|
| 258 | "addiu    %[dst],      %[dst],      1             \n\t" | 
|---|
| 259 | "addiu    %[irow],     %[irow],     4             \n\t" | 
|---|
| 260 | "mult     %[temp3],    %[temp4]                   \n\t" | 
|---|
| 261 | "maddu    %[temp0],    %[temp2]                   \n\t" | 
|---|
| 262 | "mfhi     %[temp5]                                \n\t" | 
|---|
| 263 | "sw       $zero,       -4(%[irow])                \n\t" | 
|---|
| 264 | "sb       %[temp5],    -1(%[dst])                 \n\t" | 
|---|
| 265 | "bne      %[irow],     %[loop_end], 1b            \n\t" | 
|---|
| 266 | : [temp0] "=&r"(temp0), [temp1] "=&r"(temp1), [temp3] "=&r"(temp3), | 
|---|
| 267 | [temp4] "=&r"(temp4), [temp5] "=&r"(temp5), [irow] "+r"(irow), | 
|---|
| 268 | [dst] "+r"(dst), [loop_end] "=&r"(loop_end) | 
|---|
| 269 | : [temp2] "r"(temp2), [temp6] "r"(temp6) | 
|---|
| 270 | : "memory", "hi", "lo" | 
|---|
| 271 | ); | 
|---|
| 272 | } | 
|---|
| 273 | } | 
|---|
| 274 |  | 
|---|
| 275 | //------------------------------------------------------------------------------ | 
|---|
| 276 | // Entry point | 
|---|
| 277 |  | 
|---|
| 278 | extern void WebPRescalerDspInitMIPS32(void); | 
|---|
| 279 |  | 
|---|
| 280 | WEBP_TSAN_IGNORE_FUNCTION void WebPRescalerDspInitMIPS32(void) { | 
|---|
| 281 | WebPRescalerImportRowExpand = ImportRowExpand; | 
|---|
| 282 | WebPRescalerImportRowShrink = ImportRowShrink; | 
|---|
| 283 | WebPRescalerExportRowExpand = ExportRowExpand; | 
|---|
| 284 | WebPRescalerExportRowShrink = ExportRowShrink; | 
|---|
| 285 | } | 
|---|
| 286 |  | 
|---|
| 287 | #else  // !WEBP_USE_MIPS32 | 
|---|
| 288 |  | 
|---|
| 289 | /* Non-MIPS32 build: presumably expands to an empty WebPRescalerDspInitMIPS32() so the symbol still exists -- confirm against the macro definition in dsp.h. */ WEBP_DSP_INIT_STUB(WebPRescalerDspInitMIPS32) | 
|---|
| 290 |  | 
|---|
| 291 | #endif  // WEBP_USE_MIPS32 | 
|---|
| 292 |  | 
|---|