// Copyright 2015 Google Inc. All Rights Reserved.
//
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// Image transform methods for lossless encoder.
//
// Author(s):  Djordje Pesut    (djordje.pesut@imgtec.com)
//             Jovan Zelincevic (jovan.zelincevic@imgtec.com)

| 15 | #include "src/dsp/dsp.h" | 
|---|
| 16 |  | 
|---|
| 17 | #if defined(WEBP_USE_MIPS_DSP_R2) | 
|---|
| 18 |  | 
|---|
| 19 | #include "src/dsp/lossless.h" | 
|---|
| 20 |  | 
|---|
| 21 | static void SubtractGreenFromBlueAndRed_MIPSdspR2(uint32_t* argb_data, | 
|---|
| 22 | int num_pixels) { | 
|---|
| 23 | uint32_t temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7; | 
|---|
| 24 | uint32_t* const p_loop1_end = argb_data + (num_pixels & ~3); | 
|---|
| 25 | uint32_t* const p_loop2_end = p_loop1_end + (num_pixels & 3); | 
|---|
| 26 | __asm__ volatile ( | 
|---|
| 27 | ".set       push                                          \n\t" | 
|---|
| 28 | ".set       noreorder                                     \n\t" | 
|---|
| 29 | "beq        %[argb_data],    %[p_loop1_end],     3f       \n\t" | 
|---|
| 30 | " nop                                                     \n\t" | 
|---|
| 31 | "0:                                                         \n\t" | 
|---|
| 32 | "lw         %[temp0],        0(%[argb_data])              \n\t" | 
|---|
| 33 | "lw         %[temp1],        4(%[argb_data])              \n\t" | 
|---|
| 34 | "lw         %[temp2],        8(%[argb_data])              \n\t" | 
|---|
| 35 | "lw         %[temp3],        12(%[argb_data])             \n\t" | 
|---|
| 36 | "ext        %[temp4],        %[temp0],           8,    8  \n\t" | 
|---|
| 37 | "ext        %[temp5],        %[temp1],           8,    8  \n\t" | 
|---|
| 38 | "ext        %[temp6],        %[temp2],           8,    8  \n\t" | 
|---|
| 39 | "ext        %[temp7],        %[temp3],           8,    8  \n\t" | 
|---|
| 40 | "addiu      %[argb_data],    %[argb_data],       16       \n\t" | 
|---|
| 41 | "replv.ph   %[temp4],        %[temp4]                     \n\t" | 
|---|
| 42 | "replv.ph   %[temp5],        %[temp5]                     \n\t" | 
|---|
| 43 | "replv.ph   %[temp6],        %[temp6]                     \n\t" | 
|---|
| 44 | "replv.ph   %[temp7],        %[temp7]                     \n\t" | 
|---|
| 45 | "subu.qb    %[temp0],        %[temp0],           %[temp4] \n\t" | 
|---|
| 46 | "subu.qb    %[temp1],        %[temp1],           %[temp5] \n\t" | 
|---|
| 47 | "subu.qb    %[temp2],        %[temp2],           %[temp6] \n\t" | 
|---|
| 48 | "subu.qb    %[temp3],        %[temp3],           %[temp7] \n\t" | 
|---|
| 49 | "sw         %[temp0],        -16(%[argb_data])            \n\t" | 
|---|
| 50 | "sw         %[temp1],        -12(%[argb_data])            \n\t" | 
|---|
| 51 | "sw         %[temp2],        -8(%[argb_data])             \n\t" | 
|---|
| 52 | "bne        %[argb_data],    %[p_loop1_end],     0b       \n\t" | 
|---|
| 53 | " sw        %[temp3],        -4(%[argb_data])             \n\t" | 
|---|
| 54 | "3:                                                         \n\t" | 
|---|
| 55 | "beq        %[argb_data],    %[p_loop2_end],     2f       \n\t" | 
|---|
| 56 | " nop                                                     \n\t" | 
|---|
| 57 | "1:                                                         \n\t" | 
|---|
| 58 | "lw         %[temp0],        0(%[argb_data])              \n\t" | 
|---|
| 59 | "addiu      %[argb_data],    %[argb_data],       4        \n\t" | 
|---|
| 60 | "ext        %[temp4],        %[temp0],           8,    8  \n\t" | 
|---|
| 61 | "replv.ph   %[temp4],        %[temp4]                     \n\t" | 
|---|
| 62 | "subu.qb    %[temp0],        %[temp0],           %[temp4] \n\t" | 
|---|
| 63 | "bne        %[argb_data],    %[p_loop2_end],     1b       \n\t" | 
|---|
| 64 | " sw        %[temp0],        -4(%[argb_data])             \n\t" | 
|---|
| 65 | "2:                                                         \n\t" | 
|---|
| 66 | ".set       pop                                           \n\t" | 
|---|
| 67 | : [argb_data] "+&r"(argb_data), [temp0] "=&r"(temp0), | 
|---|
| 68 | [temp1] "=&r"(temp1), [temp2] "=&r"(temp2), [temp3] "=&r"(temp3), | 
|---|
| 69 | [temp4] "=&r"(temp4), [temp5] "=&r"(temp5), [temp6] "=&r"(temp6), | 
|---|
| 70 | [temp7] "=&r"(temp7) | 
|---|
| 71 | : [p_loop1_end] "r"(p_loop1_end), [p_loop2_end] "r"(p_loop2_end) | 
|---|
| 72 | : "memory" | 
|---|
| 73 | ); | 
|---|
| 74 | } | 
|---|
| 75 |  | 
|---|
| 76 | static WEBP_INLINE uint32_t ColorTransformDelta(int8_t color_pred, | 
|---|
| 77 | int8_t color) { | 
|---|
| 78 | return (uint32_t)((int)(color_pred) * color) >> 5; | 
|---|
| 79 | } | 
|---|
| 80 |  | 
|---|
| 81 | static void TransformColor_MIPSdspR2(const VP8LMultipliers* const m, | 
|---|
| 82 | uint32_t* data, int num_pixels) { | 
|---|
| 83 | int temp0, temp1, temp2, temp3, temp4, temp5; | 
|---|
| 84 | uint32_t argb, argb1, new_red, new_red1; | 
|---|
| 85 | const uint32_t G_to_R = m->green_to_red_; | 
|---|
| 86 | const uint32_t G_to_B = m->green_to_blue_; | 
|---|
| 87 | const uint32_t R_to_B = m->red_to_blue_; | 
|---|
| 88 | uint32_t* const p_loop_end = data + (num_pixels & ~1); | 
|---|
| 89 | __asm__ volatile ( | 
|---|
| 90 | ".set            push                                    \n\t" | 
|---|
| 91 | ".set            noreorder                               \n\t" | 
|---|
| 92 | "beq             %[data],      %[p_loop_end],  1f        \n\t" | 
|---|
| 93 | " nop                                                    \n\t" | 
|---|
| 94 | "replv.ph        %[temp0],     %[G_to_R]                 \n\t" | 
|---|
| 95 | "replv.ph        %[temp1],     %[G_to_B]                 \n\t" | 
|---|
| 96 | "replv.ph        %[temp2],     %[R_to_B]                 \n\t" | 
|---|
| 97 | "shll.ph         %[temp0],     %[temp0],       8         \n\t" | 
|---|
| 98 | "shll.ph         %[temp1],     %[temp1],       8         \n\t" | 
|---|
| 99 | "shll.ph         %[temp2],     %[temp2],       8         \n\t" | 
|---|
| 100 | "shra.ph         %[temp0],     %[temp0],       8         \n\t" | 
|---|
| 101 | "shra.ph         %[temp1],     %[temp1],       8         \n\t" | 
|---|
| 102 | "shra.ph         %[temp2],     %[temp2],       8         \n\t" | 
|---|
| 103 | "0:                                                        \n\t" | 
|---|
| 104 | "lw              %[argb],      0(%[data])                \n\t" | 
|---|
| 105 | "lw              %[argb1],     4(%[data])                \n\t" | 
|---|
| 106 | "lhu             %[new_red],   2(%[data])                \n\t" | 
|---|
| 107 | "lhu             %[new_red1],  6(%[data])                \n\t" | 
|---|
| 108 | "precrq.qb.ph    %[temp3],     %[argb],        %[argb1]  \n\t" | 
|---|
| 109 | "precr.qb.ph     %[temp4],     %[argb],        %[argb1]  \n\t" | 
|---|
| 110 | "preceu.ph.qbra  %[temp3],     %[temp3]                  \n\t" | 
|---|
| 111 | "preceu.ph.qbla  %[temp4],     %[temp4]                  \n\t" | 
|---|
| 112 | "shll.ph         %[temp3],     %[temp3],       8         \n\t" | 
|---|
| 113 | "shll.ph         %[temp4],     %[temp4],       8         \n\t" | 
|---|
| 114 | "shra.ph         %[temp3],     %[temp3],       8         \n\t" | 
|---|
| 115 | "shra.ph         %[temp4],     %[temp4],       8         \n\t" | 
|---|
| 116 | "mul.ph          %[temp5],     %[temp3],       %[temp0]  \n\t" | 
|---|
| 117 | "mul.ph          %[temp3],     %[temp3],       %[temp1]  \n\t" | 
|---|
| 118 | "mul.ph          %[temp4],     %[temp4],       %[temp2]  \n\t" | 
|---|
| 119 | "addiu           %[data],      %[data],        8         \n\t" | 
|---|
| 120 | "ins             %[new_red1],  %[new_red],     16,   16  \n\t" | 
|---|
| 121 | "ins             %[argb1],     %[argb],        16,   16  \n\t" | 
|---|
| 122 | "shra.ph         %[temp5],     %[temp5],       5         \n\t" | 
|---|
| 123 | "shra.ph         %[temp3],     %[temp3],       5         \n\t" | 
|---|
| 124 | "shra.ph         %[temp4],     %[temp4],       5         \n\t" | 
|---|
| 125 | "subu.ph         %[new_red1],  %[new_red1],    %[temp5]  \n\t" | 
|---|
| 126 | "subu.ph         %[argb1],     %[argb1],       %[temp3]  \n\t" | 
|---|
| 127 | "preceu.ph.qbra  %[temp5],     %[new_red1]               \n\t" | 
|---|
| 128 | "subu.ph         %[argb1],     %[argb1],       %[temp4]  \n\t" | 
|---|
| 129 | "preceu.ph.qbra  %[temp3],     %[argb1]                  \n\t" | 
|---|
| 130 | "sb              %[temp5],     -2(%[data])               \n\t" | 
|---|
| 131 | "sb              %[temp3],     -4(%[data])               \n\t" | 
|---|
| 132 | "sra             %[temp5],     %[temp5],       16        \n\t" | 
|---|
| 133 | "sra             %[temp3],     %[temp3],       16        \n\t" | 
|---|
| 134 | "sb              %[temp5],     -6(%[data])               \n\t" | 
|---|
| 135 | "bne             %[data],      %[p_loop_end],  0b        \n\t" | 
|---|
| 136 | " sb             %[temp3],     -8(%[data])               \n\t" | 
|---|
| 137 | "1:                                                        \n\t" | 
|---|
| 138 | ".set            pop                                     \n\t" | 
|---|
| 139 | : [temp0] "=&r"(temp0), [temp1] "=&r"(temp1), [temp2] "=&r"(temp2), | 
|---|
| 140 | [temp3] "=&r"(temp3), [temp4] "=&r"(temp4), [temp5] "=&r"(temp5), | 
|---|
| 141 | [new_red1] "=&r"(new_red1), [new_red] "=&r"(new_red), | 
|---|
| 142 | [argb] "=&r"(argb), [argb1] "=&r"(argb1), [data] "+&r"(data) | 
|---|
| 143 | : [G_to_R] "r"(G_to_R), [R_to_B] "r"(R_to_B), | 
|---|
| 144 | [G_to_B] "r"(G_to_B), [p_loop_end] "r"(p_loop_end) | 
|---|
| 145 | : "memory", "hi", "lo" | 
|---|
| 146 | ); | 
|---|
| 147 |  | 
|---|
| 148 | if (num_pixels & 1) { | 
|---|
| 149 | const uint32_t argb_ = data[0]; | 
|---|
| 150 | const uint32_t green = argb_ >> 8; | 
|---|
| 151 | const uint32_t red = argb_ >> 16; | 
|---|
| 152 | uint32_t new_blue = argb_; | 
|---|
| 153 | new_red = red; | 
|---|
| 154 | new_red -= ColorTransformDelta(m->green_to_red_, green); | 
|---|
| 155 | new_red &= 0xff; | 
|---|
| 156 | new_blue -= ColorTransformDelta(m->green_to_blue_, green); | 
|---|
| 157 | new_blue -= ColorTransformDelta(m->red_to_blue_, red); | 
|---|
| 158 | new_blue &= 0xff; | 
|---|
| 159 | data[0] = (argb_ & 0xff00ff00u) | (new_red << 16) | (new_blue); | 
|---|
| 160 | } | 
|---|
| 161 | } | 
|---|
| 162 |  | 
|---|
| 163 | static WEBP_INLINE uint8_t TransformColorBlue(uint8_t green_to_blue, | 
|---|
| 164 | uint8_t red_to_blue, | 
|---|
| 165 | uint32_t argb) { | 
|---|
| 166 | const uint32_t green = argb >> 8; | 
|---|
| 167 | const uint32_t red = argb >> 16; | 
|---|
| 168 | uint8_t new_blue = argb; | 
|---|
| 169 | new_blue -= ColorTransformDelta(green_to_blue, green); | 
|---|
| 170 | new_blue -= ColorTransformDelta(red_to_blue, red); | 
|---|
| 171 | return (new_blue & 0xff); | 
|---|
| 172 | } | 
|---|
| 173 |  | 
|---|
// Accumulates a histogram of transformed blue values over a tile.
//   argb:          first pixel of the tile's first row.
//   stride:        distance, in pixels, between the starts of two rows.
//   tile_width:    pixels per row to process.
//   tile_height:   number of rows to process.
//   green_to_blue: multiplier applied to the green channel.
//   red_to_blue:   multiplier applied to the red channel.
//   histo:         counters, indexed by a uint8_t value (entries 0..255 may be
//                  incremented).
// Pixels are processed in pairs by the assembly; an odd final pixel of each
// row goes through TransformColorBlue().
// NOTE(review): the assembly multiplies by the low 16 bits of each multiplier,
// whereas TransformColorBlue() narrows it to 8 bits. The two paths agree when
// the multipliers are within [-128, 127] -- confirm callers guarantee that.
static void CollectColorBlueTransforms_MIPSdspR2(const uint32_t* argb,
                                                 int stride,
                                                 int tile_width,
                                                 int tile_height,
                                                 int green_to_blue,
                                                 int red_to_blue,
                                                 int histo[]) {
  // Replicate the low 16 bits of each multiplier into both halfwords. This is
  // done on unsigned values because left-shifting a negative int is undefined
  // behavior.
  const uint32_t rtb_lo = (uint32_t)red_to_blue & 0xffffu;
  const uint32_t gtb_lo = (uint32_t)green_to_blue & 0xffffu;
  const uint32_t rtb = (rtb_lo << 16) | rtb_lo;
  const uint32_t gtb = (gtb_lo << 16) | gtb_lo;
  // Keeps the low byte (blue) of each halfword.
  const uint32_t mask = 0xff00ffu;
  while (tile_height-- > 0) {
    int x;
    const uint32_t* p_argb = argb;
    argb += stride;
    for (x = 0; x < (tile_width >> 1); ++x) {
      // Unsigned temporaries: only bit patterns are read back from them.
      uint32_t temp0, temp1, temp2, temp3, temp4, temp5, temp6;
      __asm__ volatile (
        // Load two pixels.
        "lw           %[temp0],  0(%[p_argb])             \n\t"
        "lw           %[temp1],  4(%[p_argb])             \n\t"
        // temp2: low byte of every halfword of both pixels (R0 B0 R1 B1).
        "precr.qb.ph  %[temp2],  %[temp0],  %[temp1]      \n\t"
        // temp1: low halfword of pixel 0 above low halfword of pixel 1.
        "ins          %[temp1],  %[temp0],  16,    16     \n\t"
        // Arithmetic shift by 8 leaves the sign-extended high byte of each
        // halfword: reds in temp2, greens in temp3.
        "shra.ph      %[temp2],  %[temp2],  8             \n\t"
        "shra.ph      %[temp3],  %[temp1],  8             \n\t"
        "mul.ph       %[temp5],  %[temp2],  %[rtb]        \n\t"
        "mul.ph       %[temp6],  %[temp3],  %[gtb]        \n\t"
        // temp4: the two blue bytes.
        "and          %[temp4],  %[temp1],  %[mask]       \n\t"
        "addiu        %[p_argb], %[p_argb], 8             \n\t"
        "shra.ph      %[temp5],  %[temp5],  5             \n\t"
        "shra.ph      %[temp6],  %[temp6],  5             \n\t"
        // Byte-wise: blue - red delta - green delta.
        "subu.qb      %[temp2],  %[temp4],  %[temp5]      \n\t"
        "subu.qb      %[temp2],  %[temp2],  %[temp6]      \n\t"
        : [p_argb]"+&r"(p_argb), [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),
          [temp2]"=&r"(temp2), [temp3]"=&r"(temp3), [temp4]"=&r"(temp4),
          [temp5]"=&r"(temp5), [temp6]"=&r"(temp6)
        : [rtb]"r"(rtb), [gtb]"r"(gtb), [mask]"r"(mask)
        : "memory", "hi", "lo"
      );
      // Byte 2 holds the first pixel's result, byte 0 the second pixel's.
      ++histo[(uint8_t)(temp2 >> 16)];
      ++histo[(uint8_t)temp2];
    }
    if (tile_width & 1) {
      ++histo[TransformColorBlue(green_to_blue, red_to_blue, *p_argb)];
    }
  }
}

| 220 | static WEBP_INLINE uint8_t TransformColorRed(uint8_t green_to_red, | 
|---|
| 221 | uint32_t argb) { | 
|---|
| 222 | const uint32_t green = argb >> 8; | 
|---|
| 223 | uint32_t new_red = argb >> 16; | 
|---|
| 224 | new_red -= ColorTransformDelta(green_to_red, green); | 
|---|
| 225 | return (new_red & 0xff); | 
|---|
| 226 | } | 
|---|
| 227 |  | 
|---|
// Accumulates a histogram of transformed red values over a tile.
//   argb:         first pixel of the tile's first row.
//   stride:       distance, in pixels, between the starts of two rows.
//   tile_width:   pixels per row to process.
//   tile_height:  number of rows to process.
//   green_to_red: multiplier applied to the green channel.
//   histo:        counters, indexed by a uint8_t value (entries 0..255 may be
//                 incremented).
// Pixels are processed in pairs by the assembly; an odd final pixel of each
// row goes through TransformColorRed().
// NOTE(review): the assembly multiplies by the low 16 bits of 'green_to_red',
// whereas TransformColorRed() narrows it to 8 bits. The two paths agree when
// the multiplier is within [-128, 127] -- confirm callers guarantee that.
static void CollectColorRedTransforms_MIPSdspR2(const uint32_t* argb,
                                                int stride,
                                                int tile_width,
                                                int tile_height,
                                                int green_to_red,
                                                int histo[]) {
  // Replicate the low 16 bits of the multiplier into both halfwords. This is
  // done on unsigned values because left-shifting a negative int is undefined
  // behavior.
  const uint32_t gtr_lo = (uint32_t)green_to_red & 0xffffu;
  const uint32_t gtr = (gtr_lo << 16) | gtr_lo;
  while (tile_height-- > 0) {
    int x;
    const uint32_t* p_argb = argb;
    argb += stride;
    for (x = 0; x < (tile_width >> 1); ++x) {
      // Unsigned temporaries: only bit patterns are read back from them.
      uint32_t temp0, temp1, temp2, temp3, temp4;
      __asm__ volatile (
        // Load two pixels.
        "lw           %[temp0],  0(%[p_argb])             \n\t"
        "lw           %[temp1],  4(%[p_argb])             \n\t"
        // temp4: high halfword of pixel 0 above high halfword of pixel 1,
        // so each halfword has red in its low byte.
        "precrq.ph.w  %[temp4],  %[temp0],  %[temp1]      \n\t"
        // temp1: low halfword of pixel 0 above low halfword of pixel 1.
        "ins          %[temp1],  %[temp0],  16,    16     \n\t"
        // Arithmetic shift by 8 leaves the sign-extended green of each pixel.
        "shra.ph      %[temp3],  %[temp1],  8             \n\t"
        "mul.ph       %[temp2],  %[temp3],  %[gtr]        \n\t"
        "addiu        %[p_argb], %[p_argb], 8             \n\t"
        "shra.ph      %[temp2],  %[temp2],  5             \n\t"
        // Byte-wise: red - green delta (the other byte lanes are not used).
        "subu.qb      %[temp2],  %[temp4],  %[temp2]      \n\t"
        : [p_argb]"+&r"(p_argb), [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),
          [temp2]"=&r"(temp2), [temp3]"=&r"(temp3), [temp4]"=&r"(temp4)
        : [gtr]"r"(gtr)
        : "memory", "hi", "lo"
      );
      // Byte 2 holds the first pixel's result, byte 0 the second pixel's.
      ++histo[(uint8_t)(temp2 >> 16)];
      ++histo[(uint8_t)temp2];
    }
    if (tile_width & 1) {
      ++histo[TransformColorRed(green_to_red, *p_argb)];
    }
  }
}

| 265 | //------------------------------------------------------------------------------ | 
|---|
| 266 | // Entry point | 
|---|
| 267 |  | 
|---|
| 268 | extern void VP8LEncDspInitMIPSdspR2(void); | 
|---|
| 269 |  | 
|---|
| 270 | WEBP_TSAN_IGNORE_FUNCTION void VP8LEncDspInitMIPSdspR2(void) { | 
|---|
| 271 | VP8LSubtractGreenFromBlueAndRed = SubtractGreenFromBlueAndRed_MIPSdspR2; | 
|---|
| 272 | VP8LTransformColor = TransformColor_MIPSdspR2; | 
|---|
| 273 | VP8LCollectColorBlueTransforms = CollectColorBlueTransforms_MIPSdspR2; | 
|---|
| 274 | VP8LCollectColorRedTransforms = CollectColorRedTransforms_MIPSdspR2; | 
|---|
| 275 | } | 
|---|
| 276 |  | 
|---|
| 277 | #else  // !WEBP_USE_MIPS_DSP_R2 | 
|---|
| 278 |  | 
|---|
| 279 | WEBP_DSP_INIT_STUB(VP8LEncDspInitMIPSdspR2) | 
|---|
| 280 |  | 
|---|
| 281 | #endif  // WEBP_USE_MIPS_DSP_R2 | 
|---|
| 282 |  | 
|---|