| 1 | // Copyright 2014 Google Inc. All Rights Reserved. | 
|---|
| 2 | // | 
|---|
| 3 | // Use of this source code is governed by a BSD-style license | 
|---|
| 4 | // that can be found in the COPYING file in the root of the source | 
|---|
| 5 | // tree. An additional intellectual property rights grant can be found | 
|---|
| 6 | // in the file PATENTS. All contributing project authors may | 
|---|
| 7 | // be found in the AUTHORS file in the root of the source tree. | 
|---|
| 8 | // ----------------------------------------------------------------------------- | 
|---|
| 9 | // | 
|---|
| 10 | // Utilities for processing transparent channel. | 
|---|
| 11 | // | 
|---|
| 12 | // Author(s): Branimir Vasic (branimir.vasic@imgtec.com) | 
|---|
| 13 | //            Djordje Pesut  (djordje.pesut@imgtec.com) | 
|---|
| 14 |  | 
|---|
| 15 | #include "src/dsp/dsp.h" | 
|---|
| 16 |  | 
|---|
| 17 | #if defined(WEBP_USE_MIPS_DSP_R2) | 
|---|
| 18 |  | 
|---|
// Copies a plane of alpha values into every 4th byte of 'dst' (dst[0],
// dst[4], dst[8], ... on each row) and reports whether the plane contains
// any value other than 0xff.
//
//   alpha, alpha_stride: source plane and byte distance between its rows.
//   width, height:       alpha values per row, and number of rows.
//   dst, dst_stride:     destination and byte distance between its rows.
//
// Returns non-zero if at least one alpha value differs from 0xff, and zero
// if every value is 0xff (or if there is nothing to process).
static int DispatchAlpha_MIPSdspR2(const uint8_t* alpha, int alpha_stride,
                                   int width, int height,
                                   uint8_t* dst, int dst_stride) {
  // Running bitwise AND of everything read so far, kept as four byte lanes.
  uint32_t alpha_mask = 0xffffffff;
  int i, j, temp0;

  for (j = 0; j < height; ++j) {
    uint8_t* pdst = dst;
    const uint8_t* palpha = alpha;
    // Main loop: one unaligned word load handles four alpha values, which
    // are then scattered to pdst[0], pdst[4], pdst[8] and pdst[12].
    // NOTE(review): the srl/sb sequence stores the least-significant byte of
    // the loaded word first, which matches source order on a little-endian
    // target -- confirm big-endian builds are not expected to use this path.
    for (i = 0; i < (width >> 2); ++i) {
      int temp1, temp2, temp3;

      __asm__ volatile (
        "ulw    %[temp0],      0(%[palpha])                \n\t"
        "addiu  %[palpha],     %[palpha],     4            \n\t"
        "addiu  %[pdst],       %[pdst],       16           \n\t"
        "srl    %[temp1],      %[temp0],      8            \n\t"
        "srl    %[temp2],      %[temp0],      16           \n\t"
        "srl    %[temp3],      %[temp0],      24           \n\t"
        "and    %[alpha_mask], %[alpha_mask], %[temp0]     \n\t"
        "sb     %[temp0],      -16(%[pdst])                \n\t"
        "sb     %[temp1],      -12(%[pdst])                \n\t"
        "sb     %[temp2],      -8(%[pdst])                 \n\t"
        "sb     %[temp3],      -4(%[pdst])                 \n\t"
        : [temp0] "=&r"(temp0), [temp1] "=&r"(temp1), [temp2] "=&r"(temp2),
          [temp3] "=&r"(temp3), [palpha] "+r"(palpha), [pdst] "+r"(pdst),
          [alpha_mask] "+r"(alpha_mask)
        :
        : "memory"
      );
    }

    // Tail: the remaining (width & 3) values, one byte at a time.
    for (i = 0; i < (width & 3); ++i) {
      __asm__ volatile (
        "lbu    %[temp0],      0(%[palpha])                \n\t"
        "addiu  %[palpha],     %[palpha],     1            \n\t"
        "sb     %[temp0],      0(%[pdst])                  \n\t"
        "addiu  %[pdst],       %[pdst],       4            \n\t"
        : [temp0] "=&r"(temp0), [palpha] "+r"(palpha), [pdst] "+r"(pdst)
        :
        : "memory"
      );
      // 'lbu' zero-extends the byte. ANDing it directly into the accumulator
      // would wipe the three upper lanes and make the final fold report
      // "not opaque" for every width that is not a multiple of four, so the
      // upper 24 bits are forced to ones and only the low lane is affected.
      alpha_mask &= 0xffffff00u | (uint32_t)temp0;
    }
    alpha += alpha_stride;
    dst += dst_stride;
  }

  // Fold the four byte lanes into the low byte: AND the two halfwords, then
  // AND the two bytes of the result.
  __asm__ volatile (
    "ext    %[temp0],      %[alpha_mask], 0, 16            \n\t"
    "srl    %[alpha_mask], %[alpha_mask], 16               \n\t"
    "and    %[alpha_mask], %[alpha_mask], %[temp0]         \n\t"
    "ext    %[temp0],      %[alpha_mask], 0, 8             \n\t"
    "srl    %[alpha_mask], %[alpha_mask], 8                \n\t"
    "and    %[alpha_mask], %[alpha_mask], %[temp0]         \n\t"
    : [temp0] "=&r"(temp0), [alpha_mask] "+r"(alpha_mask)
    :
  );

  return (alpha_mask != 0xff);
}
|---|
| 81 |  | 
|---|
// Scales the three low bytes of every 32-bit pixel in ptr[0..width-1] by a
// factor derived from the pixel's top byte (alpha), in place.
//
//   - alpha == 255: the pixel is left untouched.
//   - alpha == 0:   the pixel is set to 0.
//   - otherwise:    each of the three low bytes is multiplied by a fixed-point
//                   factor (alpha * 0x010101 when 'inverse' is zero,
//                   0xff000000 / alpha when it is non-zero), rounded, and
//                   repacked beneath the unchanged alpha byte.
static void MultARGBRow_MIPSdspR2(uint32_t* const ptr, int width,
                                  int inverse) {
  const uint32_t low24_mask  = 0x00ffffffu;  // keeps three copies of alpha
  const uint32_t inv_numer   = 0xff000000u;  // dividend for the inverse factor
  const uint32_t round_word  = 0x00800000u;  // rounding term for one product
  const uint32_t round_pair  = 0x00800080u;  // rounding term for two packed
                                             // 16-bit products
  int i;

  for (i = 0; i < width; ++i) {
    const uint32_t pixel = ptr[i];
    int factor, ch0, ch1, ch2, a;

    if (pixel >= 0xff000000u) {   // alpha == 255: nothing to do
      continue;
    }
    if (pixel <= 0x00ffffffu) {   // alpha == 0
      ptr[i] = 0;
      continue;
    }

    __asm__ volatile (
      "srl          %[alpha],   %[argb],       24                \n\t"
      "replv.qb     %[temp0],   %[alpha]                         \n\t"
      "and          %[temp0],   %[temp0],      %[c_00ffffff]     \n\t"
      "beqz         %[inverse], 0f                               \n\t"
      "divu         $zero,      %[c_ff000000], %[alpha]          \n\t"
      "mflo         %[temp0]                                     \n\t"
      "0:                                                          \n\t"
      "andi         %[temp1],   %[argb],       0xff              \n\t"
      "ext          %[temp2],   %[argb],       8,             8  \n\t"
      "ext          %[temp3],   %[argb],       16,            8  \n\t"
      "mul          %[temp1],   %[temp1],      %[temp0]          \n\t"
      "mul          %[temp2],   %[temp2],      %[temp0]          \n\t"
      "mul          %[temp3],   %[temp3],      %[temp0]          \n\t"
      "precrq.ph.w  %[temp1],   %[temp2],      %[temp1]          \n\t"
      "addu         %[temp3],   %[temp3],      %[c_8000000]      \n\t"
      "addu         %[temp1],   %[temp1],      %[c_8000080]      \n\t"
      "precrq.ph.w  %[temp3],   %[argb],       %[temp3]          \n\t"
      "precrq.qb.ph %[temp1],   %[temp3],      %[temp1]          \n\t"
      : [temp0] "=&r"(factor), [temp1] "=&r"(ch0), [temp2] "=&r"(ch1),
        [temp3] "=&r"(ch2), [alpha] "=&r"(a)
      : [inverse] "r"(inverse), [c_00ffffff] "r"(low24_mask),
        [c_8000000] "r"(round_word), [c_8000080] "r"(round_pair),
        [c_ff000000] "r"(inv_numer), [argb] "r"(pixel)
      : "memory", "hi", "lo"
    );
    // After the final pack, the first scratch register holds the whole pixel.
    ptr[i] = ch0;
  }
}
|---|
| 127 |  | 
|---|
#ifdef WORDS_BIGENDIAN
// Builds 'len' 32-bit words in 'out' from four byte streams. For pixel i the
// bytes a[4 * i], r[4 * i], g[4 * i] and b[4 * i] are read (the sources are
// walked with a fixed stride of 4 bytes) and stored as one word whose bytes
// are, from most to least significant: a, r, g, b.
//
// The assembly loop emits one word per iteration for the first
// len - (len & 1) pixels; when 'len' is odd, the last pixel is produced by
// the straight-line code after label 0.
static void PackARGB_MIPSdspR2(const uint8_t* a, const uint8_t* r,
                               const uint8_t* g, const uint8_t* b, int len,
                               uint32_t* out) {
  const int src_stride = 4;      // byte distance between consecutive samples
  const int has_odd = len & 1;   // non-zero when a trailing pixel remains
  const uint32_t* const last = out + (len - has_odd);
  int va, vr, vg, vb, pos;

  __asm__ volatile (
    "xor          %[offset],   %[offset], %[offset]    \n\t"
    "beq          %[loop_end], %[out],    0f           \n\t"
    "2:                                                  \n\t"
    "lbux         %[temp0],    %[offset](%[a])         \n\t"
    "lbux         %[temp1],    %[offset](%[r])         \n\t"
    "lbux         %[temp2],    %[offset](%[g])         \n\t"
    "lbux         %[temp3],    %[offset](%[b])         \n\t"
    "ins          %[temp1],    %[temp0],  16,     16   \n\t"
    "ins          %[temp3],    %[temp2],  16,     16   \n\t"
    "addiu        %[out],      %[out],    4            \n\t"
    "precr.qb.ph  %[temp0],    %[temp1],  %[temp3]     \n\t"
    "sw           %[temp0],    -4(%[out])              \n\t"
    "addu         %[offset],   %[offset], %[step]      \n\t"
    "bne          %[loop_end], %[out],    2b           \n\t"
    "0:                                                  \n\t"
    "beq          %[rest],     $zero,     1f           \n\t"
    "lbux         %[temp0],    %[offset](%[a])         \n\t"
    "lbux         %[temp1],    %[offset](%[r])         \n\t"
    "lbux         %[temp2],    %[offset](%[g])         \n\t"
    "lbux         %[temp3],    %[offset](%[b])         \n\t"
    "ins          %[temp1],    %[temp0],  16,     16   \n\t"
    "ins          %[temp3],    %[temp2],  16,     16   \n\t"
    "precr.qb.ph  %[temp0],    %[temp1],  %[temp3]     \n\t"
    "sw           %[temp0],    0(%[out])               \n\t"
    "1:                                                  \n\t"
    : [temp0] "=&r"(va), [temp1] "=&r"(vr), [temp2] "=&r"(vg),
      [temp3] "=&r"(vb), [offset] "=&r"(pos), [out] "+&r"(out)
    : [a] "r"(a), [r] "r"(r), [g] "r"(g), [b] "r"(b), [step] "r"(src_stride),
      [loop_end] "r"(last), [rest] "r"(has_odd)
    : "memory"
  );
}
#endif  // WORDS_BIGENDIAN
|---|
| 170 |  | 
|---|
// Builds 'len' 32-bit words in 'out' from three byte streams. For pixel i the
// bytes r[i * step], g[i * step] and b[i * step] are read and stored as one
// word whose bytes are, from most to least significant: 0xff, r, g, b.
//
//   r, g, b: source byte streams, each advanced by 'step' bytes per pixel.
//   len:     number of words to produce.
//   step:    byte distance between consecutive samples of a stream.
//   out:     destination words.
//
// The assembly loop emits one word per iteration for the first
// len - (len & 1) pixels; when 'len' is odd, the last pixel is produced by
// the straight-line code after label 0.
static void PackRGB_MIPSdspR2(const uint8_t* r, const uint8_t* g,
                              const uint8_t* b, int len, int step,
                              uint32_t* out) {
  const int opaque = 0xff;       // constant written into the top byte
  const int has_odd = len & 1;   // non-zero when a trailing pixel remains
  const uint32_t* const last = out + (len - has_odd);
  int vr, vg, vb, pos;

  __asm__ volatile (
    "xor          %[offset],   %[offset], %[offset]    \n\t"
    "beq          %[loop_end], %[out],    0f           \n\t"
    "2:                                                  \n\t"
    "lbux         %[temp0],    %[offset](%[r])         \n\t"
    "lbux         %[temp1],    %[offset](%[g])         \n\t"
    "lbux         %[temp2],    %[offset](%[b])         \n\t"
    "ins          %[temp0],    %[a],      16,     16   \n\t"
    "ins          %[temp2],    %[temp1],  16,     16   \n\t"
    "addiu        %[out],      %[out],    4            \n\t"
    "precr.qb.ph  %[temp0],    %[temp0],  %[temp2]     \n\t"
    "sw           %[temp0],    -4(%[out])              \n\t"
    "addu         %[offset],   %[offset], %[step]      \n\t"
    "bne          %[loop_end], %[out],    2b           \n\t"
    "0:                                                  \n\t"
    "beq          %[rest],     $zero,     1f           \n\t"
    "lbux         %[temp0],    %[offset](%[r])         \n\t"
    "lbux         %[temp1],    %[offset](%[g])         \n\t"
    "lbux         %[temp2],    %[offset](%[b])         \n\t"
    "ins          %[temp0],    %[a],      16,     16   \n\t"
    "ins          %[temp2],    %[temp1],  16,     16   \n\t"
    "precr.qb.ph  %[temp0],    %[temp0],  %[temp2]     \n\t"
    "sw           %[temp0],    0(%[out])               \n\t"
    "1:                                                  \n\t"
    : [temp0] "=&r"(vr), [temp1] "=&r"(vg), [temp2] "=&r"(vb),
      [offset] "=&r"(pos), [out] "+&r"(out)
    : [a] "r"(opaque), [r] "r"(r), [g] "r"(g), [b] "r"(b), [step] "r"(step),
      [loop_end] "r"(last), [rest] "r"(has_odd)
    : "memory"
  );
}
|---|
| 209 |  | 
|---|
| 210 | //------------------------------------------------------------------------------ | 
|---|
| 211 | // Entry point | 
|---|
| 212 |  | 
|---|
| 213 | extern void WebPInitAlphaProcessingMIPSdspR2(void); | 
|---|
| 214 |  | 
|---|
| 215 | WEBP_TSAN_IGNORE_FUNCTION void WebPInitAlphaProcessingMIPSdspR2(void) { | 
|---|
| 216 | WebPDispatchAlpha = DispatchAlpha_MIPSdspR2; | 
|---|
| 217 | WebPMultARGBRow = MultARGBRow_MIPSdspR2; | 
|---|
| 218 | #ifdef WORDS_BIGENDIAN | 
|---|
| 219 | WebPPackARGB = PackARGB_MIPSdspR2; | 
|---|
| 220 | #endif | 
|---|
| 221 | WebPPackRGB = PackRGB_MIPSdspR2; | 
|---|
| 222 | } | 
|---|
| 223 |  | 
|---|
| 224 | #else  // !WEBP_USE_MIPS_DSP_R2 | 
|---|
| 225 |  | 
|---|
// Built without WEBP_USE_MIPS_DSP_R2: the init symbol is supplied through the
// project's stub macro instead of the implementation above.
// NOTE(review): presumably this expands to an empty function definition; the
// macro is defined outside this file -- confirm in the dsp header.
WEBP_DSP_INIT_STUB(WebPInitAlphaProcessingMIPSdspR2)
|---|
| 227 |  | 
|---|
| 228 | #endif  // WEBP_USE_MIPS_DSP_R2 | 
|---|
| 229 |  | 
|---|