// Copyright 2015 Google Inc. All Rights Reserved.
//
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// MIPS version of lossless functions
//
// Author(s):  Djordje Pesut    (djordje.pesut@imgtec.com)
//             Jovan Zelincevic (jovan.zelincevic@imgtec.com)

#include "./dsp.h"
#include "./lossless.h"
#include "./lossless_common.h"

#if defined(WEBP_USE_MIPS32)

#include <assert.h>
#include <math.h>
#include <stdlib.h>
#include <string.h>

static float FastSLog2Slow(uint32_t v) {
  assert(v >= LOG_LOOKUP_IDX_MAX);
  if (v < APPROX_LOG_WITH_CORRECTION_MAX) {
    uint32_t log_cnt, y, correction;
    const int c24 = 24;
    const float v_f = (float)v;
    uint32_t temp;

    // Xf = 256 = 2^8
    // log_cnt is index of leading one in upper 24 bits
    __asm__ volatile(
      "clz      %[log_cnt], %[v]                      \n\t"
      "addiu    %[y],       $zero,        1           \n\t"
      "subu     %[log_cnt], %[c24],       %[log_cnt]  \n\t"
      "sllv     %[y],       %[y],         %[log_cnt]  \n\t"
      "srlv     %[temp],    %[v],         %[log_cnt]  \n\t"
      : [log_cnt] "=&r"(log_cnt), [y] "=&r"(y),
        [temp] "=r"(temp)
      : [c24] "r"(c24), [v] "r"(v)
    );
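
    // Equivalent C for the assembly above (a sketch; __builtin_clz is the
    // GCC/Clang builtin matching the 'clz' instruction):
    //   log_cnt = 24 - __builtin_clz(v);  // shift that brings v below 2^8
    //   y = 1 << log_cnt;                 // y = 2^log_cnt
    //   temp = v >> log_cnt;              // kLog2Table index, in [128, 255]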

    // vf = (2^log_cnt) * Xf; where y = 2^log_cnt and Xf < 256
    // Xf = floor(Xf) * (1 + (v % y) / v)
    // log2(Xf) = log2(floor(Xf)) + log2(1 + (v % y) / v)
    // The correction factor: log(1 + d) ~ d; for very small d values, so
    // log2(1 + (v % y) / v) ~ LOG_2_RECIPROCAL * (v % y) / v
    // LOG_2_RECIPROCAL ~ 23/16

    // (v % y) = (v % 2^log_cnt) = v & (2^log_cnt - 1)
    correction = (23 * (v & (y - 1))) >> 4;
    return v_f * (kLog2Table[temp] + log_cnt) + correction;
  } else {
    return (float)(LOG_2_RECIPROCAL * v * log((double)v));
  }
}

static float FastLog2Slow(uint32_t v) {
  assert(v >= LOG_LOOKUP_IDX_MAX);
  if (v < APPROX_LOG_WITH_CORRECTION_MAX) {
    uint32_t log_cnt, y;
    const int c24 = 24;
    double log_2;
    uint32_t temp;

    __asm__ volatile(
      "clz      %[log_cnt], %[v]                      \n\t"
      "addiu    %[y],       $zero,        1           \n\t"
      "subu     %[log_cnt], %[c24],       %[log_cnt]  \n\t"
      "sllv     %[y],       %[y],         %[log_cnt]  \n\t"
      "srlv     %[temp],    %[v],         %[log_cnt]  \n\t"
      : [log_cnt] "=&r"(log_cnt), [y] "=&r"(y),
        [temp] "=r"(temp)
      : [c24] "r"(c24), [v] "r"(v)
    );
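
    // Same bit trick as in FastSLog2Slow above: 'temp' = v >> log_cnt is the
    // lookup-table index and y = 2^log_cnt is kept for the correction term.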

    log_2 = kLog2Table[temp] + log_cnt;
    if (v >= APPROX_LOG_MAX) {
      // Since the division is still expensive, add this correction factor
      // only for large values of 'v'.

      const uint32_t correction = (23 * (v & (y - 1))) >> 4;
      log_2 += (double)correction / v;
    }
    return (float)log_2;
  } else {
    return (float)(LOG_2_RECIPROCAL * log((double)v));
  }
}

// C version of this function:
//   int i = 0;
//   int64_t cost = 0;
//   const uint32_t* pop = &population[4];
//   const uint32_t* const LoopEnd = &population[length];
//   while (pop != LoopEnd) {
//     ++i;
//     cost += i * *pop;
//     cost += i * *(pop + 1);
//     pop += 2;
//   }
//   return (double)cost;
static double ExtraCost(const uint32_t* const population, int length) {
  int i, temp0, temp1;
  const uint32_t* pop = &population[4];
  const uint32_t* const LoopEnd = &population[length];

  __asm__ volatile(
    "mult   $zero,    $zero                  \n\t"
    "xor    %[i],     %[i],       %[i]       \n\t"
    "beq    %[pop],   %[LoopEnd], 2f         \n\t"
  "1:                                        \n\t"
    "lw     %[temp0], 0(%[pop])              \n\t"
    "lw     %[temp1], 4(%[pop])              \n\t"
    "addiu  %[i],     %[i],       1          \n\t"
    "addiu  %[pop],   %[pop],     8          \n\t"
    "madd   %[i],     %[temp0]               \n\t"
    "madd   %[i],     %[temp1]               \n\t"
    "bne    %[pop],   %[LoopEnd], 1b         \n\t"
  "2:                                        \n\t"
    "mfhi   %[temp0]                         \n\t"
    "mflo   %[temp1]                         \n\t"
    : [temp0] "=&r"(temp0), [temp1] "=&r"(temp1),
      [i] "=&r"(i), [pop] "+r"(pop)
    : [LoopEnd] "r"(LoopEnd)
    : "memory", "hi", "lo"
  );
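
  // The 64-bit sum accumulates in the hi/lo register pair: 'mult $zero, $zero'
  // clears it and each 'madd' adds a 32x32->64-bit product; mfhi/mflo read
  // back the two halves recombined below.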
  return (double)((int64_t)temp0 << 32 | temp1);
}

// C version of this function:
//   int i = 0;
//   int64_t cost = 0;
//   const uint32_t* pX = &X[4];
//   const uint32_t* pY = &Y[4];
//   const uint32_t* LoopEnd = &X[length];
//   while (pX != LoopEnd) {
//     const uint32_t xy0 = *pX + *pY;
//     const uint32_t xy1 = *(pX + 1) + *(pY + 1);
//     ++i;
//     cost += i * xy0;
//     cost += i * xy1;
//     pX += 2;
//     pY += 2;
//   }
//   return (double)cost;
static double ExtraCostCombined(const uint32_t* const X,
                                const uint32_t* const Y, int length) {
  int i, temp0, temp1, temp2, temp3;
  const uint32_t* pX = &X[4];
  const uint32_t* pY = &Y[4];
  const uint32_t* const LoopEnd = &X[length];

  __asm__ volatile(
    "mult   $zero,    $zero                  \n\t"
    "xor    %[i],     %[i],       %[i]       \n\t"
    "beq    %[pX],    %[LoopEnd], 2f         \n\t"
  "1:                                        \n\t"
    "lw     %[temp0], 0(%[pX])               \n\t"
    "lw     %[temp1], 0(%[pY])               \n\t"
    "lw     %[temp2], 4(%[pX])               \n\t"
    "lw     %[temp3], 4(%[pY])               \n\t"
    "addiu  %[i],     %[i],       1          \n\t"
    "addu   %[temp0], %[temp0],   %[temp1]   \n\t"
    "addu   %[temp2], %[temp2],   %[temp3]   \n\t"
    "addiu  %[pX],    %[pX],      8          \n\t"
    "addiu  %[pY],    %[pY],      8          \n\t"
    "madd   %[i],     %[temp0]               \n\t"
    "madd   %[i],     %[temp2]               \n\t"
    "bne    %[pX],    %[LoopEnd], 1b         \n\t"
  "2:                                        \n\t"
    "mfhi   %[temp0]                         \n\t"
    "mflo   %[temp1]                         \n\t"
    : [temp0] "=&r"(temp0), [temp1] "=&r"(temp1),
      [temp2] "=&r"(temp2), [temp3] "=&r"(temp3),
      [i] "=&r"(i), [pX] "+r"(pX), [pY] "+r"(pY)
    : [LoopEnd] "r"(LoopEnd)
    : "memory", "hi", "lo"
  );
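
  // As in ExtraCost above, the 64-bit total lives in hi/lo.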
  return (double)((int64_t)temp0 << 32 | temp1);
}
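
// C version of this macro (a sketch; 'temp0' must hold (*val_prev != 0)
// on entry, as set up in GetEntropyUnrefinedHelper below):
//   stats->counts[temp0] += (streak > 3);
//   stats->streaks[temp0][(streak > 3)] += streak;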
#define HUFFMAN_COST_PASS                                 \
  __asm__ volatile(                                       \
    "sll   %[temp1],  %[temp0],    3           \n\t"      \
    "addiu %[temp3],  %[streak],   -3          \n\t"      \
    "addu  %[temp2],  %[pstreaks], %[temp1]    \n\t"      \
    "blez  %[temp3],  1f                       \n\t"      \
    "srl   %[temp1],  %[temp1],    1           \n\t"      \
    "addu  %[temp3],  %[pcnts],    %[temp1]    \n\t"      \
    "lw    %[temp0],  4(%[temp2])              \n\t"      \
    "lw    %[temp1],  0(%[temp3])              \n\t"      \
    "addu  %[temp0],  %[temp0],    %[streak]   \n\t"      \
    "addiu %[temp1],  %[temp1],    1           \n\t"      \
    "sw    %[temp0],  4(%[temp2])              \n\t"      \
    "sw    %[temp1],  0(%[temp3])              \n\t"      \
    "b     2f                                  \n\t"      \
  "1:                                          \n\t"      \
    "lw    %[temp0],  0(%[temp2])              \n\t"      \
    "addu  %[temp0],  %[temp0],    %[streak]   \n\t"      \
    "sw    %[temp0],  0(%[temp2])              \n\t"      \
  "2:                                          \n\t"      \
    : [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),           \
      [temp3]"=&r"(temp3), [temp0]"+r"(temp0)             \
    : [pstreaks]"r"(pstreaks), [pcnts]"r"(pcnts),         \
      [streak]"r"(streak)                                 \
    : "memory"                                            \
  );

// Updates the RLE streak counts and the bit entropy for the run of equal
// values ending at index 'i'.
static WEBP_INLINE void GetEntropyUnrefinedHelper(
    uint32_t val, int i, uint32_t* const val_prev, int* const i_prev,
    VP8LBitEntropy* const bit_entropy, VP8LStreaks* const stats) {
  int* const pstreaks = &stats->streaks[0][0];
  int* const pcnts = &stats->counts[0];
  int temp0, temp1, temp2, temp3;
  const int streak = i - *i_prev;

  // Gather info for the bit entropy.
  if (*val_prev != 0) {
    bit_entropy->sum += (*val_prev) * streak;
    bit_entropy->nonzeros += streak;
    bit_entropy->nonzero_code = *i_prev;
    bit_entropy->entropy -= VP8LFastSLog2(*val_prev) * streak;
    if (bit_entropy->max_val < *val_prev) {
      bit_entropy->max_val = *val_prev;
    }
  }

  // Gather info for the Huffman cost.
  temp0 = (*val_prev != 0);
  HUFFMAN_COST_PASS

  *val_prev = val;
  *i_prev = i;
}
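
// The two scanners below call the helper once per run of equal values (plus
// once for the final run), so statistics are gathered one streak at a time.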

static void GetEntropyUnrefined(const uint32_t X[], int length,
                                VP8LBitEntropy* const bit_entropy,
                                VP8LStreaks* const stats) {
  int i;
  int i_prev = 0;
  uint32_t x_prev = X[0];

  memset(stats, 0, sizeof(*stats));
  VP8LBitEntropyInit(bit_entropy);

  for (i = 1; i < length; ++i) {
    const uint32_t x = X[i];
    if (x != x_prev) {
      GetEntropyUnrefinedHelper(x, i, &x_prev, &i_prev, bit_entropy, stats);
    }
  }
  GetEntropyUnrefinedHelper(0, i, &x_prev, &i_prev, bit_entropy, stats);

  bit_entropy->entropy += VP8LFastSLog2(bit_entropy->sum);
}

static void GetCombinedEntropyUnrefined(const uint32_t X[], const uint32_t Y[],
                                        int length,
                                        VP8LBitEntropy* const bit_entropy,
                                        VP8LStreaks* const stats) {
  int i = 1;
  int i_prev = 0;
  uint32_t xy_prev = X[0] + Y[0];

  memset(stats, 0, sizeof(*stats));
  VP8LBitEntropyInit(bit_entropy);

  for (i = 1; i < length; ++i) {
    const uint32_t xy = X[i] + Y[i];
    if (xy != xy_prev) {
      GetEntropyUnrefinedHelper(xy, i, &xy_prev, &i_prev, bit_entropy, stats);
    }
  }
  GetEntropyUnrefinedHelper(0, i, &xy_prev, &i_prev, bit_entropy, stats);

  bit_entropy->entropy += VP8LFastSLog2(bit_entropy->sum);
}

#define ASM_START                                       \
  __asm__ volatile(                                     \
    ".set   push                            \n\t"       \
    ".set   at                              \n\t"       \
    ".set   macro                           \n\t"       \
  "1:                                       \n\t"

// P2 = P0 + P1
// A..D - byte offsets within the current 16-byte block
// E - flag telling the macro whether the P1 pointer should also be
//     incremented (0 when P1 aliases the output, as in ADD_VECTOR_EQ)
// literal_ and successive histogram arrays may be unaligned,
// so we must use the unaligned-access instructions ulw and usw
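//
// C version of one unrolled pass (a sketch):
//   P2[0] = P0[0] + P1[0];  P2[1] = P0[1] + P1[1];
//   P2[2] = P0[2] + P1[2];  P2[3] = P0[3] + P1[3];
//   P0 += 4;  P2 += 4;  if (E == 1) P1 += 4;  // 16 bytes per pass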
#define ADD_TO_OUT(A, B, C, D, E, P0, P1, P2)           \
    "ulw    %[temp0], " #A "(%[" #P0 "])    \n\t"       \
    "ulw    %[temp1], " #B "(%[" #P0 "])    \n\t"       \
    "ulw    %[temp2], " #C "(%[" #P0 "])    \n\t"       \
    "ulw    %[temp3], " #D "(%[" #P0 "])    \n\t"       \
    "ulw    %[temp4], " #A "(%[" #P1 "])    \n\t"       \
    "ulw    %[temp5], " #B "(%[" #P1 "])    \n\t"       \
    "ulw    %[temp6], " #C "(%[" #P1 "])    \n\t"       \
    "ulw    %[temp7], " #D "(%[" #P1 "])    \n\t"       \
    "addu   %[temp4], %[temp4],   %[temp0]  \n\t"       \
    "addu   %[temp5], %[temp5],   %[temp1]  \n\t"       \
    "addu   %[temp6], %[temp6],   %[temp2]  \n\t"       \
    "addu   %[temp7], %[temp7],   %[temp3]  \n\t"       \
    "addiu  %[" #P0 "],  %[" #P0 "],  16    \n\t"       \
    ".if " #E " == 1                        \n\t"       \
    "addiu  %[" #P1 "],  %[" #P1 "],  16    \n\t"       \
    ".endif                                 \n\t"       \
    "usw    %[temp4], " #A "(%[" #P2 "])    \n\t"       \
    "usw    %[temp5], " #B "(%[" #P2 "])    \n\t"       \
    "usw    %[temp6], " #C "(%[" #P2 "])    \n\t"       \
    "usw    %[temp7], " #D "(%[" #P2 "])    \n\t"       \
    "addiu  %[" #P2 "], %[" #P2 "],   16    \n\t"       \
    "bne    %[" #P0 "], %[LoopEnd], 1b      \n\t"       \
    ".set   pop                             \n\t"       \

#define ASM_END_COMMON_0                                \
    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),         \
      [temp2]"=&r"(temp2), [temp3]"=&r"(temp3),         \
      [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),         \
      [temp6]"=&r"(temp6), [temp7]"=&r"(temp7),         \
      [pa]"+r"(pa), [pout]"+r"(pout)

#define ASM_END_COMMON_1                                \
    : [LoopEnd]"r"(LoopEnd)                             \
    : "memory", "at"                                    \
  );

#define ASM_END_0                                       \
    ASM_END_COMMON_0                                    \
      , [pb]"+r"(pb)                                    \
    ASM_END_COMMON_1

#define ASM_END_1                                       \
    ASM_END_COMMON_0                                    \
    ASM_END_COMMON_1

#define ADD_VECTOR(A, B, OUT, SIZE, EXTRA_SIZE)  do {   \
  const uint32_t* pa = (const uint32_t*)(A);            \
  const uint32_t* pb = (const uint32_t*)(B);            \
  uint32_t* pout = (uint32_t*)(OUT);                    \
  const uint32_t* const LoopEnd = pa + (SIZE);          \
  assert((SIZE) % 4 == 0);                              \
  ASM_START                                             \
  ADD_TO_OUT(0, 4, 8, 12, 1, pa, pb, pout)              \
  ASM_END_0                                             \
  if ((EXTRA_SIZE) > 0) {                               \
    const int last = (EXTRA_SIZE);                      \
    int i;                                              \
    for (i = 0; i < last; ++i) pout[i] = pa[i] + pb[i]; \
  }                                                     \
} while (0)

#define ADD_VECTOR_EQ(A, OUT, SIZE, EXTRA_SIZE)  do {   \
  const uint32_t* pa = (const uint32_t*)(A);            \
  uint32_t* pout = (uint32_t*)(OUT);                    \
  const uint32_t* const LoopEnd = pa + (SIZE);          \
  assert((SIZE) % 4 == 0);                              \
  ASM_START                                             \
  ADD_TO_OUT(0, 4, 8, 12, 0, pa, pout, pout)            \
  ASM_END_1                                             \
  if ((EXTRA_SIZE) > 0) {                               \
    const int last = (EXTRA_SIZE);                      \
    int i;                                              \
    for (i = 0; i < last; ++i) pout[i] += pa[i];        \
  }                                                     \
} while (0)

static void HistogramAdd(const VP8LHistogram* const a,
                         const VP8LHistogram* const b,
                         VP8LHistogram* const out) {
  uint32_t temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
  const int extra_cache_size = VP8LHistogramNumCodes(a->palette_code_bits_)
                             - (NUM_LITERAL_CODES + NUM_LENGTH_CODES);
  assert(a->palette_code_bits_ == b->palette_code_bits_);

  if (b != out) {
    ADD_VECTOR(a->literal_, b->literal_, out->literal_,
               NUM_LITERAL_CODES + NUM_LENGTH_CODES, extra_cache_size);
    ADD_VECTOR(a->distance_, b->distance_, out->distance_,
               NUM_DISTANCE_CODES, 0);
    ADD_VECTOR(a->red_, b->red_, out->red_, NUM_LITERAL_CODES, 0);
    ADD_VECTOR(a->blue_, b->blue_, out->blue_, NUM_LITERAL_CODES, 0);
    ADD_VECTOR(a->alpha_, b->alpha_, out->alpha_, NUM_LITERAL_CODES, 0);
  } else {
    ADD_VECTOR_EQ(a->literal_, out->literal_,
                  NUM_LITERAL_CODES + NUM_LENGTH_CODES, extra_cache_size);
    ADD_VECTOR_EQ(a->distance_, out->distance_, NUM_DISTANCE_CODES, 0);
    ADD_VECTOR_EQ(a->red_, out->red_, NUM_LITERAL_CODES, 0);
    ADD_VECTOR_EQ(a->blue_, out->blue_, NUM_LITERAL_CODES, 0);
    ADD_VECTOR_EQ(a->alpha_, out->alpha_, NUM_LITERAL_CODES, 0);
  }
}

#undef ADD_VECTOR_EQ
#undef ADD_VECTOR
#undef ASM_END_1
#undef ASM_END_0
#undef ASM_END_COMMON_1
#undef ASM_END_COMMON_0
#undef ADD_TO_OUT
#undef ASM_START

//------------------------------------------------------------------------------
// Entry point
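// VP8LEncDspInitMIPS32() is invoked once from the generic encoder DSP setup
// to rebind the shared function pointers to the MIPS32 versions above.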

extern void VP8LEncDspInitMIPS32(void);

WEBP_TSAN_IGNORE_FUNCTION void VP8LEncDspInitMIPS32(void) {
  VP8LFastSLog2Slow = FastSLog2Slow;
  VP8LFastLog2Slow = FastLog2Slow;
  VP8LExtraCost = ExtraCost;
  VP8LExtraCostCombined = ExtraCostCombined;
  VP8LGetEntropyUnrefined = GetEntropyUnrefined;
  VP8LGetCombinedEntropyUnrefined = GetCombinedEntropyUnrefined;
  VP8LHistogramAdd = HistogramAdd;
}

#else  // !WEBP_USE_MIPS32

WEBP_DSP_INIT_STUB(VP8LEncDspInitMIPS32)

#endif  // WEBP_USE_MIPS32