| 1 | // Copyright 2014 Google Inc. All Rights Reserved. |
| 2 | // |
| 3 | // Use of this source code is governed by a BSD-style license |
| 4 | // that can be found in the COPYING file in the root of the source |
| 5 | // tree. An additional intellectual property rights grant can be found |
| 6 | // in the file PATENTS. All contributing project authors may |
| 7 | // be found in the AUTHORS file in the root of the source tree. |
| 8 | // ----------------------------------------------------------------------------- |
| 9 | // |
| 10 | // WebPPicture utils for colorspace conversion |
| 11 | // |
| 12 | // Author: Skal (pascal.massimino@gmail.com) |
| 13 | |
| 14 | #include <assert.h> |
| 15 | #include <stdlib.h> |
| 16 | #include <math.h> |
| 17 | |
| 18 | #include "src/enc/vp8i_enc.h" |
| 19 | #include "src/utils/random_utils.h" |
| 20 | #include "src/utils/utils.h" |
| 21 | #include "src/dsp/dsp.h" |
| 22 | #include "src/dsp/lossless.h" |
| 23 | #include "src/dsp/yuv.h" |
| 24 | |
// Comment out to disable gamma-compression during RGB->U/V averaging
| 26 | #define USE_GAMMA_COMPRESSION |
| 27 | |
| 28 | // If defined, use table to compute x / alpha. |
| 29 | #define USE_INVERSE_ALPHA_TABLE |
| 30 | |
// CHANNEL_OFFSET(i) is the byte offset, in memory, of the i-th most
// significant byte of a uint32_t sample (i = 0 is the top byte).
#if defined(WORDS_BIGENDIAN)
// Big-endian: uint32_t 0xff000000 is laid out as ff,00,00,00.
#define CHANNEL_OFFSET(i) (i)
#else
// Little-endian: uint32_t 0xff000000 is laid out as 00,00,00,ff.
#define CHANNEL_OFFSET(i) (3 - (i))
#endif

// Byte offset of the alpha channel (top byte) inside a uint32_t sample.
#define ALPHA_OFFSET CHANNEL_OFFSET(0)
| 40 | |
| 41 | //------------------------------------------------------------------------------ |
| 42 | // Detection of non-trivial transparency |
| 43 | |
// Returns 1 if alpha[] has non-0xff values, 0 otherwise (or if 'alpha' is
// NULL). 'height' rows of 'width' samples are scanned, consecutive rows being
// 'y_step' bytes apart. Rows are checked with WebPHasAlpha8b() when
// x_step == 1 and with WebPHasAlpha32b() for any other x_step.
static int CheckNonOpaque(const uint8_t* alpha, int width, int height,
                          int x_step, int y_step) {
  const int is_packed = (x_step == 1);
  int rows_left;
  if (alpha == NULL) return 0;
  WebPInitAlphaProcessing();
  for (rows_left = height; rows_left > 0; --rows_left) {
    const int found = is_packed ? WebPHasAlpha8b(alpha, width)
                                : WebPHasAlpha32b(alpha, width);
    if (found) return 1;
    alpha += y_step;
  }
  return 0;
}
| 60 | |
| 61 | // Checking for the presence of non-opaque alpha. |
| 62 | int WebPPictureHasTransparency(const WebPPicture* picture) { |
| 63 | if (picture == NULL) return 0; |
| 64 | if (!picture->use_argb) { |
| 65 | return CheckNonOpaque(picture->a, picture->width, picture->height, |
| 66 | 1, picture->a_stride); |
| 67 | } else { |
| 68 | const int alpha_offset = ALPHA_OFFSET; |
| 69 | return CheckNonOpaque((const uint8_t*)picture->argb + alpha_offset, |
| 70 | picture->width, picture->height, |
| 71 | 4, picture->argb_stride * sizeof(*picture->argb)); |
| 72 | } |
| 73 | return 0; |
| 74 | } |
| 75 | |
| 76 | //------------------------------------------------------------------------------ |
| 77 | // Code for gamma correction |
| 78 | |
| 79 | #if defined(USE_GAMMA_COMPRESSION) |
| 80 | |
// Gamma-compensates the loss of resolution during chroma subsampling.
#define kGamma 0.80       // for now, a different gamma value than kGammaF
#define kGammaFix 12      // fixed-point precision for linear values
#define kGammaScale ((1 << kGammaFix) - 1)
#define kGammaTabFix 7    // fixed-point fractional bits precision
#define kGammaTabScale (1 << kGammaTabFix)
#define kGammaTabRounder (kGammaTabScale >> 1)
#define kGammaTabSize (1 << (kGammaFix - kGammaTabFix))

// Conversion tables and their "already initialized" flag. They are filled by
// InitGammaTables().
static int kLinearToGammaTab[kGammaTabSize + 1];
static uint16_t kGammaToLinearTab[256];
static volatile int kGammaTablesOk = 0;
| 93 | |
| 94 | static WEBP_TSAN_IGNORE_FUNCTION void InitGammaTables(void) { |
| 95 | if (!kGammaTablesOk) { |
| 96 | int v; |
| 97 | const double scale = (double)(1 << kGammaTabFix) / kGammaScale; |
| 98 | const double norm = 1. / 255.; |
| 99 | for (v = 0; v <= 255; ++v) { |
| 100 | kGammaToLinearTab[v] = |
| 101 | (uint16_t)(pow(norm * v, kGamma) * kGammaScale + .5); |
| 102 | } |
| 103 | for (v = 0; v <= kGammaTabSize; ++v) { |
| 104 | kLinearToGammaTab[v] = (int)(255. * pow(scale * v, 1. / kGamma) + .5); |
| 105 | } |
| 106 | kGammaTablesOk = 1; |
| 107 | } |
| 108 | } |
| 109 | |
| 110 | static WEBP_INLINE uint32_t GammaToLinear(uint8_t v) { |
| 111 | return kGammaToLinearTab[v]; |
| 112 | } |
| 113 | |
| 114 | static WEBP_INLINE int Interpolate(int v) { |
| 115 | const int tab_pos = v >> (kGammaTabFix + 2); // integer part |
| 116 | const int x = v & ((kGammaTabScale << 2) - 1); // fractional part |
| 117 | const int v0 = kLinearToGammaTab[tab_pos]; |
| 118 | const int v1 = kLinearToGammaTab[tab_pos + 1]; |
| 119 | const int y = v1 * x + v0 * ((kGammaTabScale << 2) - x); // interpolate |
| 120 | assert(tab_pos + 1 < kGammaTabSize + 1); |
| 121 | return y; |
| 122 | } |
| 123 | |
| 124 | // Convert a linear value 'v' to YUV_FIX+2 fixed-point precision |
| 125 | // U/V value, suitable for RGBToU/V calls. |
| 126 | static WEBP_INLINE int LinearToGamma(uint32_t base_value, int shift) { |
| 127 | const int y = Interpolate(base_value << shift); // final uplifted value |
| 128 | return (y + kGammaTabRounder) >> kGammaTabFix; // descale |
| 129 | } |
| 130 | |
| 131 | #else |
| 132 | |
| 133 | static void InitGammaTables(void) {} |
| 134 | static WEBP_INLINE uint32_t GammaToLinear(uint8_t v) { return v; } |
| 135 | static WEBP_INLINE int LinearToGamma(uint32_t base_value, int shift) { |
| 136 | return (int)(base_value << shift); |
| 137 | } |
| 138 | |
| 139 | #endif // USE_GAMMA_COMPRESSION |
| 140 | |
| 141 | //------------------------------------------------------------------------------ |
| 142 | // RGB -> YUV conversion |
| 143 | |
| 144 | static int RGBToY(int r, int g, int b, VP8Random* const rg) { |
| 145 | return (rg == NULL) ? VP8RGBToY(r, g, b, YUV_HALF) |
| 146 | : VP8RGBToY(r, g, b, VP8RandomBits(rg, YUV_FIX)); |
| 147 | } |
| 148 | |
| 149 | static int RGBToU(int r, int g, int b, VP8Random* const rg) { |
| 150 | return (rg == NULL) ? VP8RGBToU(r, g, b, YUV_HALF << 2) |
| 151 | : VP8RGBToU(r, g, b, VP8RandomBits(rg, YUV_FIX + 2)); |
| 152 | } |
| 153 | |
| 154 | static int RGBToV(int r, int g, int b, VP8Random* const rg) { |
| 155 | return (rg == NULL) ? VP8RGBToV(r, g, b, YUV_HALF << 2) |
| 156 | : VP8RGBToV(r, g, b, VP8RandomBits(rg, YUV_FIX + 2)); |
| 157 | } |
| 158 | |
| 159 | //------------------------------------------------------------------------------ |
| 160 | // Sharp RGB->YUV conversion |
| 161 | |
// Upper bound on the number of refinement passes in PreprocessARGB().
static const int kNumIterations = 4;
// Smallest picture width / height PreprocessARGB() asserts on.
static const int kMinDimensionIterativeConversion = 4;

// SFIX=0 with a plain uint8_t fixed_y_t would be possible, but it sometimes
// produces banding: extra precision is preferable.
#define SFIX 2                // fixed-point precision of RGB and Y/W
typedef int16_t fixed_t;      // signed type with extra SFIX precision for UV
typedef uint16_t fixed_y_t;   // unsigned type with extra SFIX precision for W

#define SHALF ((1 << SFIX) >> 1)
#define MAX_Y_T ((256 << SFIX) - 1)
#define SROUNDER (1 << (YUV_FIX + SFIX - 1))
| 174 | |
| 175 | #if defined(USE_GAMMA_COMPRESSION) |
| 176 | |
| 177 | // We use tables of different size and precision for the Rec709 / BT2020 |
| 178 | // transfer function. |
| 179 | #define kGammaF (1./0.45) |
| 180 | static uint32_t kLinearToGammaTabS[kGammaTabSize + 2]; |
| 181 | #define GAMMA_TO_LINEAR_BITS 14 |
| 182 | static uint32_t kGammaToLinearTabS[MAX_Y_T + 1]; // size scales with Y_FIX |
| 183 | static volatile int kGammaTablesSOk = 0; |
| 184 | |
| 185 | static WEBP_TSAN_IGNORE_FUNCTION void InitGammaTablesS(void) { |
| 186 | assert(2 * GAMMA_TO_LINEAR_BITS < 32); // we use uint32_t intermediate values |
| 187 | if (!kGammaTablesSOk) { |
| 188 | int v; |
| 189 | const double norm = 1. / MAX_Y_T; |
| 190 | const double scale = 1. / kGammaTabSize; |
| 191 | const double a = 0.09929682680944; |
| 192 | const double thresh = 0.018053968510807; |
| 193 | const double final_scale = 1 << GAMMA_TO_LINEAR_BITS; |
| 194 | for (v = 0; v <= MAX_Y_T; ++v) { |
| 195 | const double g = norm * v; |
| 196 | double value; |
| 197 | if (g <= thresh * 4.5) { |
| 198 | value = g / 4.5; |
| 199 | } else { |
| 200 | const double a_rec = 1. / (1. + a); |
| 201 | value = pow(a_rec * (g + a), kGammaF); |
| 202 | } |
| 203 | kGammaToLinearTabS[v] = (uint32_t)(value * final_scale + .5); |
| 204 | } |
| 205 | for (v = 0; v <= kGammaTabSize; ++v) { |
| 206 | const double g = scale * v; |
| 207 | double value; |
| 208 | if (g <= thresh) { |
| 209 | value = 4.5 * g; |
| 210 | } else { |
| 211 | value = (1. + a) * pow(g, 1. / kGammaF) - a; |
| 212 | } |
| 213 | // we already incorporate the 1/2 rounding constant here |
| 214 | kLinearToGammaTabS[v] = |
| 215 | (uint32_t)(MAX_Y_T * value) + (1 << GAMMA_TO_LINEAR_BITS >> 1); |
| 216 | } |
| 217 | // to prevent small rounding errors to cause read-overflow: |
| 218 | kLinearToGammaTabS[kGammaTabSize + 1] = kLinearToGammaTabS[kGammaTabSize]; |
| 219 | kGammaTablesSOk = 1; |
| 220 | } |
| 221 | } |
| 222 | |
| 223 | // return value has a fixed-point precision of GAMMA_TO_LINEAR_BITS |
| 224 | static WEBP_INLINE uint32_t GammaToLinearS(int v) { |
| 225 | return kGammaToLinearTabS[v]; |
| 226 | } |
| 227 | |
| 228 | static WEBP_INLINE uint32_t LinearToGammaS(uint32_t value) { |
| 229 | // 'value' is in GAMMA_TO_LINEAR_BITS fractional precision |
| 230 | const uint32_t v = value * kGammaTabSize; |
| 231 | const uint32_t tab_pos = v >> GAMMA_TO_LINEAR_BITS; |
| 232 | // fractional part, in GAMMA_TO_LINEAR_BITS fixed-point precision |
| 233 | const uint32_t x = v - (tab_pos << GAMMA_TO_LINEAR_BITS); // fractional part |
| 234 | // v0 / v1 are in GAMMA_TO_LINEAR_BITS fixed-point precision (range [0..1]) |
| 235 | const uint32_t v0 = kLinearToGammaTabS[tab_pos + 0]; |
| 236 | const uint32_t v1 = kLinearToGammaTabS[tab_pos + 1]; |
| 237 | // Final interpolation. Note that rounding is already included. |
| 238 | const uint32_t v2 = (v1 - v0) * x; // note: v1 >= v0. |
| 239 | const uint32_t result = v0 + (v2 >> GAMMA_TO_LINEAR_BITS); |
| 240 | return result; |
| 241 | } |
| 242 | |
| 243 | #else |
| 244 | |
| 245 | static void InitGammaTablesS(void) {} |
| 246 | static WEBP_INLINE uint32_t GammaToLinearS(int v) { |
| 247 | return (v << GAMMA_TO_LINEAR_BITS) / MAX_Y_T; |
| 248 | } |
| 249 | static WEBP_INLINE uint32_t LinearToGammaS(uint32_t value) { |
| 250 | return (MAX_Y_T * value) >> GAMMA_TO_LINEAR_BITS; |
| 251 | } |
| 252 | |
| 253 | #endif // USE_GAMMA_COMPRESSION |
| 254 | |
| 255 | //------------------------------------------------------------------------------ |
| 256 | |
| 257 | static uint8_t clip_8b(fixed_t v) { |
| 258 | return (!(v & ~0xff)) ? (uint8_t)v : (v < 0) ? 0u : 255u; |
| 259 | } |
| 260 | |
| 261 | static fixed_y_t clip_y(int y) { |
| 262 | return (!(y & ~MAX_Y_T)) ? (fixed_y_t)y : (y < 0) ? 0 : MAX_Y_T; |
| 263 | } |
| 264 | |
| 265 | //------------------------------------------------------------------------------ |
| 266 | |
| 267 | static int RGBToGray(int r, int g, int b) { |
| 268 | const int luma = 13933 * r + 46871 * g + 4732 * b + YUV_HALF; |
| 269 | return (luma >> YUV_FIX); |
| 270 | } |
| 271 | |
// Averages the four samples 'a', 'b', 'c' and 'd' after converting each one
// with GammaToLinearS(), then converts the rounded average back with
// LinearToGammaS().
static uint32_t ScaleDown(int a, int b, int c, int d) {
  uint32_t linear_sum = GammaToLinearS(a);
  linear_sum += GammaToLinearS(b);
  linear_sum += GammaToLinearS(c);
  linear_sum += GammaToLinearS(d);
  linear_sum += 2;   // rounding term for the division by 4
  return LinearToGammaS(linear_sum >> 2);
}
| 279 | |
| 280 | static WEBP_INLINE void UpdateW(const fixed_y_t* src, fixed_y_t* dst, int w) { |
| 281 | int i; |
| 282 | for (i = 0; i < w; ++i) { |
| 283 | const uint32_t R = GammaToLinearS(src[0 * w + i]); |
| 284 | const uint32_t G = GammaToLinearS(src[1 * w + i]); |
| 285 | const uint32_t B = GammaToLinearS(src[2 * w + i]); |
| 286 | const uint32_t Y = RGBToGray(R, G, B); |
| 287 | dst[i] = (fixed_y_t)LinearToGammaS(Y); |
| 288 | } |
| 289 | } |
| 290 | |
| 291 | static void UpdateChroma(const fixed_y_t* src1, const fixed_y_t* src2, |
| 292 | fixed_t* dst, int uv_w) { |
| 293 | int i; |
| 294 | for (i = 0; i < uv_w; ++i) { |
| 295 | const int r = ScaleDown(src1[0 * uv_w + 0], src1[0 * uv_w + 1], |
| 296 | src2[0 * uv_w + 0], src2[0 * uv_w + 1]); |
| 297 | const int g = ScaleDown(src1[2 * uv_w + 0], src1[2 * uv_w + 1], |
| 298 | src2[2 * uv_w + 0], src2[2 * uv_w + 1]); |
| 299 | const int b = ScaleDown(src1[4 * uv_w + 0], src1[4 * uv_w + 1], |
| 300 | src2[4 * uv_w + 0], src2[4 * uv_w + 1]); |
| 301 | const int W = RGBToGray(r, g, b); |
| 302 | dst[0 * uv_w] = (fixed_t)(r - W); |
| 303 | dst[1 * uv_w] = (fixed_t)(g - W); |
| 304 | dst[2 * uv_w] = (fixed_t)(b - W); |
| 305 | dst += 1; |
| 306 | src1 += 2; |
| 307 | src2 += 2; |
| 308 | } |
| 309 | } |
| 310 | |
| 311 | static void StoreGray(const fixed_y_t* rgb, fixed_y_t* y, int w) { |
| 312 | int i; |
| 313 | for (i = 0; i < w; ++i) { |
| 314 | y[i] = RGBToGray(rgb[0 * w + i], rgb[1 * w + i], rgb[2 * w + i]); |
| 315 | } |
| 316 | } |
| 317 | |
| 318 | //------------------------------------------------------------------------------ |
| 319 | |
| 320 | static WEBP_INLINE fixed_y_t Filter2(int A, int B, int W0) { |
| 321 | const int v0 = (A * 3 + B + 2) >> 2; |
| 322 | return clip_y(v0 + W0); |
| 323 | } |
| 324 | |
| 325 | //------------------------------------------------------------------------------ |
| 326 | |
| 327 | static WEBP_INLINE fixed_y_t UpLift(uint8_t a) { // 8bit -> SFIX |
| 328 | return ((fixed_y_t)a << SFIX) | SHALF; |
| 329 | } |
| 330 | |
| 331 | static void ImportOneRow(const uint8_t* const r_ptr, |
| 332 | const uint8_t* const g_ptr, |
| 333 | const uint8_t* const b_ptr, |
| 334 | int step, |
| 335 | int pic_width, |
| 336 | fixed_y_t* const dst) { |
| 337 | int i; |
| 338 | const int w = (pic_width + 1) & ~1; |
| 339 | for (i = 0; i < pic_width; ++i) { |
| 340 | const int off = i * step; |
| 341 | dst[i + 0 * w] = UpLift(r_ptr[off]); |
| 342 | dst[i + 1 * w] = UpLift(g_ptr[off]); |
| 343 | dst[i + 2 * w] = UpLift(b_ptr[off]); |
| 344 | } |
| 345 | if (pic_width & 1) { // replicate rightmost pixel |
| 346 | dst[pic_width + 0 * w] = dst[pic_width + 0 * w - 1]; |
| 347 | dst[pic_width + 1 * w] = dst[pic_width + 1 * w - 1]; |
| 348 | dst[pic_width + 2 * w] = dst[pic_width + 2 * w - 1]; |
| 349 | } |
| 350 | } |
| 351 | |
| 352 | static void InterpolateTwoRows(const fixed_y_t* const best_y, |
| 353 | const fixed_t* prev_uv, |
| 354 | const fixed_t* cur_uv, |
| 355 | const fixed_t* next_uv, |
| 356 | int w, |
| 357 | fixed_y_t* out1, |
| 358 | fixed_y_t* out2) { |
| 359 | const int uv_w = w >> 1; |
| 360 | const int len = (w - 1) >> 1; // length to filter |
| 361 | int k = 3; |
| 362 | while (k-- > 0) { // process each R/G/B segments in turn |
| 363 | // special boundary case for i==0 |
| 364 | out1[0] = Filter2(cur_uv[0], prev_uv[0], best_y[0]); |
| 365 | out2[0] = Filter2(cur_uv[0], next_uv[0], best_y[w]); |
| 366 | |
| 367 | WebPSharpYUVFilterRow(cur_uv, prev_uv, len, best_y + 0 + 1, out1 + 1); |
| 368 | WebPSharpYUVFilterRow(cur_uv, next_uv, len, best_y + w + 1, out2 + 1); |
| 369 | |
| 370 | // special boundary case for i == w - 1 when w is even |
| 371 | if (!(w & 1)) { |
| 372 | out1[w - 1] = Filter2(cur_uv[uv_w - 1], prev_uv[uv_w - 1], |
| 373 | best_y[w - 1 + 0]); |
| 374 | out2[w - 1] = Filter2(cur_uv[uv_w - 1], next_uv[uv_w - 1], |
| 375 | best_y[w - 1 + w]); |
| 376 | } |
| 377 | out1 += w; |
| 378 | out2 += w; |
| 379 | prev_uv += uv_w; |
| 380 | cur_uv += uv_w; |
| 381 | next_uv += uv_w; |
| 382 | } |
| 383 | } |
| 384 | |
| 385 | static WEBP_INLINE uint8_t ConvertRGBToY(int r, int g, int b) { |
| 386 | const int luma = 16839 * r + 33059 * g + 6420 * b + SROUNDER; |
| 387 | return clip_8b(16 + (luma >> (YUV_FIX + SFIX))); |
| 388 | } |
| 389 | |
| 390 | static WEBP_INLINE uint8_t ConvertRGBToU(int r, int g, int b) { |
| 391 | const int u = -9719 * r - 19081 * g + 28800 * b + SROUNDER; |
| 392 | return clip_8b(128 + (u >> (YUV_FIX + SFIX))); |
| 393 | } |
| 394 | |
| 395 | static WEBP_INLINE uint8_t ConvertRGBToV(int r, int g, int b) { |
| 396 | const int v = +28800 * r - 24116 * g - 4684 * b + SROUNDER; |
| 397 | return clip_8b(128 + (v >> (YUV_FIX + SFIX))); |
| 398 | } |
| 399 | |
| 400 | static int ConvertWRGBToYUV(const fixed_y_t* best_y, const fixed_t* best_uv, |
| 401 | WebPPicture* const picture) { |
| 402 | int i, j; |
| 403 | uint8_t* dst_y = picture->y; |
| 404 | uint8_t* dst_u = picture->u; |
| 405 | uint8_t* dst_v = picture->v; |
| 406 | const fixed_t* const best_uv_base = best_uv; |
| 407 | const int w = (picture->width + 1) & ~1; |
| 408 | const int h = (picture->height + 1) & ~1; |
| 409 | const int uv_w = w >> 1; |
| 410 | const int uv_h = h >> 1; |
| 411 | for (best_uv = best_uv_base, j = 0; j < picture->height; ++j) { |
| 412 | for (i = 0; i < picture->width; ++i) { |
| 413 | const int off = (i >> 1); |
| 414 | const int W = best_y[i]; |
| 415 | const int r = best_uv[off + 0 * uv_w] + W; |
| 416 | const int g = best_uv[off + 1 * uv_w] + W; |
| 417 | const int b = best_uv[off + 2 * uv_w] + W; |
| 418 | dst_y[i] = ConvertRGBToY(r, g, b); |
| 419 | } |
| 420 | best_y += w; |
| 421 | best_uv += (j & 1) * 3 * uv_w; |
| 422 | dst_y += picture->y_stride; |
| 423 | } |
| 424 | for (best_uv = best_uv_base, j = 0; j < uv_h; ++j) { |
| 425 | for (i = 0; i < uv_w; ++i) { |
| 426 | const int off = i; |
| 427 | const int r = best_uv[off + 0 * uv_w]; |
| 428 | const int g = best_uv[off + 1 * uv_w]; |
| 429 | const int b = best_uv[off + 2 * uv_w]; |
| 430 | dst_u[i] = ConvertRGBToU(r, g, b); |
| 431 | dst_v[i] = ConvertRGBToV(r, g, b); |
| 432 | } |
| 433 | best_uv += 3 * uv_w; |
| 434 | dst_u += picture->uv_stride; |
| 435 | dst_v += picture->uv_stride; |
| 436 | } |
| 437 | return 1; |
| 438 | } |
| 439 | |
| 440 | //------------------------------------------------------------------------------ |
| 441 | // Main function |
| 442 | |
| 443 | #define SAFE_ALLOC(W, H, T) ((T*)WebPSafeMalloc((W) * (H), sizeof(T))) |
| 444 | |
| 445 | static int PreprocessARGB(const uint8_t* r_ptr, |
| 446 | const uint8_t* g_ptr, |
| 447 | const uint8_t* b_ptr, |
| 448 | int step, int rgb_stride, |
| 449 | WebPPicture* const picture) { |
| 450 | // we expand the right/bottom border if needed |
| 451 | const int w = (picture->width + 1) & ~1; |
| 452 | const int h = (picture->height + 1) & ~1; |
| 453 | const int uv_w = w >> 1; |
| 454 | const int uv_h = h >> 1; |
| 455 | uint64_t prev_diff_y_sum = ~0; |
| 456 | int j, iter; |
| 457 | |
| 458 | // TODO(skal): allocate one big memory chunk. But for now, it's easier |
| 459 | // for valgrind debugging to have several chunks. |
| 460 | fixed_y_t* const tmp_buffer = SAFE_ALLOC(w * 3, 2, fixed_y_t); // scratch |
| 461 | fixed_y_t* const best_y_base = SAFE_ALLOC(w, h, fixed_y_t); |
| 462 | fixed_y_t* const target_y_base = SAFE_ALLOC(w, h, fixed_y_t); |
| 463 | fixed_y_t* const best_rgb_y = SAFE_ALLOC(w, 2, fixed_y_t); |
| 464 | fixed_t* const best_uv_base = SAFE_ALLOC(uv_w * 3, uv_h, fixed_t); |
| 465 | fixed_t* const target_uv_base = SAFE_ALLOC(uv_w * 3, uv_h, fixed_t); |
| 466 | fixed_t* const best_rgb_uv = SAFE_ALLOC(uv_w * 3, 1, fixed_t); |
| 467 | fixed_y_t* best_y = best_y_base; |
| 468 | fixed_y_t* target_y = target_y_base; |
| 469 | fixed_t* best_uv = best_uv_base; |
| 470 | fixed_t* target_uv = target_uv_base; |
| 471 | const uint64_t diff_y_threshold = (uint64_t)(3.0 * w * h); |
| 472 | int ok; |
| 473 | |
| 474 | if (best_y_base == NULL || best_uv_base == NULL || |
| 475 | target_y_base == NULL || target_uv_base == NULL || |
| 476 | best_rgb_y == NULL || best_rgb_uv == NULL || |
| 477 | tmp_buffer == NULL) { |
| 478 | ok = WebPEncodingSetError(picture, VP8_ENC_ERROR_OUT_OF_MEMORY); |
| 479 | goto End; |
| 480 | } |
| 481 | assert(picture->width >= kMinDimensionIterativeConversion); |
| 482 | assert(picture->height >= kMinDimensionIterativeConversion); |
| 483 | |
| 484 | WebPInitConvertARGBToYUV(); |
| 485 | |
| 486 | // Import RGB samples to W/RGB representation. |
| 487 | for (j = 0; j < picture->height; j += 2) { |
| 488 | const int is_last_row = (j == picture->height - 1); |
| 489 | fixed_y_t* const src1 = tmp_buffer + 0 * w; |
| 490 | fixed_y_t* const src2 = tmp_buffer + 3 * w; |
| 491 | |
| 492 | // prepare two rows of input |
| 493 | ImportOneRow(r_ptr, g_ptr, b_ptr, step, picture->width, src1); |
| 494 | if (!is_last_row) { |
| 495 | ImportOneRow(r_ptr + rgb_stride, g_ptr + rgb_stride, b_ptr + rgb_stride, |
| 496 | step, picture->width, src2); |
| 497 | } else { |
| 498 | memcpy(src2, src1, 3 * w * sizeof(*src2)); |
| 499 | } |
| 500 | StoreGray(src1, best_y + 0, w); |
| 501 | StoreGray(src2, best_y + w, w); |
| 502 | |
| 503 | UpdateW(src1, target_y, w); |
| 504 | UpdateW(src2, target_y + w, w); |
| 505 | UpdateChroma(src1, src2, target_uv, uv_w); |
| 506 | memcpy(best_uv, target_uv, 3 * uv_w * sizeof(*best_uv)); |
| 507 | best_y += 2 * w; |
| 508 | best_uv += 3 * uv_w; |
| 509 | target_y += 2 * w; |
| 510 | target_uv += 3 * uv_w; |
| 511 | r_ptr += 2 * rgb_stride; |
| 512 | g_ptr += 2 * rgb_stride; |
| 513 | b_ptr += 2 * rgb_stride; |
| 514 | } |
| 515 | |
| 516 | // Iterate and resolve clipping conflicts. |
| 517 | for (iter = 0; iter < kNumIterations; ++iter) { |
| 518 | const fixed_t* cur_uv = best_uv_base; |
| 519 | const fixed_t* prev_uv = best_uv_base; |
| 520 | uint64_t diff_y_sum = 0; |
| 521 | |
| 522 | best_y = best_y_base; |
| 523 | best_uv = best_uv_base; |
| 524 | target_y = target_y_base; |
| 525 | target_uv = target_uv_base; |
| 526 | for (j = 0; j < h; j += 2) { |
| 527 | fixed_y_t* const src1 = tmp_buffer + 0 * w; |
| 528 | fixed_y_t* const src2 = tmp_buffer + 3 * w; |
| 529 | { |
| 530 | const fixed_t* const next_uv = cur_uv + ((j < h - 2) ? 3 * uv_w : 0); |
| 531 | InterpolateTwoRows(best_y, prev_uv, cur_uv, next_uv, w, src1, src2); |
| 532 | prev_uv = cur_uv; |
| 533 | cur_uv = next_uv; |
| 534 | } |
| 535 | |
| 536 | UpdateW(src1, best_rgb_y + 0 * w, w); |
| 537 | UpdateW(src2, best_rgb_y + 1 * w, w); |
| 538 | UpdateChroma(src1, src2, best_rgb_uv, uv_w); |
| 539 | |
| 540 | // update two rows of Y and one row of RGB |
| 541 | diff_y_sum += WebPSharpYUVUpdateY(target_y, best_rgb_y, best_y, 2 * w); |
| 542 | WebPSharpYUVUpdateRGB(target_uv, best_rgb_uv, best_uv, 3 * uv_w); |
| 543 | |
| 544 | best_y += 2 * w; |
| 545 | best_uv += 3 * uv_w; |
| 546 | target_y += 2 * w; |
| 547 | target_uv += 3 * uv_w; |
| 548 | } |
| 549 | // test exit condition |
| 550 | if (iter > 0) { |
| 551 | if (diff_y_sum < diff_y_threshold) break; |
| 552 | if (diff_y_sum > prev_diff_y_sum) break; |
| 553 | } |
| 554 | prev_diff_y_sum = diff_y_sum; |
| 555 | } |
| 556 | // final reconstruction |
| 557 | ok = ConvertWRGBToYUV(best_y_base, best_uv_base, picture); |
| 558 | |
| 559 | End: |
| 560 | WebPSafeFree(best_y_base); |
| 561 | WebPSafeFree(best_uv_base); |
| 562 | WebPSafeFree(target_y_base); |
| 563 | WebPSafeFree(target_uv_base); |
| 564 | WebPSafeFree(best_rgb_y); |
| 565 | WebPSafeFree(best_rgb_uv); |
| 566 | WebPSafeFree(tmp_buffer); |
| 567 | return ok; |
| 568 | } |
| 569 | #undef SAFE_ALLOC |
| 570 | |
| 571 | //------------------------------------------------------------------------------ |
| 572 | // "Fast" regular RGB->YUV |
| 573 | |
// Helpers for averaging 2x2 (SUM4) or 1x2 (SUM2) blocks of samples.
// Note: these macros read a variable named 'rgb_stride', which must be in
// scope wherever they are expanded; it is the offset to the row below.

// Sum of the four linearized samples of a 2x2 block, converted with
// LinearToGamma(). 'step' is the offset to the sample on the right.
#define SUM4(ptr, step) LinearToGamma(                         \
    GammaToLinear((ptr)[0]) +                                  \
    GammaToLinear((ptr)[(step)]) +                             \
    GammaToLinear((ptr)[rgb_stride]) +                         \
    GammaToLinear((ptr)[rgb_stride + (step)]), 0)

// Same for a 1x2 block. The extra shift of 1 doubles the two-sample sum.
#define SUM2(ptr) \
    LinearToGamma(GammaToLinear((ptr)[0]) + GammaToLinear((ptr)[rgb_stride]), 1)

// Plain sums of alpha values over a 1x2 / 2x2 block. SUM4ALPHA uses a fixed
// offset of 4 to reach the sample on the right.
#define SUM2ALPHA(ptr) ((ptr)[0] + (ptr)[rgb_stride])
#define SUM4ALPHA(ptr) (SUM2ALPHA(ptr) + SUM2ALPHA((ptr) + 4))
| 585 | |
| 586 | #if defined(USE_INVERSE_ALPHA_TABLE) |
| 587 | |
// Number of fractional bits of the fixed-point reciprocals stored in
// kInvAlpha[].
static const int kAlphaFix = 19;
// Following table is (1 << kAlphaFix) / a. The (v * kInvAlpha[a]) >> kAlphaFix
// formula is then equal to v / a in most (99.6%) cases. Note that this table
// and constant are adjusted very tightly to fit 32b arithmetic.
// In particular, they use the fact that the operands for 'v / a' are actually
// derived as v = (a0.p0 + a1.p1 + a2.p2 + a3.p3) and a = a0 + a1 + a2 + a3
// with ai in [0..255] and pi in [0..1<<kGammaFix). The constraint to avoid
// overflow is: kGammaFix + kAlphaFix <= 31.
// The table is indexed by the alpha sum 'a', from 0 to 4 * 0xff inclusive.
// Entry 0 is a placeholder set to 0.
static const uint32_t kInvAlpha[4 * 0xff + 1] = {
  0,  /* alpha = 0 */
  524288, 262144, 174762, 131072, 104857, 87381, 74898, 65536,
  58254, 52428, 47662, 43690, 40329, 37449, 34952, 32768,
  30840, 29127, 27594, 26214, 24966, 23831, 22795, 21845,
  20971, 20164, 19418, 18724, 18078, 17476, 16912, 16384,
  15887, 15420, 14979, 14563, 14169, 13797, 13443, 13107,
  12787, 12483, 12192, 11915, 11650, 11397, 11155, 10922,
  10699, 10485, 10280, 10082, 9892, 9709, 9532, 9362,
  9198, 9039, 8886, 8738, 8594, 8456, 8322, 8192,
  8065, 7943, 7825, 7710, 7598, 7489, 7384, 7281,
  7182, 7084, 6990, 6898, 6808, 6721, 6636, 6553,
  6472, 6393, 6316, 6241, 6168, 6096, 6026, 5957,
  5890, 5825, 5761, 5698, 5637, 5577, 5518, 5461,
  5405, 5349, 5295, 5242, 5190, 5140, 5090, 5041,
  4993, 4946, 4899, 4854, 4809, 4766, 4723, 4681,
  4639, 4599, 4559, 4519, 4481, 4443, 4405, 4369,
  4332, 4297, 4262, 4228, 4194, 4161, 4128, 4096,
  4064, 4032, 4002, 3971, 3942, 3912, 3883, 3855,
  3826, 3799, 3771, 3744, 3718, 3692, 3666, 3640,
  3615, 3591, 3566, 3542, 3518, 3495, 3472, 3449,
  3426, 3404, 3382, 3360, 3339, 3318, 3297, 3276,
  3256, 3236, 3216, 3196, 3177, 3158, 3139, 3120,
  3102, 3084, 3066, 3048, 3030, 3013, 2995, 2978,
  2962, 2945, 2928, 2912, 2896, 2880, 2864, 2849,
  2833, 2818, 2803, 2788, 2774, 2759, 2744, 2730,
  2716, 2702, 2688, 2674, 2661, 2647, 2634, 2621,
  2608, 2595, 2582, 2570, 2557, 2545, 2532, 2520,
  2508, 2496, 2484, 2473, 2461, 2449, 2438, 2427,
  2416, 2404, 2394, 2383, 2372, 2361, 2351, 2340,
  2330, 2319, 2309, 2299, 2289, 2279, 2269, 2259,
  2250, 2240, 2231, 2221, 2212, 2202, 2193, 2184,
  2175, 2166, 2157, 2148, 2139, 2131, 2122, 2114,
  2105, 2097, 2088, 2080, 2072, 2064, 2056, 2048,
  2040, 2032, 2024, 2016, 2008, 2001, 1993, 1985,
  1978, 1971, 1963, 1956, 1949, 1941, 1934, 1927,
  1920, 1913, 1906, 1899, 1892, 1885, 1879, 1872,
  1865, 1859, 1852, 1846, 1839, 1833, 1826, 1820,
  1814, 1807, 1801, 1795, 1789, 1783, 1777, 1771,
  1765, 1759, 1753, 1747, 1741, 1736, 1730, 1724,
  1718, 1713, 1707, 1702, 1696, 1691, 1685, 1680,
  1675, 1669, 1664, 1659, 1653, 1648, 1643, 1638,
  1633, 1628, 1623, 1618, 1613, 1608, 1603, 1598,
  1593, 1588, 1583, 1579, 1574, 1569, 1565, 1560,
  1555, 1551, 1546, 1542, 1537, 1533, 1528, 1524,
  1519, 1515, 1510, 1506, 1502, 1497, 1493, 1489,
  1485, 1481, 1476, 1472, 1468, 1464, 1460, 1456,
  1452, 1448, 1444, 1440, 1436, 1432, 1428, 1424,
  1420, 1416, 1413, 1409, 1405, 1401, 1398, 1394,
  1390, 1387, 1383, 1379, 1376, 1372, 1368, 1365,
  1361, 1358, 1354, 1351, 1347, 1344, 1340, 1337,
  1334, 1330, 1327, 1323, 1320, 1317, 1314, 1310,
  1307, 1304, 1300, 1297, 1294, 1291, 1288, 1285,
  1281, 1278, 1275, 1272, 1269, 1266, 1263, 1260,
  1257, 1254, 1251, 1248, 1245, 1242, 1239, 1236,
  1233, 1230, 1227, 1224, 1222, 1219, 1216, 1213,
  1210, 1208, 1205, 1202, 1199, 1197, 1194, 1191,
  1188, 1186, 1183, 1180, 1178, 1175, 1172, 1170,
  1167, 1165, 1162, 1159, 1157, 1154, 1152, 1149,
  1147, 1144, 1142, 1139, 1137, 1134, 1132, 1129,
  1127, 1125, 1122, 1120, 1117, 1115, 1113, 1110,
  1108, 1106, 1103, 1101, 1099, 1096, 1094, 1092,
  1089, 1087, 1085, 1083, 1081, 1078, 1076, 1074,
  1072, 1069, 1067, 1065, 1063, 1061, 1059, 1057,
  1054, 1052, 1050, 1048, 1046, 1044, 1042, 1040,
  1038, 1036, 1034, 1032, 1030, 1028, 1026, 1024,
  1022, 1020, 1018, 1016, 1014, 1012, 1010, 1008,
  1006, 1004, 1002, 1000, 998, 996, 994, 992,
  991, 989, 987, 985, 983, 981, 979, 978,
  976, 974, 972, 970, 969, 967, 965, 963,
  961, 960, 958, 956, 954, 953, 951, 949,
  948, 946, 944, 942, 941, 939, 937, 936,
  934, 932, 931, 929, 927, 926, 924, 923,
  921, 919, 918, 916, 914, 913, 911, 910,
  908, 907, 905, 903, 902, 900, 899, 897,
  896, 894, 893, 891, 890, 888, 887, 885,
  884, 882, 881, 879, 878, 876, 875, 873,
  872, 870, 869, 868, 866, 865, 863, 862,
  860, 859, 858, 856, 855, 853, 852, 851,
  849, 848, 846, 845, 844, 842, 841, 840,
  838, 837, 836, 834, 833, 832, 830, 829,
  828, 826, 825, 824, 823, 821, 820, 819,
  817, 816, 815, 814, 812, 811, 810, 809,
  807, 806, 805, 804, 802, 801, 800, 799,
  798, 796, 795, 794, 793, 791, 790, 789,
  788, 787, 786, 784, 783, 782, 781, 780,
  779, 777, 776, 775, 774, 773, 772, 771,
  769, 768, 767, 766, 765, 764, 763, 762,
  760, 759, 758, 757, 756, 755, 754, 753,
  752, 751, 750, 748, 747, 746, 745, 744,
  743, 742, 741, 740, 739, 738, 737, 736,
  735, 734, 733, 732, 731, 730, 729, 728,
  727, 726, 725, 724, 723, 722, 721, 720,
  719, 718, 717, 716, 715, 714, 713, 712,
  711, 710, 709, 708, 707, 706, 705, 704,
  703, 702, 701, 700, 699, 699, 698, 697,
  696, 695, 694, 693, 692, 691, 690, 689,
  688, 688, 687, 686, 685, 684, 683, 682,
  681, 680, 680, 679, 678, 677, 676, 675,
  674, 673, 673, 672, 671, 670, 669, 668,
  667, 667, 666, 665, 664, 663, 662, 661,
  661, 660, 659, 658, 657, 657, 656, 655,
  654, 653, 652, 652, 651, 650, 649, 648,
  648, 647, 646, 645, 644, 644, 643, 642,
  641, 640, 640, 639, 638, 637, 637, 636,
  635, 634, 633, 633, 632, 631, 630, 630,
  629, 628, 627, 627, 626, 625, 624, 624,
  623, 622, 621, 621, 620, 619, 618, 618,
  617, 616, 616, 615, 614, 613, 613, 612,
  611, 611, 610, 609, 608, 608, 607, 606,
  606, 605, 604, 604, 603, 602, 601, 601,
  600, 599, 599, 598, 597, 597, 596, 595,
  595, 594, 593, 593, 592, 591, 591, 590,
  589, 589, 588, 587, 587, 586, 585, 585,
  584, 583, 583, 582, 581, 581, 580, 579,
  579, 578, 578, 577, 576, 576, 575, 574,
  574, 573, 572, 572, 571, 571, 570, 569,
  569, 568, 568, 567, 566, 566, 565, 564,
  564, 563, 563, 562, 561, 561, 560, 560,
  559, 558, 558, 557, 557, 556, 555, 555,
  554, 554, 553, 553, 552, 551, 551, 550,
  550, 549, 548, 548, 547, 547, 546, 546,
  545, 544, 544, 543, 543, 542, 542, 541,
  541, 540, 539, 539, 538, 538, 537, 537,
  536, 536, 535, 534, 534, 533, 533, 532,
  532, 531, 531, 530, 530, 529, 529, 528,
  527, 527, 526, 526, 525, 525, 524, 524,
  523, 523, 522, 522, 521, 521, 520, 520,
  519, 519, 518, 518, 517, 517, 516, 516,
  515, 515, 514, 514
};
| 727 | |
// Table-based approximation of 4 * sum / a.
// LinearToGamma() expects its input to be premultiplied by 4, so this factor
// is folded into the macro by shifting right by 2 bits fewer than kAlphaFix.
#define DIVIDE_BY_ALPHA(sum, a)  (((sum) * kInvAlpha[(a)]) >> (kAlphaFix - 2))
| 731 | |
| 732 | #else |
| 733 | |
// Table-free fallback: plain integer division of 4 * sum by 'a'.
#define DIVIDE_BY_ALPHA(sum, a) (4 * (sum) / (a))
| 735 | |
| 736 | #endif // USE_INVERSE_ALPHA_TABLE |
| 737 | |
| 738 | static WEBP_INLINE int LinearToGammaWeighted(const uint8_t* src, |
| 739 | const uint8_t* a_ptr, |
| 740 | uint32_t total_a, int step, |
| 741 | int rgb_stride) { |
| 742 | const uint32_t sum = |
| 743 | a_ptr[0] * GammaToLinear(src[0]) + |
| 744 | a_ptr[step] * GammaToLinear(src[step]) + |
| 745 | a_ptr[rgb_stride] * GammaToLinear(src[rgb_stride]) + |
| 746 | a_ptr[rgb_stride + step] * GammaToLinear(src[rgb_stride + step]); |
| 747 | assert(total_a > 0 && total_a <= 4 * 0xff); |
| 748 | #if defined(USE_INVERSE_ALPHA_TABLE) |
| 749 | assert((uint64_t)sum * kInvAlpha[total_a] < ((uint64_t)1 << 32)); |
| 750 | #endif |
| 751 | return LinearToGamma(DIVIDE_BY_ALPHA(sum, total_a), 0); |
| 752 | } |
| 753 | |
| 754 | static WEBP_INLINE void ConvertRowToY(const uint8_t* const r_ptr, |
| 755 | const uint8_t* const g_ptr, |
| 756 | const uint8_t* const b_ptr, |
| 757 | int step, |
| 758 | uint8_t* const dst_y, |
| 759 | int width, |
| 760 | VP8Random* const rg) { |
| 761 | int i, j; |
| 762 | for (i = 0, j = 0; i < width; i += 1, j += step) { |
| 763 | dst_y[i] = RGBToY(r_ptr[j], g_ptr[j], b_ptr[j], rg); |
| 764 | } |
| 765 | } |
| 766 | |
| 767 | static WEBP_INLINE void AccumulateRGBA(const uint8_t* const r_ptr, |
| 768 | const uint8_t* const g_ptr, |
| 769 | const uint8_t* const b_ptr, |
| 770 | const uint8_t* const a_ptr, |
| 771 | int rgb_stride, |
| 772 | uint16_t* dst, int width) { |
| 773 | int i, j; |
| 774 | // we loop over 2x2 blocks and produce one R/G/B/A value for each. |
| 775 | for (i = 0, j = 0; i < (width >> 1); i += 1, j += 2 * 4, dst += 4) { |
| 776 | const uint32_t a = SUM4ALPHA(a_ptr + j); |
| 777 | int r, g, b; |
| 778 | if (a == 4 * 0xff || a == 0) { |
| 779 | r = SUM4(r_ptr + j, 4); |
| 780 | g = SUM4(g_ptr + j, 4); |
| 781 | b = SUM4(b_ptr + j, 4); |
| 782 | } else { |
| 783 | r = LinearToGammaWeighted(r_ptr + j, a_ptr + j, a, 4, rgb_stride); |
| 784 | g = LinearToGammaWeighted(g_ptr + j, a_ptr + j, a, 4, rgb_stride); |
| 785 | b = LinearToGammaWeighted(b_ptr + j, a_ptr + j, a, 4, rgb_stride); |
| 786 | } |
| 787 | dst[0] = r; |
| 788 | dst[1] = g; |
| 789 | dst[2] = b; |
| 790 | dst[3] = a; |
| 791 | } |
| 792 | if (width & 1) { |
| 793 | const uint32_t a = 2u * SUM2ALPHA(a_ptr + j); |
| 794 | int r, g, b; |
| 795 | if (a == 4 * 0xff || a == 0) { |
| 796 | r = SUM2(r_ptr + j); |
| 797 | g = SUM2(g_ptr + j); |
| 798 | b = SUM2(b_ptr + j); |
| 799 | } else { |
| 800 | r = LinearToGammaWeighted(r_ptr + j, a_ptr + j, a, 0, rgb_stride); |
| 801 | g = LinearToGammaWeighted(g_ptr + j, a_ptr + j, a, 0, rgb_stride); |
| 802 | b = LinearToGammaWeighted(b_ptr + j, a_ptr + j, a, 0, rgb_stride); |
| 803 | } |
| 804 | dst[0] = r; |
| 805 | dst[1] = g; |
| 806 | dst[2] = b; |
| 807 | dst[3] = a; |
| 808 | } |
| 809 | } |
| 810 | |
| 811 | static WEBP_INLINE void AccumulateRGB(const uint8_t* const r_ptr, |
| 812 | const uint8_t* const g_ptr, |
| 813 | const uint8_t* const b_ptr, |
| 814 | int step, int rgb_stride, |
| 815 | uint16_t* dst, int width) { |
| 816 | int i, j; |
| 817 | for (i = 0, j = 0; i < (width >> 1); i += 1, j += 2 * step, dst += 4) { |
| 818 | dst[0] = SUM4(r_ptr + j, step); |
| 819 | dst[1] = SUM4(g_ptr + j, step); |
| 820 | dst[2] = SUM4(b_ptr + j, step); |
| 821 | } |
| 822 | if (width & 1) { |
| 823 | dst[0] = SUM2(r_ptr + j); |
| 824 | dst[1] = SUM2(g_ptr + j); |
| 825 | dst[2] = SUM2(b_ptr + j); |
| 826 | } |
| 827 | } |
| 828 | |
| 829 | static WEBP_INLINE void ConvertRowsToUV(const uint16_t* rgb, |
| 830 | uint8_t* const dst_u, |
| 831 | uint8_t* const dst_v, |
| 832 | int width, |
| 833 | VP8Random* const rg) { |
| 834 | int i; |
| 835 | for (i = 0; i < width; i += 1, rgb += 4) { |
| 836 | const int r = rgb[0], g = rgb[1], b = rgb[2]; |
| 837 | dst_u[i] = RGBToU(r, g, b, rg); |
| 838 | dst_v[i] = RGBToV(r, g, b, rg); |
| 839 | } |
| 840 | } |
| 841 | |
| 842 | static int ImportYUVAFromRGBA(const uint8_t* r_ptr, |
| 843 | const uint8_t* g_ptr, |
| 844 | const uint8_t* b_ptr, |
| 845 | const uint8_t* a_ptr, |
| 846 | int step, // bytes per pixel |
| 847 | int rgb_stride, // bytes per scanline |
| 848 | float dithering, |
| 849 | int use_iterative_conversion, |
| 850 | WebPPicture* const picture) { |
| 851 | int y; |
| 852 | const int width = picture->width; |
| 853 | const int height = picture->height; |
| 854 | const int has_alpha = CheckNonOpaque(a_ptr, width, height, step, rgb_stride); |
| 855 | const int is_rgb = (r_ptr < b_ptr); // otherwise it's bgr |
| 856 | |
| 857 | picture->colorspace = has_alpha ? WEBP_YUV420A : WEBP_YUV420; |
| 858 | picture->use_argb = 0; |
| 859 | |
| 860 | // disable smart conversion if source is too small (overkill). |
| 861 | if (width < kMinDimensionIterativeConversion || |
| 862 | height < kMinDimensionIterativeConversion) { |
| 863 | use_iterative_conversion = 0; |
| 864 | } |
| 865 | |
| 866 | if (!WebPPictureAllocYUVA(picture, width, height)) { |
| 867 | return 0; |
| 868 | } |
| 869 | if (has_alpha) { |
| 870 | assert(step == 4); |
| 871 | #if defined(USE_GAMMA_COMPRESSION) && defined(USE_INVERSE_ALPHA_TABLE) |
| 872 | assert(kAlphaFix + kGammaFix <= 31); |
| 873 | #endif |
| 874 | } |
| 875 | |
| 876 | if (use_iterative_conversion) { |
| 877 | InitGammaTablesS(); |
| 878 | if (!PreprocessARGB(r_ptr, g_ptr, b_ptr, step, rgb_stride, picture)) { |
| 879 | return 0; |
| 880 | } |
| 881 | if (has_alpha) { |
| 882 | WebPExtractAlpha(a_ptr, rgb_stride, width, height, |
| 883 | picture->a, picture->a_stride); |
| 884 | } |
| 885 | } else { |
| 886 | const int uv_width = (width + 1) >> 1; |
| 887 | int use_dsp = (step == 3); // use special function in this case |
| 888 | // temporary storage for accumulated R/G/B values during conversion to U/V |
| 889 | uint16_t* const tmp_rgb = |
| 890 | (uint16_t*)WebPSafeMalloc(4 * uv_width, sizeof(*tmp_rgb)); |
| 891 | uint8_t* dst_y = picture->y; |
| 892 | uint8_t* dst_u = picture->u; |
| 893 | uint8_t* dst_v = picture->v; |
| 894 | uint8_t* dst_a = picture->a; |
| 895 | |
| 896 | VP8Random base_rg; |
| 897 | VP8Random* rg = NULL; |
| 898 | if (dithering > 0.) { |
| 899 | VP8InitRandom(&base_rg, dithering); |
| 900 | rg = &base_rg; |
| 901 | use_dsp = 0; // can't use dsp in this case |
| 902 | } |
| 903 | WebPInitConvertARGBToYUV(); |
| 904 | InitGammaTables(); |
| 905 | |
| 906 | if (tmp_rgb == NULL) return 0; // malloc error |
| 907 | |
| 908 | // Downsample Y/U/V planes, two rows at a time |
| 909 | for (y = 0; y < (height >> 1); ++y) { |
| 910 | int rows_have_alpha = has_alpha; |
| 911 | if (use_dsp) { |
| 912 | if (is_rgb) { |
| 913 | WebPConvertRGB24ToY(r_ptr, dst_y, width); |
| 914 | WebPConvertRGB24ToY(r_ptr + rgb_stride, |
| 915 | dst_y + picture->y_stride, width); |
| 916 | } else { |
| 917 | WebPConvertBGR24ToY(b_ptr, dst_y, width); |
| 918 | WebPConvertBGR24ToY(b_ptr + rgb_stride, |
| 919 | dst_y + picture->y_stride, width); |
| 920 | } |
| 921 | } else { |
| 922 | ConvertRowToY(r_ptr, g_ptr, b_ptr, step, dst_y, width, rg); |
| 923 | ConvertRowToY(r_ptr + rgb_stride, |
| 924 | g_ptr + rgb_stride, |
| 925 | b_ptr + rgb_stride, step, |
| 926 | dst_y + picture->y_stride, width, rg); |
| 927 | } |
| 928 | dst_y += 2 * picture->y_stride; |
| 929 | if (has_alpha) { |
| 930 | rows_have_alpha &= !WebPExtractAlpha(a_ptr, rgb_stride, width, 2, |
| 931 | dst_a, picture->a_stride); |
| 932 | dst_a += 2 * picture->a_stride; |
| 933 | } |
| 934 | // Collect averaged R/G/B(/A) |
| 935 | if (!rows_have_alpha) { |
| 936 | AccumulateRGB(r_ptr, g_ptr, b_ptr, step, rgb_stride, tmp_rgb, width); |
| 937 | } else { |
| 938 | AccumulateRGBA(r_ptr, g_ptr, b_ptr, a_ptr, rgb_stride, tmp_rgb, width); |
| 939 | } |
| 940 | // Convert to U/V |
| 941 | if (rg == NULL) { |
| 942 | WebPConvertRGBA32ToUV(tmp_rgb, dst_u, dst_v, uv_width); |
| 943 | } else { |
| 944 | ConvertRowsToUV(tmp_rgb, dst_u, dst_v, uv_width, rg); |
| 945 | } |
| 946 | dst_u += picture->uv_stride; |
| 947 | dst_v += picture->uv_stride; |
| 948 | r_ptr += 2 * rgb_stride; |
| 949 | b_ptr += 2 * rgb_stride; |
| 950 | g_ptr += 2 * rgb_stride; |
| 951 | if (has_alpha) a_ptr += 2 * rgb_stride; |
| 952 | } |
| 953 | if (height & 1) { // extra last row |
| 954 | int row_has_alpha = has_alpha; |
| 955 | if (use_dsp) { |
| 956 | if (r_ptr < b_ptr) { |
| 957 | WebPConvertRGB24ToY(r_ptr, dst_y, width); |
| 958 | } else { |
| 959 | WebPConvertBGR24ToY(b_ptr, dst_y, width); |
| 960 | } |
| 961 | } else { |
| 962 | ConvertRowToY(r_ptr, g_ptr, b_ptr, step, dst_y, width, rg); |
| 963 | } |
| 964 | if (row_has_alpha) { |
| 965 | row_has_alpha &= !WebPExtractAlpha(a_ptr, 0, width, 1, dst_a, 0); |
| 966 | } |
| 967 | // Collect averaged R/G/B(/A) |
| 968 | if (!row_has_alpha) { |
| 969 | // Collect averaged R/G/B |
| 970 | AccumulateRGB(r_ptr, g_ptr, b_ptr, step, /* rgb_stride = */ 0, |
| 971 | tmp_rgb, width); |
| 972 | } else { |
| 973 | AccumulateRGBA(r_ptr, g_ptr, b_ptr, a_ptr, /* rgb_stride = */ 0, |
| 974 | tmp_rgb, width); |
| 975 | } |
| 976 | if (rg == NULL) { |
| 977 | WebPConvertRGBA32ToUV(tmp_rgb, dst_u, dst_v, uv_width); |
| 978 | } else { |
| 979 | ConvertRowsToUV(tmp_rgb, dst_u, dst_v, uv_width, rg); |
| 980 | } |
| 981 | } |
| 982 | WebPSafeFree(tmp_rgb); |
| 983 | } |
| 984 | return 1; |
| 985 | } |
| 986 | |
| 987 | #undef SUM4 |
| 988 | #undef SUM2 |
| 989 | #undef SUM4ALPHA |
| 990 | #undef SUM2ALPHA |
| 991 | |
| 992 | //------------------------------------------------------------------------------ |
| 993 | // call for ARGB->YUVA conversion |
| 994 | |
| 995 | static int PictureARGBToYUVA(WebPPicture* picture, WebPEncCSP colorspace, |
| 996 | float dithering, int use_iterative_conversion) { |
| 997 | if (picture == NULL) return 0; |
| 998 | if (picture->argb == NULL) { |
| 999 | return WebPEncodingSetError(picture, VP8_ENC_ERROR_NULL_PARAMETER); |
| 1000 | } else if ((colorspace & WEBP_CSP_UV_MASK) != WEBP_YUV420) { |
| 1001 | return WebPEncodingSetError(picture, VP8_ENC_ERROR_INVALID_CONFIGURATION); |
| 1002 | } else { |
| 1003 | const uint8_t* const argb = (const uint8_t*)picture->argb; |
| 1004 | const uint8_t* const a = argb + CHANNEL_OFFSET(0); |
| 1005 | const uint8_t* const r = argb + CHANNEL_OFFSET(1); |
| 1006 | const uint8_t* const g = argb + CHANNEL_OFFSET(2); |
| 1007 | const uint8_t* const b = argb + CHANNEL_OFFSET(3); |
| 1008 | |
| 1009 | picture->colorspace = WEBP_YUV420; |
| 1010 | return ImportYUVAFromRGBA(r, g, b, a, 4, 4 * picture->argb_stride, |
| 1011 | dithering, use_iterative_conversion, picture); |
| 1012 | } |
| 1013 | } |
| 1014 | |
| 1015 | int WebPPictureARGBToYUVADithered(WebPPicture* picture, WebPEncCSP colorspace, |
| 1016 | float dithering) { |
| 1017 | return PictureARGBToYUVA(picture, colorspace, dithering, 0); |
| 1018 | } |
| 1019 | |
| 1020 | int WebPPictureARGBToYUVA(WebPPicture* picture, WebPEncCSP colorspace) { |
| 1021 | return PictureARGBToYUVA(picture, colorspace, 0.f, 0); |
| 1022 | } |
| 1023 | |
| 1024 | int WebPPictureSharpARGBToYUVA(WebPPicture* picture) { |
| 1025 | return PictureARGBToYUVA(picture, WEBP_YUV420, 0.f, 1); |
| 1026 | } |
| 1027 | // for backward compatibility |
| 1028 | int WebPPictureSmartARGBToYUVA(WebPPicture* picture) { |
| 1029 | return WebPPictureSharpARGBToYUVA(picture); |
| 1030 | } |
| 1031 | |
| 1032 | //------------------------------------------------------------------------------ |
| 1033 | // call for YUVA -> ARGB conversion |
| 1034 | |
| 1035 | int WebPPictureYUVAToARGB(WebPPicture* picture) { |
| 1036 | if (picture == NULL) return 0; |
| 1037 | if (picture->y == NULL || picture->u == NULL || picture->v == NULL) { |
| 1038 | return WebPEncodingSetError(picture, VP8_ENC_ERROR_NULL_PARAMETER); |
| 1039 | } |
| 1040 | if ((picture->colorspace & WEBP_CSP_ALPHA_BIT) && picture->a == NULL) { |
| 1041 | return WebPEncodingSetError(picture, VP8_ENC_ERROR_NULL_PARAMETER); |
| 1042 | } |
| 1043 | if ((picture->colorspace & WEBP_CSP_UV_MASK) != WEBP_YUV420) { |
| 1044 | return WebPEncodingSetError(picture, VP8_ENC_ERROR_INVALID_CONFIGURATION); |
| 1045 | } |
| 1046 | // Allocate a new argb buffer (discarding the previous one). |
| 1047 | if (!WebPPictureAllocARGB(picture, picture->width, picture->height)) return 0; |
| 1048 | picture->use_argb = 1; |
| 1049 | |
| 1050 | // Convert |
| 1051 | { |
| 1052 | int y; |
| 1053 | const int width = picture->width; |
| 1054 | const int height = picture->height; |
| 1055 | const int argb_stride = 4 * picture->argb_stride; |
| 1056 | uint8_t* dst = (uint8_t*)picture->argb; |
| 1057 | const uint8_t* cur_u = picture->u, *cur_v = picture->v, *cur_y = picture->y; |
| 1058 | WebPUpsampleLinePairFunc upsample = |
| 1059 | WebPGetLinePairConverter(ALPHA_OFFSET > 0); |
| 1060 | |
| 1061 | // First row, with replicated top samples. |
| 1062 | upsample(cur_y, NULL, cur_u, cur_v, cur_u, cur_v, dst, NULL, width); |
| 1063 | cur_y += picture->y_stride; |
| 1064 | dst += argb_stride; |
| 1065 | // Center rows. |
| 1066 | for (y = 1; y + 1 < height; y += 2) { |
| 1067 | const uint8_t* const top_u = cur_u; |
| 1068 | const uint8_t* const top_v = cur_v; |
| 1069 | cur_u += picture->uv_stride; |
| 1070 | cur_v += picture->uv_stride; |
| 1071 | upsample(cur_y, cur_y + picture->y_stride, top_u, top_v, cur_u, cur_v, |
| 1072 | dst, dst + argb_stride, width); |
| 1073 | cur_y += 2 * picture->y_stride; |
| 1074 | dst += 2 * argb_stride; |
| 1075 | } |
| 1076 | // Last row (if needed), with replicated bottom samples. |
| 1077 | if (height > 1 && !(height & 1)) { |
| 1078 | upsample(cur_y, NULL, cur_u, cur_v, cur_u, cur_v, dst, NULL, width); |
| 1079 | } |
| 1080 | // Insert alpha values if needed, in replacement for the default 0xff ones. |
| 1081 | if (picture->colorspace & WEBP_CSP_ALPHA_BIT) { |
| 1082 | for (y = 0; y < height; ++y) { |
| 1083 | uint32_t* const argb_dst = picture->argb + y * picture->argb_stride; |
| 1084 | const uint8_t* const src = picture->a + y * picture->a_stride; |
| 1085 | int x; |
| 1086 | for (x = 0; x < width; ++x) { |
| 1087 | argb_dst[x] = (argb_dst[x] & 0x00ffffffu) | ((uint32_t)src[x] << 24); |
| 1088 | } |
| 1089 | } |
| 1090 | } |
| 1091 | } |
| 1092 | return 1; |
| 1093 | } |
| 1094 | |
| 1095 | //------------------------------------------------------------------------------ |
| 1096 | // automatic import / conversion |
| 1097 | |
| 1098 | static int Import(WebPPicture* const picture, |
| 1099 | const uint8_t* rgb, int rgb_stride, |
| 1100 | int step, int swap_rb, int import_alpha) { |
| 1101 | int y; |
| 1102 | // swap_rb -> b,g,r,a , !swap_rb -> r,g,b,a |
| 1103 | const uint8_t* r_ptr = rgb + (swap_rb ? 2 : 0); |
| 1104 | const uint8_t* g_ptr = rgb + 1; |
| 1105 | const uint8_t* b_ptr = rgb + (swap_rb ? 0 : 2); |
| 1106 | const int width = picture->width; |
| 1107 | const int height = picture->height; |
| 1108 | |
| 1109 | if (!picture->use_argb) { |
| 1110 | const uint8_t* a_ptr = import_alpha ? rgb + 3 : NULL; |
| 1111 | return ImportYUVAFromRGBA(r_ptr, g_ptr, b_ptr, a_ptr, step, rgb_stride, |
| 1112 | 0.f /* no dithering */, 0, picture); |
| 1113 | } |
| 1114 | if (!WebPPictureAlloc(picture)) return 0; |
| 1115 | |
| 1116 | VP8LDspInit(); |
| 1117 | WebPInitAlphaProcessing(); |
| 1118 | |
| 1119 | if (import_alpha) { |
| 1120 | // dst[] byte order is {a,r,g,b} for big-endian, {b,g,r,a} for little endian |
| 1121 | uint32_t* dst = picture->argb; |
| 1122 | const int do_copy = (ALPHA_OFFSET == 3) && swap_rb; |
| 1123 | assert(step == 4); |
| 1124 | if (do_copy) { |
| 1125 | for (y = 0; y < height; ++y) { |
| 1126 | memcpy(dst, rgb, width * 4); |
| 1127 | rgb += rgb_stride; |
| 1128 | dst += picture->argb_stride; |
| 1129 | } |
| 1130 | } else { |
| 1131 | for (y = 0; y < height; ++y) { |
| 1132 | #ifdef WORDS_BIGENDIAN |
| 1133 | // BGRA or RGBA input order. |
| 1134 | const uint8_t* a_ptr = rgb + 3; |
| 1135 | WebPPackARGB(a_ptr, r_ptr, g_ptr, b_ptr, width, dst); |
| 1136 | r_ptr += rgb_stride; |
| 1137 | g_ptr += rgb_stride; |
| 1138 | b_ptr += rgb_stride; |
| 1139 | #else |
| 1140 | // RGBA input order. Need to swap R and B. |
| 1141 | VP8LConvertBGRAToRGBA((const uint32_t*)rgb, width, (uint8_t*)dst); |
| 1142 | #endif |
| 1143 | rgb += rgb_stride; |
| 1144 | dst += picture->argb_stride; |
| 1145 | } |
| 1146 | } |
| 1147 | } else { |
| 1148 | uint32_t* dst = picture->argb; |
| 1149 | assert(step >= 3); |
| 1150 | for (y = 0; y < height; ++y) { |
| 1151 | WebPPackRGB(r_ptr, g_ptr, b_ptr, width, step, dst); |
| 1152 | r_ptr += rgb_stride; |
| 1153 | g_ptr += rgb_stride; |
| 1154 | b_ptr += rgb_stride; |
| 1155 | dst += picture->argb_stride; |
| 1156 | } |
| 1157 | } |
| 1158 | return 1; |
| 1159 | } |
| 1160 | |
| 1161 | // Public API |
| 1162 | |
| 1163 | #if !defined(WEBP_REDUCE_CSP) |
| 1164 | |
| 1165 | int WebPPictureImportBGR(WebPPicture* picture, |
| 1166 | const uint8_t* rgb, int rgb_stride) { |
| 1167 | return (picture != NULL && rgb != NULL) |
| 1168 | ? Import(picture, rgb, rgb_stride, 3, 1, 0) |
| 1169 | : 0; |
| 1170 | } |
| 1171 | |
| 1172 | int WebPPictureImportBGRA(WebPPicture* picture, |
| 1173 | const uint8_t* rgba, int rgba_stride) { |
| 1174 | return (picture != NULL && rgba != NULL) |
| 1175 | ? Import(picture, rgba, rgba_stride, 4, 1, 1) |
| 1176 | : 0; |
| 1177 | } |
| 1178 | |
| 1179 | |
| 1180 | int WebPPictureImportBGRX(WebPPicture* picture, |
| 1181 | const uint8_t* rgba, int rgba_stride) { |
| 1182 | return (picture != NULL && rgba != NULL) |
| 1183 | ? Import(picture, rgba, rgba_stride, 4, 1, 0) |
| 1184 | : 0; |
| 1185 | } |
| 1186 | |
| 1187 | #endif // WEBP_REDUCE_CSP |
| 1188 | |
| 1189 | int WebPPictureImportRGB(WebPPicture* picture, |
| 1190 | const uint8_t* rgb, int rgb_stride) { |
| 1191 | return (picture != NULL && rgb != NULL) |
| 1192 | ? Import(picture, rgb, rgb_stride, 3, 0, 0) |
| 1193 | : 0; |
| 1194 | } |
| 1195 | |
| 1196 | int WebPPictureImportRGBA(WebPPicture* picture, |
| 1197 | const uint8_t* rgba, int rgba_stride) { |
| 1198 | return (picture != NULL && rgba != NULL) |
| 1199 | ? Import(picture, rgba, rgba_stride, 4, 0, 1) |
| 1200 | : 0; |
| 1201 | } |
| 1202 | |
| 1203 | int WebPPictureImportRGBX(WebPPicture* picture, |
| 1204 | const uint8_t* rgba, int rgba_stride) { |
| 1205 | return (picture != NULL && rgba != NULL) |
| 1206 | ? Import(picture, rgba, rgba_stride, 4, 0, 0) |
| 1207 | : 0; |
| 1208 | } |
| 1209 | |
| 1210 | //------------------------------------------------------------------------------ |
| 1211 | |