// Copyright 2022 Google Inc. All Rights Reserved.
//
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// Sharp RGB to YUV conversion.
//
// Author: Skal (pascal.massimino@gmail.com)

#include "sharpyuv/sharpyuv.h"

#include <assert.h>
#include <limits.h>
#include <stddef.h>
#include <stdlib.h>
#include <string.h>

#include "src/webp/types.h"
#include "sharpyuv/sharpyuv_cpu.h"
#include "sharpyuv/sharpyuv_dsp.h"
#include "sharpyuv/sharpyuv_gamma.h"

//------------------------------------------------------------------------------

int SharpYuvGetVersion(void) {
  return SHARPYUV_VERSION;
}

//------------------------------------------------------------------------------
// Sharp RGB->YUV conversion

static const int kNumIterations = 4;

#define YUV_FIX 16  // fixed-point precision for RGB->YUV
static const int kYuvHalf = 1 << (YUV_FIX - 1);

// Max bit depth so that intermediate calculations fit in 16 bits.
static const int kMaxBitDepth = 14;

// Returns the precision shift to use based on the input rgb_bit_depth.
static int GetPrecisionShift(int rgb_bit_depth) {
  // Try to add 2 bits of precision if it fits in kMaxBitDepth. Otherwise remove
  // bits if needed.
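  // For example: 8-bit input works at 10 bits, 12-bit input at 14 bits, and
  // 16-bit input is reduced to 14 bits (a shift of -2).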
  return ((rgb_bit_depth + 2) <= kMaxBitDepth) ? 2
                                               : (kMaxBitDepth - rgb_bit_depth);
}

typedef int16_t fixed_t;     // signed type with extra precision for UV
typedef uint16_t fixed_y_t;  // unsigned type with extra precision for W

//------------------------------------------------------------------------------

static uint8_t clip_8b(fixed_t v) {
  return (!(v & ~0xff)) ? (uint8_t)v : (v < 0) ? 0u : 255u;
}

static uint16_t clip(fixed_t v, int max) {
  return (v < 0) ? 0 : (v > max) ? max : (uint16_t)v;
}

static fixed_y_t clip_bit_depth(int y, int bit_depth) {
  const int max = (1 << bit_depth) - 1;
  return (!(y & ~max)) ? (fixed_y_t)y : (y < 0) ? 0 : max;
}

//------------------------------------------------------------------------------
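
// Fixed-point luma: the coefficients sum to 1 << YUV_FIX and approximate the
// Rec. 709 weights (0.2126, 0.7152, 0.0722).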
static int RGBToGray(int64_t r, int64_t g, int64_t b) {
  const int64_t luma = 13933 * r + 46871 * g + 4732 * b + kYuvHalf;
  return (int)(luma >> YUV_FIX);
}
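
// Averages a 2x2 block of gamma-encoded samples in linear light, then
// re-encodes the result.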
static uint32_t ScaleDown(uint16_t a, uint16_t b, uint16_t c, uint16_t d,
                          int rgb_bit_depth) {
  const int bit_depth = rgb_bit_depth + GetPrecisionShift(rgb_bit_depth);
  const uint32_t A = SharpYuvGammaToLinear(a, bit_depth);
  const uint32_t B = SharpYuvGammaToLinear(b, bit_depth);
  const uint32_t C = SharpYuvGammaToLinear(c, bit_depth);
  const uint32_t D = SharpYuvGammaToLinear(d, bit_depth);
  return SharpYuvLinearToGamma((A + B + C + D + 2) >> 2, bit_depth);
}
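
// Computes the gray (W) value of each pixel of an R/G/B row: the luma is
// evaluated in linear light and converted back to gamma space.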
static WEBP_INLINE void UpdateW(const fixed_y_t* src, fixed_y_t* dst, int w,
                                int rgb_bit_depth) {
  const int bit_depth = rgb_bit_depth + GetPrecisionShift(rgb_bit_depth);
  int i;
  for (i = 0; i < w; ++i) {
    const uint32_t R = SharpYuvGammaToLinear(src[0 * w + i], bit_depth);
    const uint32_t G = SharpYuvGammaToLinear(src[1 * w + i], bit_depth);
    const uint32_t B = SharpYuvGammaToLinear(src[2 * w + i], bit_depth);
    const uint32_t Y = RGBToGray(R, G, B);
    dst[i] = (fixed_y_t)SharpYuvLinearToGamma(Y, bit_depth);
  }
}
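
// For each 2x2 block of the two input rows, stores the downscaled R/G/B
// values minus their gray value, i.e. the chroma-carrying residuals.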
static void UpdateChroma(const fixed_y_t* src1, const fixed_y_t* src2,
                         fixed_t* dst, int uv_w, int rgb_bit_depth) {
  int i;
  for (i = 0; i < uv_w; ++i) {
    const int r =
        ScaleDown(src1[0 * uv_w + 0], src1[0 * uv_w + 1], src2[0 * uv_w + 0],
                  src2[0 * uv_w + 1], rgb_bit_depth);
    const int g =
        ScaleDown(src1[2 * uv_w + 0], src1[2 * uv_w + 1], src2[2 * uv_w + 0],
                  src2[2 * uv_w + 1], rgb_bit_depth);
    const int b =
        ScaleDown(src1[4 * uv_w + 0], src1[4 * uv_w + 1], src2[4 * uv_w + 0],
                  src2[4 * uv_w + 1], rgb_bit_depth);
    const int W = RGBToGray(r, g, b);
    dst[0 * uv_w] = (fixed_t)(r - W);
    dst[1 * uv_w] = (fixed_t)(g - W);
    dst[2 * uv_w] = (fixed_t)(b - W);
    dst += 1;
    src1 += 2;
    src2 += 2;
  }
}
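
// Stores the gray value of each pixel of an R/G/B row, computed directly in
// gamma space.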
static void StoreGray(const fixed_y_t* rgb, fixed_y_t* y, int w) {
  int i;
  assert(w > 0);
  for (i = 0; i < w; ++i) {
    y[i] = RGBToGray(rgb[0 * w + i], rgb[1 * w + i], rgb[2 * w + i]);
  }
}

//------------------------------------------------------------------------------
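
// Interpolates a chroma residual as 3/4 * A + 1/4 * B (rounded), adds the luma
// offset W0 and clips the result to the working bit depth.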
static WEBP_INLINE fixed_y_t Filter2(int A, int B, int W0, int bit_depth) {
  const int v0 = (A * 3 + B + 2) >> 2;
  return clip_bit_depth(v0 + W0, bit_depth);
}

//------------------------------------------------------------------------------
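
// Shifts left when 'shift' is positive, right when it is negative.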
static WEBP_INLINE int Shift(int v, int shift) {
  return (shift >= 0) ? (v << shift) : (v >> -shift);
}
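
// Imports one row of RGB samples into the work buffer, applying the precision
// shift and replicating the rightmost pixel when the width is odd.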
static void ImportOneRow(const uint8_t* const r_ptr,
                         const uint8_t* const g_ptr,
                         const uint8_t* const b_ptr,
                         int rgb_step,
                         int rgb_bit_depth,
                         int pic_width,
                         fixed_y_t* const dst) {
  // Convert the rgb_step from a number of bytes to a number of uint8_t or
  // uint16_t values depending on the bit depth.
  const int step = (rgb_bit_depth > 8) ? rgb_step / 2 : rgb_step;
  int i;
  const int w = (pic_width + 1) & ~1;
  for (i = 0; i < pic_width; ++i) {
    const int off = i * step;
    const int shift = GetPrecisionShift(rgb_bit_depth);
    if (rgb_bit_depth == 8) {
      dst[i + 0 * w] = Shift(r_ptr[off], shift);
      dst[i + 1 * w] = Shift(g_ptr[off], shift);
      dst[i + 2 * w] = Shift(b_ptr[off], shift);
    } else {
      dst[i + 0 * w] = Shift(((uint16_t*)r_ptr)[off], shift);
      dst[i + 1 * w] = Shift(((uint16_t*)g_ptr)[off], shift);
      dst[i + 2 * w] = Shift(((uint16_t*)b_ptr)[off], shift);
    }
  }
  if (pic_width & 1) {  // replicate rightmost pixel
    dst[pic_width + 0 * w] = dst[pic_width + 0 * w - 1];
    dst[pic_width + 1 * w] = dst[pic_width + 1 * w - 1];
    dst[pic_width + 2 * w] = dst[pic_width + 2 * w - 1];
  }
}
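
// Reconstructs two full-resolution R/G/B rows (out1/out2) by upsampling the
// chroma residuals of the previous, current and next UV rows and adding the
// two corresponding luma rows from best_y.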
static void InterpolateTwoRows(const fixed_y_t* const best_y,
                               const fixed_t* prev_uv,
                               const fixed_t* cur_uv,
                               const fixed_t* next_uv,
                               int w,
                               fixed_y_t* out1,
                               fixed_y_t* out2,
                               int rgb_bit_depth) {
  const int uv_w = w >> 1;
  const int len = (w - 1) >> 1;  // length to filter
  int k = 3;
  const int bit_depth = rgb_bit_depth + GetPrecisionShift(rgb_bit_depth);
  while (k-- > 0) {  // process each of the R/G/B segments in turn
    // special boundary case for i == 0
    out1[0] = Filter2(cur_uv[0], prev_uv[0], best_y[0], bit_depth);
    out2[0] = Filter2(cur_uv[0], next_uv[0], best_y[w], bit_depth);

    SharpYuvFilterRow(cur_uv, prev_uv, len, best_y + 0 + 1, out1 + 1,
                      bit_depth);
    SharpYuvFilterRow(cur_uv, next_uv, len, best_y + w + 1, out2 + 1,
                      bit_depth);

    // special boundary case for i == w - 1 when w is even
    if (!(w & 1)) {
      out1[w - 1] = Filter2(cur_uv[uv_w - 1], prev_uv[uv_w - 1],
                            best_y[w - 1 + 0], bit_depth);
      out2[w - 1] = Filter2(cur_uv[uv_w - 1], next_uv[uv_w - 1],
                            best_y[w - 1 + w], bit_depth);
    }
    out1 += w;
    out2 += w;
    prev_uv += uv_w;
    cur_uv += uv_w;
    next_uv += uv_w;
  }
}
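
// Applies one row of the conversion matrix (Y, U or V coefficients) with
// rounding, then drops the fixed-point and extra-precision bits.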
static WEBP_INLINE int RGBToYUVComponent(int r, int g, int b,
                                         const int coeffs[4], int sfix) {
  const int srounder = 1 << (YUV_FIX + sfix - 1);
  const int luma = coeffs[0] * r + coeffs[1] * g + coeffs[2] * b +
                   coeffs[3] + srounder;
  return (luma >> (YUV_FIX + sfix));
}
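
// Converts the final W/RGB representation to the destination Y plane (full
// resolution) and U/V planes (half resolution), storing 8-bit or 16-bit
// samples depending on yuv_bit_depth.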
static int ConvertWRGBToYUV(const fixed_y_t* best_y, const fixed_t* best_uv,
                            uint8_t* y_ptr, int y_stride, uint8_t* u_ptr,
                            int u_stride, uint8_t* v_ptr, int v_stride,
                            int rgb_bit_depth,
                            int yuv_bit_depth, int width, int height,
                            const SharpYuvConversionMatrix* yuv_matrix) {
  int i, j;
  const fixed_t* const best_uv_base = best_uv;
  const int w = (width + 1) & ~1;
  const int h = (height + 1) & ~1;
  const int uv_w = w >> 1;
  const int uv_h = h >> 1;
  const int sfix = GetPrecisionShift(rgb_bit_depth);
  const int yuv_max = (1 << yuv_bit_depth) - 1;

  for (best_uv = best_uv_base, j = 0; j < height; ++j) {
    for (i = 0; i < width; ++i) {
      const int off = (i >> 1);
      const int W = best_y[i];
      const int r = best_uv[off + 0 * uv_w] + W;
      const int g = best_uv[off + 1 * uv_w] + W;
      const int b = best_uv[off + 2 * uv_w] + W;
      const int y = RGBToYUVComponent(r, g, b, yuv_matrix->rgb_to_y, sfix);
      if (yuv_bit_depth <= 8) {
        y_ptr[i] = clip_8b(y);
      } else {
        ((uint16_t*)y_ptr)[i] = clip(y, yuv_max);
      }
    }
    best_y += w;
    best_uv += (j & 1) * 3 * uv_w;
    y_ptr += y_stride;
  }
  for (best_uv = best_uv_base, j = 0; j < uv_h; ++j) {
    for (i = 0; i < uv_w; ++i) {
      const int off = i;
      // Note r, g and b values here are off by W, but a constant offset on all
      // 3 components doesn't change the value of u and v with a YCbCr matrix.
      const int r = best_uv[off + 0 * uv_w];
      const int g = best_uv[off + 1 * uv_w];
      const int b = best_uv[off + 2 * uv_w];
      const int u = RGBToYUVComponent(r, g, b, yuv_matrix->rgb_to_u, sfix);
      const int v = RGBToYUVComponent(r, g, b, yuv_matrix->rgb_to_v, sfix);
      if (yuv_bit_depth <= 8) {
        u_ptr[i] = clip_8b(u);
        v_ptr[i] = clip_8b(v);
      } else {
        ((uint16_t*)u_ptr)[i] = clip(u, yuv_max);
        ((uint16_t*)v_ptr)[i] = clip(v, yuv_max);
      }
    }
    best_uv += 3 * uv_w;
    u_ptr += u_stride;
    v_ptr += v_stride;
  }
  return 1;
}

//------------------------------------------------------------------------------
// Main function
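
// malloc() wrapper that returns NULL if nmemb * size does not fit in size_t.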
static void* SafeMalloc(uint64_t nmemb, size_t size) {
  const uint64_t total_size = nmemb * (uint64_t)size;
  if (total_size != (size_t)total_size) return NULL;
  return malloc((size_t)total_size);
}

#define SAFE_ALLOC(W, H, T) ((T*)SafeMalloc((W) * (H), sizeof(T)))
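
// Iterative "sharp" RGB->YUV conversion:
//   1. Import the RGB samples into a W/RGB representation (gamma-space luma
//      plus half-resolution chroma residuals) and compute the target planes.
//   2. Repeatedly upsample the current chroma estimate, re-derive the
//      resulting luma/chroma, and update the estimates towards the targets
//      (at most kNumIterations passes, with an early-exit threshold).
//   3. Convert the refined W/RGB planes to the final Y/U/V output.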
static int DoSharpArgbToYuv(const uint8_t* r_ptr, const uint8_t* g_ptr,
                            const uint8_t* b_ptr, int rgb_step, int rgb_stride,
                            int rgb_bit_depth, uint8_t* y_ptr, int y_stride,
                            uint8_t* u_ptr, int u_stride, uint8_t* v_ptr,
                            int v_stride, int yuv_bit_depth, int width,
                            int height,
                            const SharpYuvConversionMatrix* yuv_matrix) {
  // we expand the right/bottom border if needed
  const int w = (width + 1) & ~1;
  const int h = (height + 1) & ~1;
  const int uv_w = w >> 1;
  const int uv_h = h >> 1;
  uint64_t prev_diff_y_sum = ~0;
  int j, iter;

  // TODO(skal): allocate one big memory chunk. But for now, it's easier
  // for valgrind debugging to have several chunks.
  fixed_y_t* const tmp_buffer = SAFE_ALLOC(w * 3, 2, fixed_y_t);  // scratch
  fixed_y_t* const best_y_base = SAFE_ALLOC(w, h, fixed_y_t);
  fixed_y_t* const target_y_base = SAFE_ALLOC(w, h, fixed_y_t);
  fixed_y_t* const best_rgb_y = SAFE_ALLOC(w, 2, fixed_y_t);
  fixed_t* const best_uv_base = SAFE_ALLOC(uv_w * 3, uv_h, fixed_t);
  fixed_t* const target_uv_base = SAFE_ALLOC(uv_w * 3, uv_h, fixed_t);
  fixed_t* const best_rgb_uv = SAFE_ALLOC(uv_w * 3, 1, fixed_t);
  fixed_y_t* best_y = best_y_base;
  fixed_y_t* target_y = target_y_base;
  fixed_t* best_uv = best_uv_base;
  fixed_t* target_uv = target_uv_base;
  const uint64_t diff_y_threshold = (uint64_t)(3.0 * w * h);
  int ok;
  assert(w > 0);
  assert(h > 0);

  if (best_y_base == NULL || best_uv_base == NULL ||
      target_y_base == NULL || target_uv_base == NULL ||
      best_rgb_y == NULL || best_rgb_uv == NULL ||
      tmp_buffer == NULL) {
    ok = 0;
    goto End;
  }

  // Import RGB samples to W/RGB representation.
  for (j = 0; j < height; j += 2) {
    const int is_last_row = (j == height - 1);
    fixed_y_t* const src1 = tmp_buffer + 0 * w;
    fixed_y_t* const src2 = tmp_buffer + 3 * w;

    // prepare two rows of input
    ImportOneRow(r_ptr, g_ptr, b_ptr, rgb_step, rgb_bit_depth, width,
                 src1);
    if (!is_last_row) {
      ImportOneRow(r_ptr + rgb_stride, g_ptr + rgb_stride, b_ptr + rgb_stride,
                   rgb_step, rgb_bit_depth, width, src2);
    } else {
      memcpy(src2, src1, 3 * w * sizeof(*src2));
    }
    StoreGray(src1, best_y + 0, w);
    StoreGray(src2, best_y + w, w);

    UpdateW(src1, target_y, w, rgb_bit_depth);
    UpdateW(src2, target_y + w, w, rgb_bit_depth);
    UpdateChroma(src1, src2, target_uv, uv_w, rgb_bit_depth);
    memcpy(best_uv, target_uv, 3 * uv_w * sizeof(*best_uv));
    best_y += 2 * w;
    best_uv += 3 * uv_w;
    target_y += 2 * w;
    target_uv += 3 * uv_w;
    r_ptr += 2 * rgb_stride;
    g_ptr += 2 * rgb_stride;
    b_ptr += 2 * rgb_stride;
  }

  // Iterate and resolve clipping conflicts.
  for (iter = 0; iter < kNumIterations; ++iter) {
    const fixed_t* cur_uv = best_uv_base;
    const fixed_t* prev_uv = best_uv_base;
    uint64_t diff_y_sum = 0;

    best_y = best_y_base;
    best_uv = best_uv_base;
    target_y = target_y_base;
    target_uv = target_uv_base;
    for (j = 0; j < h; j += 2) {
      fixed_y_t* const src1 = tmp_buffer + 0 * w;
      fixed_y_t* const src2 = tmp_buffer + 3 * w;
      {
        const fixed_t* const next_uv = cur_uv + ((j < h - 2) ? 3 * uv_w : 0);
        InterpolateTwoRows(best_y, prev_uv, cur_uv, next_uv, w,
                           src1, src2, rgb_bit_depth);
        prev_uv = cur_uv;
        cur_uv = next_uv;
      }

      UpdateW(src1, best_rgb_y + 0 * w, w, rgb_bit_depth);
      UpdateW(src2, best_rgb_y + 1 * w, w, rgb_bit_depth);
      UpdateChroma(src1, src2, best_rgb_uv, uv_w, rgb_bit_depth);

      // update two rows of Y and one row of RGB
      diff_y_sum +=
          SharpYuvUpdateY(target_y, best_rgb_y, best_y, 2 * w,
                          rgb_bit_depth + GetPrecisionShift(rgb_bit_depth));
      SharpYuvUpdateRGB(target_uv, best_rgb_uv, best_uv, 3 * uv_w);

      best_y += 2 * w;
      best_uv += 3 * uv_w;
      target_y += 2 * w;
      target_uv += 3 * uv_w;
    }
    // test exit condition
    if (iter > 0) {
      if (diff_y_sum < diff_y_threshold) break;
      if (diff_y_sum > prev_diff_y_sum) break;
    }
    prev_diff_y_sum = diff_y_sum;
  }

  // final reconstruction
  ok = ConvertWRGBToYUV(best_y_base, best_uv_base, y_ptr, y_stride, u_ptr,
                        u_stride, v_ptr, v_stride, rgb_bit_depth, yuv_bit_depth,
                        width, height, yuv_matrix);

 End:
  free(best_y_base);
  free(best_uv_base);
  free(target_y_base);
  free(target_uv_base);
  free(best_rgb_y);
  free(best_rgb_uv);
  free(tmp_buffer);
  return ok;
}
#undef SAFE_ALLOC
#if defined(WEBP_USE_THREAD) && !defined(_WIN32)
#include <pthread.h>  // NOLINT

#define LOCK_ACCESS \
  static pthread_mutex_t sharpyuv_lock = PTHREAD_MUTEX_INITIALIZER; \
  if (pthread_mutex_lock(&sharpyuv_lock)) return
#define UNLOCK_ACCESS_AND_RETURN                \
  do {                                          \
    (void)pthread_mutex_unlock(&sharpyuv_lock); \
    return;                                     \
  } while (0)
#else  // !(defined(WEBP_USE_THREAD) && !defined(_WIN32))
#define LOCK_ACCESS do {} while (0)
#define UNLOCK_ACCESS_AND_RETURN return
#endif  // defined(WEBP_USE_THREAD) && !defined(_WIN32)

// Hidden exported init function.
// By default SharpYuvConvert calls it with SharpYuvGetCPUInfo. If needed,
// users can declare it as extern and call it with an alternate VP8CPUInfo
// function.
extern VP8CPUInfo SharpYuvGetCPUInfo;
SHARPYUV_EXTERN void SharpYuvInit(VP8CPUInfo cpu_info_func);
void SharpYuvInit(VP8CPUInfo cpu_info_func) {
  static volatile VP8CPUInfo sharpyuv_last_cpuinfo_used =
      (VP8CPUInfo)&sharpyuv_last_cpuinfo_used;
  LOCK_ACCESS;
  // Only update SharpYuvGetCPUInfo when called from external code to avoid a
  // race on reading the value in SharpYuvConvert().
  if (cpu_info_func != (VP8CPUInfo)&SharpYuvGetCPUInfo) {
    SharpYuvGetCPUInfo = cpu_info_func;
  }
  if (sharpyuv_last_cpuinfo_used == SharpYuvGetCPUInfo) {
    UNLOCK_ACCESS_AND_RETURN;
  }

  SharpYuvInitDsp();
  SharpYuvInitGammaTables();

  sharpyuv_last_cpuinfo_used = SharpYuvGetCPUInfo;
  UNLOCK_ACCESS_AND_RETURN;
}
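
// Public entry point: validates the arguments, rescales the conversion matrix
// from rgb_bit_depth to yuv_bit_depth if needed, then runs the sharp
// conversion.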
int SharpYuvConvert(const void* r_ptr, const void* g_ptr,
                    const void* b_ptr, int rgb_step, int rgb_stride,
                    int rgb_bit_depth, void* y_ptr, int y_stride,
                    void* u_ptr, int u_stride, void* v_ptr,
                    int v_stride, int yuv_bit_depth, int width,
                    int height, const SharpYuvConversionMatrix* yuv_matrix) {
  SharpYuvConversionMatrix scaled_matrix;
  const int rgb_max = (1 << rgb_bit_depth) - 1;
  const int rgb_round = 1 << (rgb_bit_depth - 1);
  const int yuv_max = (1 << yuv_bit_depth) - 1;
  const int sfix = GetPrecisionShift(rgb_bit_depth);

  if (width < 1 || height < 1 || width == INT_MAX || height == INT_MAX ||
      r_ptr == NULL || g_ptr == NULL || b_ptr == NULL || y_ptr == NULL ||
      u_ptr == NULL || v_ptr == NULL) {
    return 0;
  }
  if (rgb_bit_depth != 8 && rgb_bit_depth != 10 && rgb_bit_depth != 12 &&
      rgb_bit_depth != 16) {
    return 0;
  }
  if (yuv_bit_depth != 8 && yuv_bit_depth != 10 && yuv_bit_depth != 12) {
    return 0;
  }
  if (rgb_bit_depth > 8 && (rgb_step % 2 != 0 || rgb_stride % 2 != 0)) {
    // Step/stride should be even for uint16_t buffers.
    return 0;
  }
  if (yuv_bit_depth > 8 &&
      (y_stride % 2 != 0 || u_stride % 2 != 0 || v_stride % 2 != 0)) {
    // Stride should be even for uint16_t buffers.
    return 0;
  }
  // The address of the function pointer is used to avoid a read race.
  SharpYuvInit((VP8CPUInfo)&SharpYuvGetCPUInfo);

  // Fold the rgb_bit_depth -> yuv_bit_depth scaling factor into the rgb->yuv
  // conversion matrix.
  if (rgb_bit_depth == yuv_bit_depth) {
    memcpy(&scaled_matrix, yuv_matrix, sizeof(scaled_matrix));
  } else {
    int i;
    for (i = 0; i < 3; ++i) {
      scaled_matrix.rgb_to_y[i] =
          (yuv_matrix->rgb_to_y[i] * yuv_max + rgb_round) / rgb_max;
      scaled_matrix.rgb_to_u[i] =
          (yuv_matrix->rgb_to_u[i] * yuv_max + rgb_round) / rgb_max;
      scaled_matrix.rgb_to_v[i] =
          (yuv_matrix->rgb_to_v[i] * yuv_max + rgb_round) / rgb_max;
    }
  }
  // Also incorporate precision change scaling.
  scaled_matrix.rgb_to_y[3] = Shift(yuv_matrix->rgb_to_y[3], sfix);
  scaled_matrix.rgb_to_u[3] = Shift(yuv_matrix->rgb_to_u[3], sfix);
  scaled_matrix.rgb_to_v[3] = Shift(yuv_matrix->rgb_to_v[3], sfix);

  return DoSharpArgbToYuv(r_ptr, g_ptr, b_ptr, rgb_step, rgb_stride,
                          rgb_bit_depth, y_ptr, y_stride, u_ptr, u_stride,
                          v_ptr, v_stride, yuv_bit_depth, width, height,
                          &scaled_matrix);
}

//------------------------------------------------------------------------------