| 1 | // Copyright 2015 Google Inc. All Rights Reserved. | 
|---|
| 2 | // | 
|---|
| 3 | // Use of this source code is governed by a BSD-style license | 
|---|
| 4 | // that can be found in the COPYING file in the root of the source | 
|---|
| 5 | // tree. An additional intellectual property rights grant can be found | 
|---|
| 6 | // in the file PATENTS. All contributing project authors may | 
|---|
| 7 | // be found in the AUTHORS file in the root of the source tree. | 
|---|
| 8 | // ----------------------------------------------------------------------------- | 
|---|
| 9 | // | 
|---|
| 10 | // NEON variant of methods for lossless encoder | 
|---|
| 11 | // | 
|---|
| 12 | // Author: Skal (pascal.massimino@gmail.com) | 
|---|
| 13 |  | 
|---|
| 14 | #include "./dsp.h" | 
|---|
| 15 |  | 
|---|
| 16 | #if defined(WEBP_USE_NEON) | 
|---|
| 17 |  | 
|---|
| 18 | #include <arm_neon.h> | 
|---|
| 19 |  | 
|---|
| 20 | #include "./lossless.h" | 
|---|
| 21 | #include "./neon.h" | 
|---|
| 22 |  | 
|---|
| 23 | //------------------------------------------------------------------------------ | 
|---|
| 24 | // Subtract-Green Transform | 
|---|
| 25 |  | 
|---|
// Xcode releases older than 6.3 mark the vtbl?_u8 intrinsics as unavailable
// when targeting iOS arm64. USE_VTBLQ selects the non-standard vtbl1q_u8
// based code paths for those toolchains.
#if defined(__APPLE__) && defined(__aarch64__)
#if defined(__apple_build_version__) && (__apple_build_version__ < 6020037)
#define USE_VTBLQ
#endif
#endif
|---|
| 32 |  | 
|---|
| 33 | #ifdef USE_VTBLQ | 
|---|
| 34 | // 255 = byte will be zeroed | 
|---|
| 35 | static const uint8_t kGreenShuffle[16] = { | 
|---|
| 36 | 1, 255, 1, 255, 5, 255, 5, 255, 9, 255, 9, 255, 13, 255, 13, 255 | 
|---|
| 37 | }; | 
|---|
| 38 |  | 
|---|
| 39 | static WEBP_INLINE uint8x16_t DoGreenShuffle(const uint8x16_t argb, | 
|---|
| 40 | const uint8x16_t shuffle) { | 
|---|
| 41 | return vcombine_u8(vtbl1q_u8(argb, vget_low_u8(shuffle)), | 
|---|
| 42 | vtbl1q_u8(argb, vget_high_u8(shuffle))); | 
|---|
| 43 | } | 
|---|
| 44 | #else  // !USE_VTBLQ | 
|---|
| 45 | // 255 = byte will be zeroed | 
|---|
| 46 | static const uint8_t kGreenShuffle[8] = { 1, 255, 1, 255, 5, 255, 5, 255  }; | 
|---|
| 47 |  | 
|---|
| 48 | static WEBP_INLINE uint8x16_t DoGreenShuffle(const uint8x16_t argb, | 
|---|
| 49 | const uint8x8_t shuffle) { | 
|---|
| 50 | return vcombine_u8(vtbl1_u8(vget_low_u8(argb), shuffle), | 
|---|
| 51 | vtbl1_u8(vget_high_u8(argb), shuffle)); | 
|---|
| 52 | } | 
|---|
| 53 | #endif  // USE_VTBLQ | 
|---|
| 54 |  | 
|---|
| 55 | static void SubtractGreenFromBlueAndRed(uint32_t* argb_data, int num_pixels) { | 
|---|
| 56 | const uint32_t* const end = argb_data + (num_pixels & ~3); | 
|---|
| 57 | #ifdef USE_VTBLQ | 
|---|
| 58 | const uint8x16_t shuffle = vld1q_u8(kGreenShuffle); | 
|---|
| 59 | #else | 
|---|
| 60 | const uint8x8_t shuffle = vld1_u8(kGreenShuffle); | 
|---|
| 61 | #endif | 
|---|
| 62 | for (; argb_data < end; argb_data += 4) { | 
|---|
| 63 | const uint8x16_t argb = vld1q_u8((uint8_t*)argb_data); | 
|---|
| 64 | const uint8x16_t greens = DoGreenShuffle(argb, shuffle); | 
|---|
| 65 | vst1q_u8((uint8_t*)argb_data, vsubq_u8(argb, greens)); | 
|---|
| 66 | } | 
|---|
| 67 | // fallthrough and finish off with plain-C | 
|---|
| 68 | VP8LSubtractGreenFromBlueAndRed_C(argb_data, num_pixels & 3); | 
|---|
| 69 | } | 
|---|
| 70 |  | 
|---|
| 71 | //------------------------------------------------------------------------------ | 
|---|
| 72 | // Color Transform | 
|---|
| 73 |  | 
|---|
| 74 | static void TransformColor(const VP8LMultipliers* const m, | 
|---|
| 75 | uint32_t* argb_data, int num_pixels) { | 
|---|
| 76 | // sign-extended multiplying constants, pre-shifted by 6. | 
|---|
| 77 | #define CST(X)  (((int16_t)(m->X << 8)) >> 6) | 
|---|
| 78 | const int16_t rb[8] = { | 
|---|
| 79 | CST(green_to_blue_), CST(green_to_red_), | 
|---|
| 80 | CST(green_to_blue_), CST(green_to_red_), | 
|---|
| 81 | CST(green_to_blue_), CST(green_to_red_), | 
|---|
| 82 | CST(green_to_blue_), CST(green_to_red_) | 
|---|
| 83 | }; | 
|---|
| 84 | const int16x8_t mults_rb = vld1q_s16(rb); | 
|---|
| 85 | const int16_t b2[8] = { | 
|---|
| 86 | 0, CST(red_to_blue_), 0, CST(red_to_blue_), | 
|---|
| 87 | 0, CST(red_to_blue_), 0, CST(red_to_blue_), | 
|---|
| 88 | }; | 
|---|
| 89 | const int16x8_t mults_b2 = vld1q_s16(b2); | 
|---|
| 90 | #undef CST | 
|---|
| 91 | #ifdef USE_VTBLQ | 
|---|
| 92 | static const uint8_t kg0g0[16] = { | 
|---|
| 93 | 255, 1, 255, 1, 255, 5, 255, 5, 255, 9, 255, 9, 255, 13, 255, 13 | 
|---|
| 94 | }; | 
|---|
| 95 | const uint8x16_t shuffle = vld1q_u8(kg0g0); | 
|---|
| 96 | #else | 
|---|
| 97 | static const uint8_t k0g0g[8] = { 255, 1, 255, 1, 255, 5, 255, 5 }; | 
|---|
| 98 | const uint8x8_t shuffle = vld1_u8(k0g0g); | 
|---|
| 99 | #endif | 
|---|
| 100 | const uint32x4_t mask_rb = vdupq_n_u32(0x00ff00ffu);  // red-blue masks | 
|---|
| 101 | int i; | 
|---|
| 102 | for (i = 0; i + 4 <= num_pixels; i += 4) { | 
|---|
| 103 | const uint8x16_t in = vld1q_u8((uint8_t*)(argb_data + i)); | 
|---|
| 104 | // 0 g 0 g | 
|---|
| 105 | const uint8x16_t greens = DoGreenShuffle(in, shuffle); | 
|---|
| 106 | // x dr  x db1 | 
|---|
| 107 | const int16x8_t A = vqdmulhq_s16(vreinterpretq_s16_u8(greens), mults_rb); | 
|---|
| 108 | // r 0   b   0 | 
|---|
| 109 | const int16x8_t B = vshlq_n_s16(vreinterpretq_s16_u8(in), 8); | 
|---|
| 110 | // x db2 0   0 | 
|---|
| 111 | const int16x8_t C = vqdmulhq_s16(B, mults_b2); | 
|---|
| 112 | // 0 0   x db2 | 
|---|
| 113 | const uint32x4_t D = vshrq_n_u32(vreinterpretq_u32_s16(C), 16); | 
|---|
| 114 | // x dr  x  db | 
|---|
| 115 | const int8x16_t E = vaddq_s8(vreinterpretq_s8_u32(D), | 
|---|
| 116 | vreinterpretq_s8_s16(A)); | 
|---|
| 117 | // 0 dr  0  db | 
|---|
| 118 | const uint32x4_t F = vandq_u32(vreinterpretq_u32_s8(E), mask_rb); | 
|---|
| 119 | const int8x16_t out = vsubq_s8(vreinterpretq_s8_u8(in), | 
|---|
| 120 | vreinterpretq_s8_u32(F)); | 
|---|
| 121 | vst1q_s8((int8_t*)(argb_data + i), out); | 
|---|
| 122 | } | 
|---|
| 123 | // fallthrough and finish off with plain-C | 
|---|
| 124 | VP8LTransformColor_C(m, argb_data + i, num_pixels - i); | 
|---|
| 125 | } | 
|---|
| 126 |  | 
|---|
| 127 | #undef USE_VTBLQ | 
|---|
| 128 |  | 
|---|
| 129 | //------------------------------------------------------------------------------ | 
|---|
| 130 | // Entry point | 
|---|
| 131 |  | 
|---|
| 132 | extern void VP8LEncDspInitNEON(void); | 
|---|
| 133 |  | 
|---|
| 134 | WEBP_TSAN_IGNORE_FUNCTION void VP8LEncDspInitNEON(void) { | 
|---|
| 135 | VP8LSubtractGreenFromBlueAndRed = SubtractGreenFromBlueAndRed; | 
|---|
| 136 | VP8LTransformColor = TransformColor; | 
|---|
| 137 | } | 
|---|
| 138 |  | 
|---|
| 139 | #else  // !WEBP_USE_NEON | 
|---|
| 140 |  | 
|---|
| 141 | WEBP_DSP_INIT_STUB(VP8LEncDspInitNEON) | 
|---|
| 142 |  | 
|---|
| 143 | #endif  // WEBP_USE_NEON | 
|---|
| 144 |  | 
|---|