| 1 | /* | 
|---|
| 2 | * Copyright 2014 Google Inc. | 
|---|
| 3 | * | 
|---|
| 4 | * Use of this source code is governed by a BSD-style license that can be | 
|---|
| 5 | * found in the LICENSE file. | 
|---|
| 6 | */ | 
|---|
| 7 |  | 
|---|
| 8 | #ifndef SkHalf_DEFINED | 
|---|
| 9 | #define SkHalf_DEFINED | 
|---|
| 10 |  | 
|---|
| 11 | #include "include/core/SkTypes.h" | 
|---|
| 12 | #include "include/private/SkNx.h" | 
|---|
| 13 |  | 
|---|
// SkHalf holds the raw bits of a 16-bit (half precision) floating point value.
// Bit layout: 1 sign bit, 5 exponent bits, 10 mantissa bits.
// It is a storage format only.
using SkHalf = uint16_t;

// Bit patterns of a few notable half values.
static constexpr uint16_t SK_HalfMin     = 0x0400; // 2^-14  (minimum positive normal value)
static constexpr uint16_t SK_HalfMax     = 0x7bff; // 65504
static constexpr uint16_t SK_HalfEpsilon = 0x1400; // 2^-10
static constexpr uint16_t SK_Half1       = 0x3C00; // 1
|---|
| 23 |  | 
|---|
| 24 | // convert between half and single precision floating point | 
|---|
| 25 | float SkHalfToFloat(SkHalf h); | 
|---|
| 26 | SkHalf SkFloatToHalf(float f); | 
|---|
| 27 |  | 
|---|
| 28 | // Convert between half and single precision floating point, | 
|---|
| 29 | // assuming inputs and outputs are both finite, and may | 
|---|
| 30 | // flush values which would be denormal half floats to zero. | 
|---|
| 31 | static inline Sk4f SkHalfToFloat_finite_ftz(uint64_t); | 
|---|
| 32 | static inline Sk4h SkFloatToHalf_finite_ftz(const Sk4f&); | 
|---|
| 33 |  | 
|---|
| 34 | // ~~~~~~~~~~~ impl ~~~~~~~~~~~~~~ // | 
|---|
| 35 |  | 
|---|
| 36 | // Like the serial versions in SkHalf.cpp, these are based on | 
|---|
| 37 | // https://fgiesen.wordpress.com/2012/03/28/half-to-float-done-quic/ | 
|---|
| 38 |  | 
|---|
| 39 | // GCC 4.9 lacks the intrinsics to use ARMv8 f16<->f32 instructions, so we use inline assembly. | 
|---|
| 40 |  | 
|---|
| 41 | static inline Sk4f SkHalfToFloat_finite_ftz(uint64_t rgba) { | 
|---|
| 42 | Sk4h hs = Sk4h::Load(&rgba); | 
|---|
| 43 | #if !defined(SKNX_NO_SIMD) && defined(SK_CPU_ARM64) | 
|---|
| 44 | float32x4_t fs; | 
|---|
| 45 | asm ( "fcvtl %[fs].4s, %[hs].4h   \n"// vcvt_f32_f16(...) | 
|---|
| 46 | : [fs] "=w"(fs)                   // =w: write-only NEON register | 
|---|
| 47 | : [hs] "w"(hs.fVec));             //  w: read-only NEON register | 
|---|
| 48 | return fs; | 
|---|
| 49 | #else | 
|---|
| 50 | Sk4i bits     = SkNx_cast<int>(hs),  // Expand to 32 bit. | 
|---|
| 51 | sign     = bits & 0x00008000,   // Save the sign bit for later... | 
|---|
| 52 | positive = bits ^ sign,         // ...but strip it off for now. | 
|---|
| 53 | is_norm  = 0x03ff < positive;   // Exponent > 0? | 
|---|
| 54 |  | 
|---|
| 55 | // For normal half floats, extend the mantissa by 13 zero bits, | 
|---|
| 56 | // then adjust the exponent from 15 bias to 127 bias. | 
|---|
| 57 | Sk4i norm = (positive << 13) + ((127 - 15) << 23); | 
|---|
| 58 |  | 
|---|
| 59 | Sk4i merged = (sign << 16) | (norm & is_norm); | 
|---|
| 60 | return Sk4f::Load(&merged); | 
|---|
| 61 | #endif | 
|---|
| 62 | } | 
|---|
| 63 |  | 
|---|
| 64 | static inline Sk4h SkFloatToHalf_finite_ftz(const Sk4f& fs) { | 
|---|
| 65 | #if !defined(SKNX_NO_SIMD) && defined(SK_CPU_ARM64) | 
|---|
| 66 | float32x4_t vec = fs.fVec; | 
|---|
| 67 | asm ( "fcvtn %[vec].4h, %[vec].4s  \n"// vcvt_f16_f32(vec) | 
|---|
| 68 | : [vec] "+w"(vec));                // +w: read-write NEON register | 
|---|
| 69 | return vreinterpret_u16_f32(vget_low_f32(vec)); | 
|---|
| 70 | #else | 
|---|
| 71 | Sk4i bits         = Sk4i::Load(&fs), | 
|---|
| 72 | sign         = bits & 0x80000000,      // Save the sign bit for later... | 
|---|
| 73 | positive     = bits ^ sign,            // ...but strip it off for now. | 
|---|
| 74 | will_be_norm = 0x387fdfff < positive;  // greater than largest denorm half? | 
|---|
| 75 |  | 
|---|
| 76 | // For normal half floats, adjust the exponent from 127 bias to 15 bias, | 
|---|
| 77 | // then drop the bottom 13 mantissa bits. | 
|---|
| 78 | Sk4i norm = (positive - ((127 - 15) << 23)) >> 13; | 
|---|
| 79 |  | 
|---|
| 80 | Sk4i merged = (sign >> 16) | (will_be_norm & norm); | 
|---|
| 81 | return SkNx_cast<uint16_t>(merged); | 
|---|
| 82 | #endif | 
|---|
| 83 | } | 
|---|
| 84 |  | 
|---|
| 85 | #endif | 
|---|
| 86 |  | 
|---|