| 1 | // Copyright 2016 The SwiftShader Authors. All Rights Reserved. | 
|---|
| 2 | // | 
|---|
| 3 | // Licensed under the Apache License, Version 2.0 (the "License"); | 
|---|
| 4 | // you may not use this file except in compliance with the License. | 
|---|
| 5 | // You may obtain a copy of the License at | 
|---|
| 6 | // | 
|---|
| 7 | //    http://www.apache.org/licenses/LICENSE-2.0 | 
|---|
| 8 | // | 
|---|
| 9 | // Unless required by applicable law or agreed to in writing, software | 
|---|
| 10 | // distributed under the License is distributed on an "AS IS" BASIS, | 
|---|
| 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | 
|---|
| 12 | // See the License for the specific language governing permissions and | 
|---|
| 13 | // limitations under the License. | 
|---|
| 14 |  | 
|---|
| 15 | #ifndef sw_Half_hpp | 
|---|
| 16 | #define sw_Half_hpp | 
|---|
| 17 |  | 
|---|
| 18 | #include "Math.hpp" | 
|---|
| 19 |  | 
|---|
| 20 | #include <algorithm> | 
|---|
| 21 | #include <cmath> | 
|---|
| 22 |  | 
|---|
| 23 | namespace sw | 
|---|
| 24 | { | 
|---|
// Half-precision value kept as a raw bit pattern in an unsigned short.
// Only the interface is declared here; the member functions are defined
// out of line.
class half
{
public:
	// Trivial default construction: the stored bits are left uninitialized.
	half() = default;

	// Builds a half from a single-precision value. Marked explicit, so a float
	// never turns into a half implicitly.
	explicit half(float f);

	// Assignment from a single-precision value or from another half.
	half &operator=(float f);
	half &operator=(half h);

	// Implicit conversion to single precision.
	operator float() const;

private:
	// Raw storage for the value's bit pattern.
	unsigned short fp16i;
};
|---|
| 39 |  | 
|---|
| 40 | inline half shortAsHalf(short s) | 
|---|
| 41 | { | 
|---|
| 42 | union | 
|---|
| 43 | { | 
|---|
| 44 | half h; | 
|---|
| 45 | short s; | 
|---|
| 46 | } hs; | 
|---|
| 47 |  | 
|---|
| 48 | hs.s = s; | 
|---|
| 49 |  | 
|---|
| 50 | return hs.h; | 
|---|
| 51 | } | 
|---|
| 52 |  | 
|---|
| 53 | class RGB9E5 | 
|---|
| 54 | { | 
|---|
| 55 | unsigned int R : 9; | 
|---|
| 56 | unsigned int G : 9; | 
|---|
| 57 | unsigned int B : 9; | 
|---|
| 58 | unsigned int E : 5; | 
|---|
| 59 |  | 
|---|
| 60 | public: | 
|---|
| 61 | RGB9E5(float rgb[3]) : RGB9E5(rgb[0], rgb[1], rgb[2]) | 
|---|
| 62 | { | 
|---|
| 63 | } | 
|---|
| 64 |  | 
|---|
| 65 | RGB9E5(float r, float g, float b) | 
|---|
| 66 | { | 
|---|
| 67 | // Vulkan 1.1.117 section 15.2.1 RGB to Shared Exponent Conversion | 
|---|
| 68 |  | 
|---|
| 69 | // B is the exponent bias (15) | 
|---|
| 70 | constexpr int g_sharedexp_bias = 15; | 
|---|
| 71 |  | 
|---|
| 72 | // N is the number of mantissa bits per component (9) | 
|---|
| 73 | constexpr int g_sharedexp_mantissabits = 9; | 
|---|
| 74 |  | 
|---|
| 75 | // Emax is the maximum allowed biased exponent value (31) | 
|---|
| 76 | constexpr int g_sharedexp_maxexponent = 31; | 
|---|
| 77 |  | 
|---|
| 78 | constexpr float g_sharedexp_max = | 
|---|
| 79 | ((static_cast<float>(1 << g_sharedexp_mantissabits) - 1) / | 
|---|
| 80 | static_cast<float>(1 << g_sharedexp_mantissabits)) * | 
|---|
| 81 | static_cast<float>(1 << (g_sharedexp_maxexponent - g_sharedexp_bias)); | 
|---|
| 82 |  | 
|---|
| 83 | // Clamp components to valid range. NaN becomes 0. | 
|---|
| 84 | const float red_c =   std::min(!(r > 0) ? 0 : r, g_sharedexp_max); | 
|---|
| 85 | const float green_c = std::min(!(g > 0) ? 0 : g, g_sharedexp_max); | 
|---|
| 86 | const float blue_c =  std::min(!(b > 0) ? 0 : b, g_sharedexp_max); | 
|---|
| 87 |  | 
|---|
| 88 | // We're reducing the mantissa to 9 bits, so we must round up if the next | 
|---|
| 89 | // bit is 1. In other words add 0.5 to the new mantissa's position and | 
|---|
| 90 | // allow overflow into the exponent so we can scale correctly. | 
|---|
| 91 | constexpr int half = 1 << (23 - g_sharedexp_mantissabits); | 
|---|
| 92 | const float red_r = bit_cast<float>(bit_cast<int>(red_c) + half); | 
|---|
| 93 | const float green_r = bit_cast<float>(bit_cast<int>(green_c) + half); | 
|---|
| 94 | const float blue_r = bit_cast<float>(bit_cast<int>(blue_c) + half); | 
|---|
| 95 |  | 
|---|
| 96 | // The largest component determines the shared exponent. It can't be lower | 
|---|
| 97 | // than 0 (after bias subtraction) so also limit to the mimimum representable. | 
|---|
| 98 | constexpr float min_s = 0.5f / (1 << g_sharedexp_bias); | 
|---|
| 99 | float max_s = std::max(std::max(red_r, green_r), std::max(blue_r, min_s)); | 
|---|
| 100 |  | 
|---|
| 101 | // Obtain the reciprocal of the shared exponent by inverting the bits, | 
|---|
| 102 | // and scale by the new mantissa's size. Note that the IEEE-754 single-precision | 
|---|
| 103 | // format has an implicit leading 1, but this shared component format does not. | 
|---|
| 104 | float scale = bit_cast<float>((bit_cast<int>(max_s) & 0x7F800000) ^ 0x7F800000) * (1 << (g_sharedexp_mantissabits - 2)); | 
|---|
| 105 |  | 
|---|
| 106 | R = static_cast<unsigned int>(round(red_c * scale)); | 
|---|
| 107 | G = static_cast<unsigned int>(round(green_c * scale)); | 
|---|
| 108 | B = static_cast<unsigned int>(round(blue_c * scale)); | 
|---|
| 109 | E = (bit_cast<unsigned int>(max_s) >> 23) - 127 + 15 + 1; | 
|---|
| 110 | } | 
|---|
| 111 |  | 
|---|
| 112 | operator unsigned int() const | 
|---|
| 113 | { | 
|---|
| 114 | return *reinterpret_cast<const unsigned int*>(this); | 
|---|
| 115 | } | 
|---|
| 116 |  | 
|---|
| 117 | void toRGB16F(half rgb[3]) const | 
|---|
| 118 | { | 
|---|
| 119 | constexpr int offset = 24;   // Exponent bias (15) + number of mantissa bits per component (9) = 24 | 
|---|
| 120 |  | 
|---|
| 121 | const float factor = (1u << E) * (1.0f / (1 << offset)); | 
|---|
| 122 | rgb[0] = half(R * factor); | 
|---|
| 123 | rgb[1] = half(G * factor); | 
|---|
| 124 | rgb[2] = half(B * factor); | 
|---|
| 125 | } | 
|---|
| 126 | }; | 
|---|
| 127 |  | 
|---|
| 128 | class R11G11B10F | 
|---|
| 129 | { | 
|---|
| 130 | unsigned int R : 11; | 
|---|
| 131 | unsigned int G : 11; | 
|---|
| 132 | unsigned int B : 10; | 
|---|
| 133 |  | 
|---|
| 134 | static inline half float11ToFloat16(unsigned short fp11) | 
|---|
| 135 | { | 
|---|
| 136 | return shortAsHalf(fp11 << 4);   // Sign bit 0 | 
|---|
| 137 | } | 
|---|
| 138 |  | 
|---|
| 139 | static inline half float10ToFloat16(unsigned short fp10) | 
|---|
| 140 | { | 
|---|
| 141 | return shortAsHalf(fp10 << 5);   // Sign bit 0 | 
|---|
| 142 | } | 
|---|
| 143 |  | 
|---|
| 144 | inline unsigned short float32ToFloat11(float fp32) | 
|---|
| 145 | { | 
|---|
| 146 | const unsigned int float32MantissaMask = 0x7FFFFF; | 
|---|
| 147 | const unsigned int float32ExponentMask = 0x7F800000; | 
|---|
| 148 | const unsigned int float32SignMask = 0x80000000; | 
|---|
| 149 | const unsigned int float32ValueMask = ~float32SignMask; | 
|---|
| 150 | const unsigned int float32ExponentFirstBit = 23; | 
|---|
| 151 | const unsigned int float32ExponentBias = 127; | 
|---|
| 152 |  | 
|---|
| 153 | const unsigned short float11Max = 0x7BF; | 
|---|
| 154 | const unsigned short float11MantissaMask = 0x3F; | 
|---|
| 155 | const unsigned short float11ExponentMask = 0x7C0; | 
|---|
| 156 | const unsigned short float11BitMask = 0x7FF; | 
|---|
| 157 | const unsigned int float11ExponentBias = 14; | 
|---|
| 158 |  | 
|---|
| 159 | const unsigned int float32Maxfloat11 = 0x477E0000; | 
|---|
| 160 | const unsigned int float32Minfloat11 = 0x38800000; | 
|---|
| 161 |  | 
|---|
| 162 | const unsigned int float32Bits = *reinterpret_cast<unsigned int*>(&fp32); | 
|---|
| 163 | const bool float32Sign = (float32Bits & float32SignMask) == float32SignMask; | 
|---|
| 164 |  | 
|---|
| 165 | unsigned int float32Val = float32Bits & float32ValueMask; | 
|---|
| 166 |  | 
|---|
| 167 | if((float32Val & float32ExponentMask) == float32ExponentMask) | 
|---|
| 168 | { | 
|---|
| 169 | // INF or NAN | 
|---|
| 170 | if((float32Val & float32MantissaMask) != 0) | 
|---|
| 171 | { | 
|---|
| 172 | return float11ExponentMask | | 
|---|
| 173 | (((float32Val >> 17) | (float32Val >> 11) | (float32Val >> 6) | (float32Val)) & | 
|---|
| 174 | float11MantissaMask); | 
|---|
| 175 | } | 
|---|
| 176 | else if(float32Sign) | 
|---|
| 177 | { | 
|---|
| 178 | // -INF is clamped to 0 since float11 is positive only | 
|---|
| 179 | return 0; | 
|---|
| 180 | } | 
|---|
| 181 | else | 
|---|
| 182 | { | 
|---|
| 183 | return float11ExponentMask; | 
|---|
| 184 | } | 
|---|
| 185 | } | 
|---|
| 186 | else if(float32Sign) | 
|---|
| 187 | { | 
|---|
| 188 | // float11 is positive only, so clamp to zero | 
|---|
| 189 | return 0; | 
|---|
| 190 | } | 
|---|
| 191 | else if(float32Val > float32Maxfloat11) | 
|---|
| 192 | { | 
|---|
| 193 | // The number is too large to be represented as a float11, set to max | 
|---|
| 194 | return float11Max; | 
|---|
| 195 | } | 
|---|
| 196 | else | 
|---|
| 197 | { | 
|---|
| 198 | if(float32Val < float32Minfloat11) | 
|---|
| 199 | { | 
|---|
| 200 | // The number is too small to be represented as a normalized float11 | 
|---|
| 201 | // Convert it to a denormalized value. | 
|---|
| 202 | const unsigned int shift = (float32ExponentBias - float11ExponentBias) - | 
|---|
| 203 | (float32Val >> float32ExponentFirstBit); | 
|---|
| 204 | float32Val = | 
|---|
| 205 | ((1 << float32ExponentFirstBit) | (float32Val & float32MantissaMask)) >> shift; | 
|---|
| 206 | } | 
|---|
| 207 | else | 
|---|
| 208 | { | 
|---|
| 209 | // Rebias the exponent to represent the value as a normalized float11 | 
|---|
| 210 | float32Val += 0xC8000000; | 
|---|
| 211 | } | 
|---|
| 212 |  | 
|---|
| 213 | return ((float32Val + 0xFFFF + ((float32Val >> 17) & 1)) >> 17) & float11BitMask; | 
|---|
| 214 | } | 
|---|
| 215 | } | 
|---|
| 216 |  | 
|---|
| 217 | inline unsigned short float32ToFloat10(float fp32) | 
|---|
| 218 | { | 
|---|
| 219 | const unsigned int float32MantissaMask = 0x7FFFFF; | 
|---|
| 220 | const unsigned int float32ExponentMask = 0x7F800000; | 
|---|
| 221 | const unsigned int float32SignMask = 0x80000000; | 
|---|
| 222 | const unsigned int float32ValueMask = ~float32SignMask; | 
|---|
| 223 | const unsigned int float32ExponentFirstBit = 23; | 
|---|
| 224 | const unsigned int float32ExponentBias = 127; | 
|---|
| 225 |  | 
|---|
| 226 | const unsigned short float10Max = 0x3DF; | 
|---|
| 227 | const unsigned short float10MantissaMask = 0x1F; | 
|---|
| 228 | const unsigned short float10ExponentMask = 0x3E0; | 
|---|
| 229 | const unsigned short float10BitMask = 0x3FF; | 
|---|
| 230 | const unsigned int float10ExponentBias = 14; | 
|---|
| 231 |  | 
|---|
| 232 | const unsigned int float32Maxfloat10 = 0x477C0000; | 
|---|
| 233 | const unsigned int float32Minfloat10 = 0x38800000; | 
|---|
| 234 |  | 
|---|
| 235 | const unsigned int float32Bits = *reinterpret_cast<unsigned int*>(&fp32); | 
|---|
| 236 | const bool float32Sign = (float32Bits & float32SignMask) == float32SignMask; | 
|---|
| 237 |  | 
|---|
| 238 | unsigned int float32Val = float32Bits & float32ValueMask; | 
|---|
| 239 |  | 
|---|
| 240 | if((float32Val & float32ExponentMask) == float32ExponentMask) | 
|---|
| 241 | { | 
|---|
| 242 | // INF or NAN | 
|---|
| 243 | if((float32Val & float32MantissaMask) != 0) | 
|---|
| 244 | { | 
|---|
| 245 | return float10ExponentMask | | 
|---|
| 246 | (((float32Val >> 18) | (float32Val >> 13) | (float32Val >> 3) | (float32Val)) & | 
|---|
| 247 | float10MantissaMask); | 
|---|
| 248 | } | 
|---|
| 249 | else if(float32Sign) | 
|---|
| 250 | { | 
|---|
| 251 | // -INF is clamped to 0 since float11 is positive only | 
|---|
| 252 | return 0; | 
|---|
| 253 | } | 
|---|
| 254 | else | 
|---|
| 255 | { | 
|---|
| 256 | return float10ExponentMask; | 
|---|
| 257 | } | 
|---|
| 258 | } | 
|---|
| 259 | else if(float32Sign) | 
|---|
| 260 | { | 
|---|
| 261 | // float10 is positive only, so clamp to zero | 
|---|
| 262 | return 0; | 
|---|
| 263 | } | 
|---|
| 264 | else if(float32Val > float32Maxfloat10) | 
|---|
| 265 | { | 
|---|
| 266 | // The number is too large to be represented as a float11, set to max | 
|---|
| 267 | return float10Max; | 
|---|
| 268 | } | 
|---|
| 269 | else | 
|---|
| 270 | { | 
|---|
| 271 | if(float32Val < float32Minfloat10) | 
|---|
| 272 | { | 
|---|
| 273 | // The number is too small to be represented as a normalized float11 | 
|---|
| 274 | // Convert it to a denormalized value. | 
|---|
| 275 | const unsigned int shift = (float32ExponentBias - float10ExponentBias) - | 
|---|
| 276 | (float32Val >> float32ExponentFirstBit); | 
|---|
| 277 | float32Val = | 
|---|
| 278 | ((1 << float32ExponentFirstBit) | (float32Val & float32MantissaMask)) >> shift; | 
|---|
| 279 | } | 
|---|
| 280 | else | 
|---|
| 281 | { | 
|---|
| 282 | // Rebias the exponent to represent the value as a normalized float11 | 
|---|
| 283 | float32Val += 0xC8000000; | 
|---|
| 284 | } | 
|---|
| 285 |  | 
|---|
| 286 | return ((float32Val + 0x1FFFF + ((float32Val >> 18) & 1)) >> 18) & float10BitMask; | 
|---|
| 287 | } | 
|---|
| 288 | } | 
|---|
| 289 |  | 
|---|
| 290 | public: | 
|---|
| 291 | R11G11B10F(float rgb[3]) | 
|---|
| 292 | { | 
|---|
| 293 | R = float32ToFloat11(rgb[0]); | 
|---|
| 294 | G = float32ToFloat11(rgb[1]); | 
|---|
| 295 | B = float32ToFloat10(rgb[2]); | 
|---|
| 296 | } | 
|---|
| 297 |  | 
|---|
| 298 | operator unsigned int() const | 
|---|
| 299 | { | 
|---|
| 300 | return *reinterpret_cast<const unsigned int*>(this); | 
|---|
| 301 | } | 
|---|
| 302 |  | 
|---|
| 303 | void toRGB16F(half rgb[3]) const | 
|---|
| 304 | { | 
|---|
| 305 | rgb[0] = float11ToFloat16(R); | 
|---|
| 306 | rgb[1] = float11ToFloat16(G); | 
|---|
| 307 | rgb[2] = float10ToFloat16(B); | 
|---|
| 308 | } | 
|---|
| 309 | }; | 
|---|
| 310 | } | 
|---|
| 311 |  | 
|---|
| 312 | #endif   // sw_Half_hpp | 
|---|
| 313 |  | 
|---|