| 1 | // SPDX-License-Identifier: Apache-2.0 |
| 2 | // ---------------------------------------------------------------------------- |
| 3 | // Copyright 2011-2021 Arm Limited |
| 4 | // |
| 5 | // Licensed under the Apache License, Version 2.0 (the "License"); you may not |
| 6 | // use this file except in compliance with the License. You may obtain a copy |
| 7 | // of the License at: |
| 8 | // |
| 9 | // http://www.apache.org/licenses/LICENSE-2.0 |
| 10 | // |
| 11 | // Unless required by applicable law or agreed to in writing, software |
| 12 | // distributed under the License is distributed on an "AS IS" BASIS, WITHOUT |
| 13 | // WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the |
| 14 | // License for the specific language governing permissions and limitations |
| 15 | // under the License. |
| 16 | // ---------------------------------------------------------------------------- |
| 17 | |
| 18 | /** |
| 19 | * @brief Soft-float library for IEEE-754. |
| 20 | */ |
| 21 | #if (ASTCENC_F16C == 0) && (ASTCENC_NEON == 0) |
| 22 | |
| 23 | #include "astcenc_mathlib.h" |
| 24 | |
/* Sized soft-float types. These are mapped to the fixed-width unsigned
   integer types of C99 rather than to the native floating-point types,
   because the library needs exact, bit-level control over every
   operation on these values.
   sf16 holds an FP16 bit pattern, sf32 holds an FP32 bit pattern. */
typedef uint16_t sf16;
typedef uint32_t sf32;
| 31 | |
| 32 | /****************************************** |
| 33 | helper functions and their lookup tables |
| 34 | ******************************************/ |
| 35 | /* count leading zeros functions. Only used when the input is nonzero. */ |
| 36 | |
/*
   32-bit count-leading-zeros function.

   Returns the number of zero bits above the most significant set bit of
   'inp' (0 when bit 31 is set, 31 when only bit 0 is set). A zero input
   returns 32 on every platform; previously the x86 path returned 31 while
   the ARM and table-driven paths returned 32.

   A compiler intrinsic is used whenever one is available, so the optimiser
   can see through the operation; otherwise a portable byte-wise table
   lookup is used.
*/
static uint32_t clz32(uint32_t inp)
{
    /* The intrinsics below are undefined (or inconsistent) for zero, so
       handle it explicitly and identically on every platform. */
    if (inp == 0)
    {
        return 32;
    }

#if defined(__arm__) && defined(__ARMCC_VERSION)
    return __clz(inp); /* armcc builtin */
#elif defined(__GNUC__)
    /* GCC and Clang, any target architecture. */
    return (uint32_t)__builtin_clz(inp);
#else
    /* Portable fallback: clz_table[b] is the number of leading zero bits
       in the 8-bit value 'b' (8 for b == 0). */
    static const uint8_t clz_table[256] =
    {
        8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
        3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
    };

    /* Narrow the value down to its most significant non-zero byte,
       tracking how many zero bits sit above that byte. */
    uint32_t summa = 24;
    if (inp >= UINT32_C(0x10000))
    {
        inp >>= 16;
        summa -= 16;
    }
    if (inp >= UINT32_C(0x100))
    {
        inp >>= 8;
        summa -= 8;
    }
    return summa + clz_table[inp];
#endif
}
| 97 | |
/* The five rounding modes that IEEE-754r defines.
   The numeric values are significant: sf32_to_sf16() adds the mode to a
   per-exponent case index, so they must stay 0..4 in this order. */
typedef enum
{
    /* round towards positive infinity */
    SF_UP = 0,
    /* round towards negative infinity */
    SF_DOWN = 1,
    /* round towards zero */
    SF_TOZERO = 2,
    /* round toward nearest value; if mid-between, round to even value */
    SF_NEARESTEVEN = 3,
    /* round toward nearest value; if mid-between, round away from zero */
    SF_NEARESTAWAY = 4
} roundmode;
| 107 | |
| 108 | |
/* Right-shift 'inp' by 'shamt' bits, rounding to nearest with ties going
   to the even result. 'shamt' must be less than 32. */
static uint32_t rtne_shift32(uint32_t inp, uint32_t shamt)
{
    const uint32_t unit = UINT32_C(1) << shamt; /* weight of the result's LSB */
    const uint32_t half = unit >> 1;            /* 0.5 ULP of the result */

    /* Is the bit that becomes the result's LSB set? The '| 1' makes the
       test succeed unconditionally when shamt is 0, so that no adjustment
       is applied to an unshifted value. */
    const uint32_t lsb_is_odd = ((inp | UINT32_C(1)) & unit) != 0;

    /* Odd: add exactly 0.5 ULP so a tie rounds up to the even neighbour.
       Even: add 0.5 ULP minus epsilon so a tie rounds down and stays even. */
    const uint32_t epsilon = lsb_is_odd ? UINT32_C(0) : UINT32_C(1);

    return (inp + half - epsilon) >> shamt;
}
| 119 | |
/* Right-shift 'inp' by 'shamt' bits, rounding to nearest with ties going
   away from zero (i.e. upwards, as the value is unsigned).
   'shamt' must be less than 32. */
static uint32_t rtna_shift32(uint32_t inp, uint32_t shamt)
{
    /* 0.5 ULP of the result; evaluates to 0 when shamt is 0. */
    const uint32_t half_ulp = (UINT32_C(1) << shamt) >> 1;
    return (inp + half_ulp) >> shamt;
}
| 127 | |
/* Right-shift 'inp' by 'shamt' bits, rounding up: any non-zero bit that
   is shifted out bumps the result by one. 'shamt' must be less than 32. */
static uint32_t rtup_shift32(uint32_t inp, uint32_t shamt)
{
    /* Adding (2^shamt - 1) before the shift turns truncation into ceiling. */
    const uint32_t bias = (UINT32_C(1) << shamt) - UINT32_C(1);
    return (inp + bias) >> shamt;
}
| 136 | |
| 137 | /* convert from FP16 to FP32. */ |
| 138 | static sf32 sf16_to_sf32(sf16 inp) |
| 139 | { |
| 140 | uint32_t inpx = inp; |
| 141 | |
| 142 | /* |
| 143 | This table contains, for every FP16 sign/exponent value combination, |
| 144 | the difference between the input FP16 value and the value obtained |
| 145 | by shifting the correct FP32 result right by 13 bits. |
| 146 | This table allows us to handle every case except denormals and NaN |
| 147 | with just 1 table lookup, 2 shifts and 1 add. |
| 148 | */ |
| 149 | |
| 150 | #define WITH_MSB(a) (UINT32_C(a) | (1u << 31)) |
| 151 | static const uint32_t tbl[64] = |
| 152 | { |
| 153 | WITH_MSB(0x00000), 0x1C000, 0x1C000, 0x1C000, 0x1C000, 0x1C000, 0x1C000, 0x1C000, |
| 154 | 0x1C000, 0x1C000, 0x1C000, 0x1C000, 0x1C000, 0x1C000, 0x1C000, 0x1C000, |
| 155 | 0x1C000, 0x1C000, 0x1C000, 0x1C000, 0x1C000, 0x1C000, 0x1C000, 0x1C000, |
| 156 | 0x1C000, 0x1C000, 0x1C000, 0x1C000, 0x1C000, 0x1C000, 0x1C000, WITH_MSB(0x38000), |
| 157 | WITH_MSB(0x38000), 0x54000, 0x54000, 0x54000, 0x54000, 0x54000, 0x54000, 0x54000, |
| 158 | 0x54000, 0x54000, 0x54000, 0x54000, 0x54000, 0x54000, 0x54000, 0x54000, |
| 159 | 0x54000, 0x54000, 0x54000, 0x54000, 0x54000, 0x54000, 0x54000, 0x54000, |
| 160 | 0x54000, 0x54000, 0x54000, 0x54000, 0x54000, 0x54000, 0x54000, WITH_MSB(0x70000) |
| 161 | }; |
| 162 | |
| 163 | uint32_t res = tbl[inpx >> 10]; |
| 164 | res += inpx; |
| 165 | |
| 166 | /* Normal cases: MSB of 'res' not set. */ |
| 167 | if ((res & WITH_MSB(0)) == 0) |
| 168 | { |
| 169 | return res << 13; |
| 170 | } |
| 171 | |
| 172 | /* Infinity and Zero: 10 LSB of 'res' not set. */ |
| 173 | if ((res & 0x3FF) == 0) |
| 174 | { |
| 175 | return res << 13; |
| 176 | } |
| 177 | |
| 178 | /* NaN: the exponent field of 'inp' is non-zero. */ |
| 179 | if ((inpx & 0x7C00) != 0) |
| 180 | { |
| 181 | /* All NaNs are quietened. */ |
| 182 | return (res << 13) | 0x400000; |
| 183 | } |
| 184 | |
| 185 | /* Denormal cases */ |
| 186 | uint32_t sign = (inpx & 0x8000) << 16; |
| 187 | uint32_t mskval = inpx & 0x7FFF; |
| 188 | uint32_t leadingzeroes = clz32(mskval); |
| 189 | mskval <<= leadingzeroes; |
| 190 | return (mskval >> 8) + ((0x85 - leadingzeroes) << 23) + sign; |
| 191 | } |
| 192 | |
/*
   Conversion routine that converts from FP32 to FP16. It supports denormals
   and all rounding modes. If a NaN is given as input, it is quietened.

   'inp' is an FP32 bit pattern and 'rmode' selects the rounding mode; the
   return value is the FP16 bit pattern.

   The top nine bits of 'inp' (sign + 8-bit exponent) select a case class
   from 'tab' (a multiple of 5); adding 'rmode' (0..4) yields a case index
   in the range 0..59 that both indexes 'tabx' and drives the switch below.
   'rmode' must therefore be one of the five roundmode values.
*/
static sf16 sf32_to_sf16(sf32 inp, roundmode rmode)
{
    /* for each possible sign/exponent combination, store a case index. This gives a 512-byte table.
       The first 256 entries are for positive inputs, the last 256 for negative inputs:
         0 /  5 : exponent field is 0
        10 / 15 : too small to be an FP16 denormal (underflow)
        20 / 25 : result is an FP16 denormal
        30 / 35 : result is an FP16 normal number
        40 / 45 : too large for FP16 (overflow)
        50 / 55 : Infinity or NaN */
    static const uint8_t tab[512] {
        0, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
        10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
        10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
        10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
        10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
        10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
        10, 10, 10, 10, 10, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
        20, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
        30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 40,
        40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
        40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
        40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
        40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
        40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
        40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
        40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 50,

        5, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
        15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
        15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
        15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
        15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
        15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
        15, 15, 15, 15, 15, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
        25, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
        35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 45,
        45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45,
        45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45,
        45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45,
        45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45,
        45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45,
        45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45,
        45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 55,
    };

    /* many of the cases below use a case-dependent magic constant. So we look up a magic constant
       before actually performing the switch. This table allows us to group cases, thereby
       minimizing code size.
       Each group of five entries is ordered by rounding mode: UP, DOWN, TOZERO, NEARESTEVEN,
       NEARESTAWAY.
       NOTE: entry 36 (negative normal, round DOWN) is 0x38001FFF where its neighbours start with
       0x58. The two prefixes differ only in bit 29, which ends up in bit 16 after the '>> 13' and
       is discarded by the cast to sf16, so the result is the same. */
    static const uint32_t tabx[60] {
        UINT32_C(0), UINT32_C(0), UINT32_C(0), UINT32_C(0), UINT32_C(0), UINT32_C(0x8000), UINT32_C(0x80000000), UINT32_C(0x8000), UINT32_C(0x8000), UINT32_C(0x8000),
        UINT32_C(1), UINT32_C(0), UINT32_C(0), UINT32_C(0), UINT32_C(0), UINT32_C(0x8000), UINT32_C(0x8001), UINT32_C(0x8000), UINT32_C(0x8000), UINT32_C(0x8000),
        UINT32_C(0), UINT32_C(0), UINT32_C(0), UINT32_C(0), UINT32_C(0), UINT32_C(0x8000), UINT32_C(0x8000), UINT32_C(0x8000), UINT32_C(0x8000), UINT32_C(0x8000),
        UINT32_C(0xC8001FFF), UINT32_C(0xC8000000), UINT32_C(0xC8000000), UINT32_C(0xC8000FFF), UINT32_C(0xC8001000),
        UINT32_C(0x58000000), UINT32_C(0x38001FFF), UINT32_C(0x58000000), UINT32_C(0x58000FFF), UINT32_C(0x58001000),
        UINT32_C(0x7C00), UINT32_C(0x7BFF), UINT32_C(0x7BFF), UINT32_C(0x7C00), UINT32_C(0x7C00),
        UINT32_C(0xFBFF), UINT32_C(0xFC00), UINT32_C(0xFBFF), UINT32_C(0xFC00), UINT32_C(0xFC00),
        UINT32_C(0x90000000), UINT32_C(0x90000000), UINT32_C(0x90000000), UINT32_C(0x90000000), UINT32_C(0x90000000),
        UINT32_C(0x20000000), UINT32_C(0x20000000), UINT32_C(0x20000000), UINT32_C(0x20000000), UINT32_C(0x20000000)
    };

    uint32_t p;
    /* case index = case class for this sign/exponent + rounding mode */
    uint32_t idx = rmode + tab[inp >> 23];
    uint32_t vlx = tabx[idx];
    switch (idx)
    {
        /*
            Positive or negative number which may be Infinity or NaN.
            We need to check whether it is NaN; if it is, quieten it by setting the top bit of the mantissa.
            (If we don't do this quieting, then a NaN that is distinguished only by having
            its low-order bits set, would be turned into an INF.) */
    case 50:
    case 51:
    case 52:
    case 53:
    case 54:
    case 55:
    case 56:
    case 57:
    case 58:
    case 59:
        /*
            the input value is 0x7F800000 or 0xFF800000 if it is INF.
            By subtracting 1, we get 7F7FFFFF or FF7FFFFF, that is, bit 23 becomes zero.
            For NaNs, however, this operation will keep bit 23 with the value 1.
            We can then extract bit 23, and logical-OR bit 9 of the result with this
            bit in order to quieten the NaN (a Quiet NaN is a NaN where the top bit
            of the mantissa is set.)
        */
        p = (inp - 1) & UINT32_C(0x800000); /* zero if INF, nonzero if NaN. */
        return static_cast<sf16>(((inp + vlx) >> 13) | (p >> 14));
        /*
            positive, exponent = 0, round-mode == UP; need to check whether number actually is 0.
            If it is, then return 0, else return 1 (the smallest representable nonzero number)
        */
    case 0:
        /*
            -inp will set the MSB if the input number is nonzero.
            Thus (-inp) >> 31 will turn into 0 if the input number is 0 and 1 otherwise.
        */
        return static_cast<sf16>(static_cast<uint32_t>((-static_cast<int32_t>(inp))) >> 31);

        /*
            negative, exponent = 0, round-mode == DOWN, need to check whether number is
            actually 0. If it is, return 0x8000 ( float -0.0 )
            Else return the smallest negative number ( 0x8001 ) */
    case 6:
        /*
            in this case 'vlx' is 0x80000000. By subtracting the input value from it,
            we obtain a value that is 0 if the input value is in fact zero and has
            the MSB set if it isn't. We then right-shift the value by 31 places to
            get a value that is 0 if the input is -0.0 and 1 otherwise.
        */
        return static_cast<sf16>(((vlx - inp) >> 31) + UINT32_C(0x8000));

        /*
            for all other cases involving underflow/overflow, we don't need to
            do actual tests; we just return 'vlx'.
        */
    case 1:
    case 2:
    case 3:
    case 4:
    case 5:
    case 7:
    case 8:
    case 9:
    case 10:
    case 11:
    case 12:
    case 13:
    case 14:
    case 15:
    case 16:
    case 17:
    case 18:
    case 19:
    case 40:
    case 41:
    case 42:
    case 43:
    case 44:
    case 45:
    case 46:
    case 47:
    case 48:
    case 49:
        return static_cast<sf16>(vlx);

        /*
            for normal numbers, 'vlx' is the difference between the FP32 value of a number and the
            FP16 representation of the same number left-shifted by 13 places. In addition, a rounding constant is
            baked into 'vlx': for rounding-away-from zero, the constant is 2^13 - 1, causing roundoff away
            from zero. for round-to-nearest away, the constant is 2^12, causing roundoff away from zero.
            for round-to-nearest-even, the constant is 2^12 - 1. This causes correct round-to-nearest-even
            except for odd input numbers. For odd input numbers, we need to add 1 to the constant. */

        /* normal number, all rounding modes except round-to-nearest-even: */
    case 30:
    case 31:
    case 32:
    case 34:
    case 35:
    case 36:
    case 37:
    case 39:
        return static_cast<sf16>((inp + vlx) >> 13);

        /* normal number, round-to-nearest-even. */
    case 33:
    case 38:
        p = inp + vlx;
        /* bit 13 of 'inp' becomes the LSB of the result; add it so that ties on odd values round up. */
        p += (inp >> 13) & 1;
        return static_cast<sf16>(p >> 13);

        /*
            the various denormal cases. These are not expected to be common, so their performance is a bit
            less important. For each of these cases, we need to extract an exponent and a mantissa
            (including the implicit '1'!), and then right-shift the mantissa by a shift-amount that
            depends on the exponent. The shift must apply the correct rounding mode. 'vlx' is used to supply the
            sign of the resulting denormal number.
        */
    case 21:
    case 22:
    case 25:
    case 27:
        /* denormal, round towards zero. */
        p = 126 - ((inp >> 23) & 0xFF);
        return static_cast<sf16>((((inp & UINT32_C(0x7FFFFF)) + UINT32_C(0x800000)) >> p) | vlx);
    case 20:
    case 26:
        /* denormal, round away from zero. */
        p = 126 - ((inp >> 23) & 0xFF);
        return static_cast<sf16>(rtup_shift32((inp & UINT32_C(0x7FFFFF)) + UINT32_C(0x800000), p) | vlx);
    case 24:
    case 29:
        /* denormal, round to nearest-away */
        p = 126 - ((inp >> 23) & 0xFF);
        return static_cast<sf16>(rtna_shift32((inp & UINT32_C(0x7FFFFF)) + UINT32_C(0x800000), p) | vlx);
    case 23:
    case 28:
        /* denormal, round to nearest-even. */
        p = 126 - ((inp >> 23) & 0xFF);
        return static_cast<sf16>(rtne_shift32((inp & UINT32_C(0x7FFFFF)) + UINT32_C(0x800000), p) | vlx);
    }

    /* Every index 0..59 is handled by a case above, so this is only a fallback. */
    return 0;
}
| 394 | |
| 395 | /* convert from soft-float to native-float */ |
| 396 | float sf16_to_float(uint16_t p) |
| 397 | { |
| 398 | if32 i; |
| 399 | i.u = sf16_to_sf32(p); |
| 400 | return i.f; |
| 401 | } |
| 402 | |
| 403 | /* convert from native-float to soft-float */ |
| 404 | uint16_t float_to_sf16(float p) |
| 405 | { |
| 406 | if32 i; |
| 407 | i.f = p; |
| 408 | return sf32_to_sf16(i.u, SF_NEARESTEVEN); |
| 409 | } |
| 410 | |
| 411 | #endif |
| 412 | |