| 1 | // |
| 2 | // m3_math_utils.h |
| 3 | // |
| 4 | // Created by Volodymyr Shymanskyy on 8/10/19. |
| 5 | // Copyright © 2019 Volodymyr Shymanskyy. All rights reserved. |
| 6 | // |
| 7 | |
| 8 | #ifndef m3_math_utils_h |
| 9 | #define m3_math_utils_h |
| 10 | |
| 11 | #include "m3_core.h" |
| 12 | |
| 13 | #include <limits.h> |
| 14 | |
| 15 | #if defined(M3_COMPILER_MSVC) |
| 16 | |
| 17 | #include <intrin.h> |
| 18 | |
// MSVC build only: route the GCC/Clang-style __builtin_popcount name to the
// __popcnt intrinsic so the rest of the code can use a single spelling.
#define __builtin_popcount __popcnt
| 20 | |
// MSVC stand-in for GCC/Clang __builtin_ctz: number of trailing zero bits in x,
// i.e. the index of its lowest set bit.
//
// _BitScanForward signals "no bit set" through its return value and leaves the
// index unspecified in that case. Instead of handing back an indeterminate
// value, x == 0 is answered explicitly with 32 (the operand width).
static inline
int __builtin_ctz(uint32_t x) {
    unsigned long index = 0;
    if (!_BitScanForward(&index, x)) {
        return 32;
    }
    return (int)index;
}
| 27 | |
// MSVC stand-in for GCC/Clang __builtin_clz: number of leading zero bits in x.
//
// _BitScanReverse yields the index of the highest set bit; XOR with 31 turns
// that index into the count of zero bits above it. The intrinsic signals
// "no bit set" through its return value and leaves the index unspecified, so
// x == 0 is answered explicitly with 32 (the operand width).
static inline
int __builtin_clz(uint32_t x) {
    unsigned long index = 0;
    if (!_BitScanReverse(&index, x)) {
        return 32;
    }
    return (int)(31 ^ index);
}
| 34 | |
| 35 | |
| 36 | |
| 37 | #ifdef _WIN64 |
| 38 | |
// 64-bit Windows build: route the GCC/Clang-style __builtin_popcountll name to
// the 64-bit __popcnt64 intrinsic.
#define __builtin_popcountll __popcnt64
| 40 | |
// 64-bit MSVC stand-in for GCC/Clang __builtin_ctzll: number of trailing zero
// bits in value.
//
// _BitScanForward64 signals "no bit set" through its return value and leaves
// the index unspecified in that case, so value == 0 is answered explicitly
// with 64 (the operand width) instead of an indeterminate number.
static inline
int __builtin_ctzll(uint64_t value) {
    unsigned long index = 0;
    if (!_BitScanForward64(&index, value)) {
        return 64;
    }
    return (int)index;
}
| 47 | |
// 64-bit MSVC stand-in for GCC/Clang __builtin_clzll: number of leading zero
// bits in value.
//
// _BitScanReverse64 yields the index of the highest set bit; XOR with 63 turns
// it into the count of zero bits above it. The intrinsic signals "no bit set"
// through its return value and leaves the index unspecified, so value == 0 is
// answered explicitly with 64 (the operand width).
static inline
int __builtin_clzll(uint64_t value) {
    unsigned long index = 0;
    if (!_BitScanReverse64(&index, value)) {
        return 64;
    }
    return (int)(63 ^ index);
}
| 54 | |
| 55 | #else // _WIN64 |
| 56 | |
// 32-bit MSVC build: there is no 64-bit popcount alias in this branch, so the
// count is assembled from two 32-bit __popcnt calls, one on the low half and
// one on the high half of the operand.
// Caution: the argument is expanded twice, so it must be free of side effects.
#define __builtin_popcountll(v) \
    (__popcnt((v) & 0xFFFFFFFF) + __popcnt((v) >> 32))
| 58 | |
// 32-bit MSVC fallback for __builtin_ctzll, built on the 32-bit __builtin_ctz.
//
// The low word decides the answer whenever it has a bit set; otherwise the
// count continues into the high word, offset by 32.
// value == 0 is deliberately not special-cased: the result is then whatever
// 32 + __builtin_ctz(0) produces, and ctz(0) is undefined for the builtin this
// function mirrors.
static inline
int __builtin_ctzll(uint64_t value) {
    const uint32_t low = (uint32_t)(value & 0xFFFFFFFF);
    if (low != 0) {
        return __builtin_ctz(low);
    }

    const uint32_t high = (uint32_t)(value >> 32);
    return 32 + __builtin_ctz(high);
}
| 67 | |
// 32-bit MSVC fallback for __builtin_clzll, built on the 32-bit __builtin_clz.
//
// The high word decides the answer whenever it has a bit set; otherwise all of
// its 32 bits are leading zeros and the count continues into the low word.
// value == 0 is deliberately not special-cased: the result is then whatever
// 32 + __builtin_clz(0) produces, and clz(0) is undefined for the builtin this
// function mirrors.
static inline
int __builtin_clzll(uint64_t value) {
    const uint32_t high = (uint32_t)(value >> 32);
    if (high == 0) {
        const uint32_t low = (uint32_t)(value & 0xFFFFFFFF);
        return 32 + __builtin_clz(low);
    }
    return __builtin_clz(high);
}
| 76 | |
| 77 | #endif // _WIN64 |
| 78 | |
| 79 | #endif // defined(M3_COMPILER_MSVC) |
| 80 | |
| 81 | |
// TODO: it is unclear why this fallback is needed, since signbit is supposed to
// be provided by math.h.
// On ESP8266 / ESP32 builds that reach this point without a signbit macro,
// supply one that picks __signbitf or __signbitd based on the operand's size.
#if !defined(signbit) && (defined(ESP8266) || defined(ESP32))
#define signbit(val) \
    ((sizeof(val) == sizeof(float)) ? __signbitf(val) : __signbitd(val))
#endif
| 87 | |
| 88 | #if defined(__AVR__) |
| 89 | |
| 90 | static inline |
| 91 | float rintf( float arg ) { |
| 92 | union { float f; uint32_t i; } u; |
| 93 | u.f = arg; |
| 94 | uint32_t ux = u.i & 0x7FFFFFFF; |
| 95 | if (M3_UNLIKELY(ux == 0 || ux > 0x5A000000)) { |
| 96 | return arg; |
| 97 | } |
| 98 | return (float)lrint(arg); |
| 99 | } |
| 100 | |
| 101 | static inline |
| 102 | double rint( double arg ) { |
| 103 | union { double f; uint32_t i[2]; } u; |
| 104 | u.f = arg; |
| 105 | uint32_t ux = u.i[1] & 0x7FFFFFFF; |
| 106 | if (M3_UNLIKELY((ux == 0 && u.i[0] == 0) || ux > 0x433FFFFF)) { |
| 107 | return arg; |
| 108 | } |
| 109 | return (double)lrint(arg); |
| 110 | } |
| 111 | |
// AVR replacement for strtoull: parses an unsigned integer from str.
//
//   str     text to parse; leading whitespace and one optional '+' / '-' sign
//           are accepted.
//   endptr  if non-NULL, receives a pointer to the first character that was
//           not consumed, or str itself when no digits could be converted.
//   base    2..36, or 0 to auto-detect ("0x"/"0X" -> 16, leading '0' -> 8,
//           otherwise 10). With base 16 an optional "0x"/"0X" prefix is
//           skipped. Any other base value converts nothing and returns 0.
//
// Returns the parsed value, negated in unsigned arithmetic if a '-' sign was
// given. If the digits exceed the range of uint64_t, UINT64_MAX is returned
// and the whole digit run is still consumed.
// Unlike the hosted C library function, errno is not touched.
static inline
uint64_t strtoull(const char* str, char** endptr, int base) {
    const char* cursor = str;
    const char* stop = str;     // one past the last digit consumed; str if none
    uint64_t value = 0;
    int negative = 0;
    int overflow = 0;

    if (base != 0 && (base < 2 || base > 36)) {
        if (endptr) {
            *endptr = (char*)str;
        }
        return 0;
    }

    // Skip the characters isspace() accepts in the "C" locale.
    while (*cursor == ' ' || (*cursor >= '\t' && *cursor <= '\r')) {
        cursor++;
    }

    if (*cursor == '+' || *cursor == '-') {
        negative = (*cursor == '-');
        cursor++;
    }

    // A "0x" prefix only counts when a hex digit follows; otherwise the '0'
    // alone is the number and parsing stops at the 'x'.
    if ((base == 0 || base == 16) && cursor[0] == '0' &&
        (cursor[1] == 'x' || cursor[1] == 'X')) {
        const char next = cursor[2];
        if ((next >= '0' && next <= '9') ||
            (next >= 'a' && next <= 'f') ||
            (next >= 'A' && next <= 'F')) {
            cursor += 2;
            base = 16;
        }
    }
    if (base == 0) {
        base = (*cursor == '0') ? 8 : 10;
    }

    for (;; cursor++) {
        const char c = *cursor;
        int digit;

        if (c >= '0' && c <= '9') {
            digit = c - '0';
        } else if (c >= 'a' && c <= 'z') {
            digit = c - 'a' + 10;
        } else if (c >= 'A' && c <= 'Z') {
            digit = c - 'A' + 10;
        } else {
            break;
        }
        if (digit >= base) {
            break;
        }

        if (!overflow) {
            // value * base + digit would exceed UINT64_MAX
            if (value > (UINT64_MAX - (uint64_t)digit) / (uint64_t)base) {
                overflow = 1;
            } else {
                value = value * (uint64_t)base + (uint64_t)digit;
            }
        }
        stop = cursor + 1;
    }

    if (endptr) {
        *endptr = (char*)stop;
    }
    if (overflow) {
        return UINT64_MAX;
    }
    return negative ? (uint64_t)0 - value : value;
}
| 117 | |
| 118 | #endif |
| 119 | |
| 120 | /* |
| 121 | * Rotr, Rotl |
| 122 | */ |
| 123 | |
| 124 | static inline |
| 125 | u32 rotl32(u32 n, unsigned c) { |
| 126 | const unsigned mask = CHAR_BIT * sizeof(n) - 1; |
| 127 | c &= mask & 31; |
| 128 | return (n << c) | (n >> ((-c) & mask)); |
| 129 | } |
| 130 | |
| 131 | static inline |
| 132 | u32 rotr32(u32 n, unsigned c) { |
| 133 | const unsigned mask = CHAR_BIT * sizeof(n) - 1; |
| 134 | c &= mask & 31; |
| 135 | return (n >> c) | (n << ((-c) & mask)); |
| 136 | } |
| 137 | |
| 138 | static inline |
| 139 | u64 rotl64(u64 n, unsigned c) { |
| 140 | const unsigned mask = CHAR_BIT * sizeof(n) - 1; |
| 141 | c &= mask & 63; |
| 142 | return (n << c) | (n >> ((-c) & mask)); |
| 143 | } |
| 144 | |
| 145 | static inline |
| 146 | u64 rotr64(u64 n, unsigned c) { |
| 147 | const unsigned mask = CHAR_BIT * sizeof(n) - 1; |
| 148 | c &= mask & 63; |
| 149 | return (n >> c) | (n << ((-c) & mask)); |
| 150 | } |
| 151 | |
| 152 | /* |
| 153 | * Integer Div, Rem |
| 154 | */ |
| 155 | |
// Unsigned division: RES = A / B, with a division-by-zero trap raised through
// newTrap when B is 0.
// A and B are parenthesized so that compound expressions (e.g. "x + y") can be
// passed safely; both are expanded more than once, so they must be free of
// side effects.
// The expansion is a sequence of statements ending in ';' - do not use it as
// the body of an unbraced if/else.
// NOTE(review): newTrap is defined elsewhere; presumably it leaves the current
// operation so the division below is not reached with B == 0 - confirm.
#define OP_DIV_U(RES, A, B)                                         \
    if (M3_UNLIKELY((B) == 0)) newTrap (m3Err_trapDivisionByZero);  \
    RES = (A) / (B);
| 159 | |
// Unsigned remainder: RES = A % B, with a division-by-zero trap raised through
// newTrap when B is 0.
// A and B are parenthesized so that compound expressions (e.g. "x + y") can be
// passed safely; both are expanded more than once, so they must be free of
// side effects.
// The expansion is a sequence of statements ending in ';' - do not use it as
// the body of an unbraced if/else.
// NOTE(review): newTrap is defined elsewhere; presumably it leaves the current
// operation so the remainder below is not reached with B == 0 - confirm.
#define OP_REM_U(RES, A, B)                                         \
    if (M3_UNLIKELY((B) == 0)) newTrap (m3Err_trapDivisionByZero);  \
    RES = (A) % (B);
| 163 | |
// 2's complement detection: INT_MIN differs from -INT_MAX only there, and only
// there can TYPE_MIN / -1 overflow, so the extra check is compiled in just for
// that case.
#if (INT_MIN != -INT_MAX)

// Signed division: RES = A / B.
// Raises a division-by-zero trap when B is 0 and an integer-overflow trap when
// the operation is TYPE_MIN / -1. TYPE_MIN is the minimum value of the operand
// type as supplied by the caller.
// Arguments are parenthesized so compound expressions can be passed safely;
// A and B are expanded more than once and must be free of side effects.
// The expansion is a sequence of statements - do not use it as the body of an
// unbraced if/else.
#define OP_DIV_S(RES, A, B, TYPE_MIN)                               \
    if (M3_UNLIKELY((B) == 0)) newTrap (m3Err_trapDivisionByZero);  \
    if (M3_UNLIKELY((B) == -1 and (A) == (TYPE_MIN))) {             \
        newTrap (m3Err_trapIntegerOverflow);                        \
    }                                                               \
    RES = (A) / (B);

// Signed remainder: RES = A % B.
// Raises a division-by-zero trap when B is 0. For TYPE_MIN % -1 the result is
// set to 0 directly instead of evaluating the overflowing operation.
// Same argument and usage rules as OP_DIV_S.
#define OP_REM_S(RES, A, B, TYPE_MIN)                               \
    if (M3_UNLIKELY((B) == 0)) newTrap (m3Err_trapDivisionByZero);  \
    if (M3_UNLIKELY((B) == -1 and (A) == (TYPE_MIN))) RES = 0;      \
    else RES = (A) % (B);

#else

// No overflow case to guard: the signed operations reduce to the unsigned
// macros, which only check for division by zero.
#define OP_DIV_S(RES, A, B, TYPE_MIN) OP_DIV_U(RES, A, B)
#define OP_REM_S(RES, A, B, TYPE_MIN) OP_REM_U(RES, A, B)

#endif
| 185 | |
| 186 | /* |
| 187 | * Trunc |
| 188 | */ |
| 189 | |
// Trapping float-to-integer truncation: RES = (TYPE)A.
//
//   A           floating-point value to convert
//   TYPE        destination integer type
//   RMIN, RMAX  exclusive bounds: A must satisfy RMIN < A < RMAX
//
// A NaN raises an integer-conversion trap, and a value at or beyond either
// bound raises an integer-overflow trap, both through newTrap.
// Arguments are parenthesized so compound expressions can be passed safely (in
// particular the cast applies to the whole of A); A is expanded several times
// and must be free of side effects.
// The expansion is a sequence of statements - do not use it as the body of an
// unbraced if/else.
#define OP_TRUNC(RES, A, TYPE, RMIN, RMAX)                  \
    if (M3_UNLIKELY(isnan(A))) {                            \
        newTrap (m3Err_trapIntegerConversion);              \
    }                                                       \
    if (M3_UNLIKELY((A) <= (RMIN) or (A) >= (RMAX))) {      \
        newTrap (m3Err_trapIntegerOverflow);                \
    }                                                       \
    RES = (TYPE)(A);
| 198 | |
| 199 | |
// Trapping truncations for every float -> integer pairing. Each one forwards
// to OP_TRUNC with the destination type and a pair of exclusive bounds written
// in the source float format.
// For the signed cases the lower bound is a value just below the destination
// type's minimum, so the minimum itself (and fractions that truncate to it)
// stay in range. For the unsigned cases the lower bound is -1, so values in
// (-1, 0) are accepted.
#define OP_I32_TRUNC_F32(RES, A) \
    OP_TRUNC(RES, A, i32, -2147483904.0f, 2147483648.0f)
#define OP_U32_TRUNC_F32(RES, A) \
    OP_TRUNC(RES, A, u32, -1.0f, 4294967296.0f)
#define OP_I32_TRUNC_F64(RES, A) \
    OP_TRUNC(RES, A, i32, -2147483649.0, 2147483648.0)
#define OP_U32_TRUNC_F64(RES, A) \
    OP_TRUNC(RES, A, u32, -1.0, 4294967296.0)

#define OP_I64_TRUNC_F32(RES, A) \
    OP_TRUNC(RES, A, i64, -9223373136366403584.0f, 9223372036854775808.0f)
#define OP_U64_TRUNC_F32(RES, A) \
    OP_TRUNC(RES, A, u64, -1.0f, 18446744073709551616.0f)
#define OP_I64_TRUNC_F64(RES, A) \
    OP_TRUNC(RES, A, i64, -9223372036854777856.0, 9223372036854775808.0)
#define OP_U64_TRUNC_F64(RES, A) \
    OP_TRUNC(RES, A, u64, -1.0, 18446744073709551616.0)
| 209 | |
// Saturating float-to-integer truncation; never traps.
//
//   A           floating-point value to convert
//   TYPE        destination integer type
//   RMIN, RMAX  exclusive bounds of the convertible range (RMIN < A < RMAX)
//   IMIN, IMAX  results used when A is at/below RMIN or at/above RMAX
//
// A NaN yields 0, an out-of-range value yields IMIN or IMAX, and anything
// else yields (TYPE)A.
// Arguments are parenthesized so compound expressions can be passed safely (in
// particular the cast applies to the whole of A); A is expanded several times
// and must be free of side effects.
// The expansion is a single if/else chain with no trailing ';'.
#define OP_TRUNC_SAT(RES, A, TYPE, RMIN, RMAX, IMIN, IMAX)  \
    if (M3_UNLIKELY(isnan(A))) {                            \
        RES = 0;                                            \
    } else if (M3_UNLIKELY((A) <= (RMIN))) {                \
        RES = (IMIN);                                       \
    } else if (M3_UNLIKELY((A) >= (RMAX))) {                \
        RES = (IMAX);                                       \
    } else {                                                \
        RES = (TYPE)(A);                                    \
    }
| 220 | |
// Saturating truncations for every float -> integer pairing. Each one forwards
// to OP_TRUNC_SAT with the destination type, a pair of exclusive range bounds
// written in the source float format, and the integer limits substituted when
// the input lies at or beyond those bounds.
#define OP_I32_TRUNC_SAT_F32(RES, A) \
    OP_TRUNC_SAT(RES, A, i32, -2147483904.0f, 2147483648.0f, INT32_MIN, INT32_MAX)
#define OP_U32_TRUNC_SAT_F32(RES, A) \
    OP_TRUNC_SAT(RES, A, u32, -1.0f, 4294967296.0f, 0UL, UINT32_MAX)
#define OP_I32_TRUNC_SAT_F64(RES, A) \
    OP_TRUNC_SAT(RES, A, i32, -2147483649.0, 2147483648.0, INT32_MIN, INT32_MAX)
#define OP_U32_TRUNC_SAT_F64(RES, A) \
    OP_TRUNC_SAT(RES, A, u32, -1.0, 4294967296.0, 0UL, UINT32_MAX)

#define OP_I64_TRUNC_SAT_F32(RES, A) \
    OP_TRUNC_SAT(RES, A, i64, -9223373136366403584.0f, 9223372036854775808.0f, INT64_MIN, INT64_MAX)
#define OP_U64_TRUNC_SAT_F32(RES, A) \
    OP_TRUNC_SAT(RES, A, u64, -1.0f, 18446744073709551616.0f, 0ULL, UINT64_MAX)
#define OP_I64_TRUNC_SAT_F64(RES, A) \
    OP_TRUNC_SAT(RES, A, i64, -9223372036854777856.0, 9223372036854775808.0, INT64_MIN, INT64_MAX)
#define OP_U64_TRUNC_SAT_F64(RES, A) \
    OP_TRUNC_SAT(RES, A, u64, -1.0, 18446744073709551616.0, 0ULL, UINT64_MAX)
| 230 | |
| 231 | /* |
| 232 | * Min, Max |
| 233 | */ |
| 234 | |
| 235 | #if d_m3HasFloat |
| 236 | |
| 237 | #include <math.h> |
| 238 | |
| 239 | static inline |
| 240 | f32 min_f32(f32 a, f32 b) { |
| 241 | if (M3_UNLIKELY(isnan(a) or isnan(b))) return NAN; |
| 242 | if (M3_UNLIKELY(a == 0 and a == b)) return signbit(a) ? a : b; |
| 243 | return a > b ? b : a; |
| 244 | } |
| 245 | |
| 246 | static inline |
| 247 | f32 max_f32(f32 a, f32 b) { |
| 248 | if (M3_UNLIKELY(isnan(a) or isnan(b))) return NAN; |
| 249 | if (M3_UNLIKELY(a == 0 and a == b)) return signbit(a) ? b : a; |
| 250 | return a > b ? a : b; |
| 251 | } |
| 252 | |
| 253 | static inline |
| 254 | f64 min_f64(f64 a, f64 b) { |
| 255 | if (M3_UNLIKELY(isnan(a) or isnan(b))) return NAN; |
| 256 | if (M3_UNLIKELY(a == 0 and a == b)) return signbit(a) ? a : b; |
| 257 | return a > b ? b : a; |
| 258 | } |
| 259 | |
| 260 | static inline |
| 261 | f64 max_f64(f64 a, f64 b) { |
| 262 | if (M3_UNLIKELY(isnan(a) or isnan(b))) return NAN; |
| 263 | if (M3_UNLIKELY(a == 0 and a == b)) return signbit(a) ? b : a; |
| 264 | return a > b ? a : b; |
| 265 | } |
| 266 | #endif |
| 267 | |
| 268 | #endif // m3_math_utils_h |
| 269 | |