1 | // SPDX-License-Identifier: Apache-2.0 |
2 | // ---------------------------------------------------------------------------- |
3 | // Copyright 2011-2021 Arm Limited |
4 | // |
5 | // Licensed under the Apache License, Version 2.0 (the "License"); you may not |
6 | // use this file except in compliance with the License. You may obtain a copy |
7 | // of the License at: |
8 | // |
9 | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | // |
11 | // Unless required by applicable law or agreed to in writing, software |
12 | // distributed under the License is distributed on an "AS IS" BASIS, WITHOUT |
13 | // WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the |
14 | // License for the specific language governing permissions and limitations |
15 | // under the License. |
16 | // ---------------------------------------------------------------------------- |
17 | |
18 | /* |
19 | * This module implements a variety of mathematical data types and library |
20 | * functions used by the codec. |
21 | */ |
22 | |
23 | #ifndef ASTC_MATHLIB_H_INCLUDED |
24 | #define ASTC_MATHLIB_H_INCLUDED |
25 | |
#include <cassert>
#include <cmath>
#include <cstdint>
#include <cstring>
29 | |
// ----------------------------------------------------------------------------
// Instruction-set feature selection.
//
// Each ASTCENC_* macro below may be predefined by the build. When it is not,
// it is derived from the compiler's own predefined feature macros.
// ----------------------------------------------------------------------------

// POPCNT: 1 when the compiler reports __POPCNT__, otherwise 0.
#ifndef ASTCENC_POPCNT
#if defined(__POPCNT__)
#define ASTCENC_POPCNT 1
#else
#define ASTCENC_POPCNT 0
#endif
#endif

// F16C: 1 when the compiler reports __F16C__, otherwise 0.
#ifndef ASTCENC_F16C
#if defined(__F16C__)
#define ASTCENC_F16C 1
#else
#define ASTCENC_F16C 0
#endif
#endif

// SSE level: 42 for SSE4.2, 41 for SSE4.1, 20 for SSE2, 0 when none is
// reported. The highest reported level wins.
#ifndef ASTCENC_SSE
#if defined(__SSE4_2__)
#define ASTCENC_SSE 42
#elif defined(__SSE4_1__)
#define ASTCENC_SSE 41
#elif defined(__SSE2__)
#define ASTCENC_SSE 20
#else
#define ASTCENC_SSE 0
#endif
#endif

// AVX level: 2 for AVX2, 1 for AVX, 0 when neither is reported.
#ifndef ASTCENC_AVX
#if defined(__AVX2__)
#define ASTCENC_AVX 2
#elif defined(__AVX__)
#define ASTCENC_AVX 1
#else
#define ASTCENC_AVX 0
#endif
#endif

// NEON: 1 when compiling for AArch64 (__aarch64__), otherwise 0.
#ifndef ASTCENC_NEON
#if defined(__aarch64__)
#define ASTCENC_NEON 1
#else
#define ASTCENC_NEON 0
#endif
#endif

// Alignment value for vector data: 32 when any AVX level is enabled,
// otherwise 16.
#if ASTCENC_AVX
#define ASTCENC_VECALIGN 32
#else
#define ASTCENC_VECALIGN 16
#endif

// Pull in the x86 intrinsics header only when at least one of the x86
// feature macros above is enabled.
#if ASTCENC_SSE != 0 || ASTCENC_AVX != 0 || ASTCENC_POPCNT != 0
#include <immintrin.h>
#endif
85 | |
86 | /* ============================================================================ |
87 | Fast math library; note that many of the higher-order functions in this set |
88 | use approximations which are less accurate, but faster, than <cmath> standard |
89 | library equivalents. |
90 | |
91 | Note: Many of these are not necessarily faster than simple C versions when |
92 | used on a single scalar value, but are included for testing purposes as most |
93 | have an option based on SSE intrinsics and therefore provide an obvious route |
94 | to future vectorization. |
95 | ============================================================================ */ |
96 | |
/**
 * @brief Overlay of a 32-bit float with its unsigned and signed integer views.
 *
 * All three members share the same four bytes of storage. Member order is
 * significant: brace initialization sets @c u.
 */
union if32
{
    uint32_t u;  // Unsigned view of the 32 bits.
    int32_t s;   // Signed view of the 32 bits.
    float f;     // Floating-point view of the 32 bits.
};
104 | |
105 | // These are namespaced to avoid colliding with C standard library functions. |
106 | namespace astc |
107 | { |
108 | |
// Single-precision constants for pi and pi / 2. The literals carry more
// digits than a float can hold; the compiler rounds them to float.
static const float PI = 3.14159265358979323846f;
static const float PI_OVER_TWO = 1.57079632679489661923f;
111 | |
/**
 * @brief Compute the absolute value of a single-precision float.
 *
 * Thin wrapper that forwards to @c std::fabs.
 *
 * @param v   The input value.
 *
 * @return The magnitude of @c v.
 */
static inline float fabs(float v)
{
    const float magnitude = std::fabs(v);
    return magnitude;
}
123 | |
/**
 * @brief Test whether a float value is a NaN.
 *
 * Relies on the fact that NaN is the only value that does not compare equal
 * to itself.
 *
 * @param v   The value to test.
 *
 * @return @c true if @c v is a NaN, @c false otherwise.
 */
static inline bool isnan(float v)
{
    return !(v == v);
}
135 | |
/**
 * @brief Return the smaller of two values.
 *
 * The result is @c p only when <tt>p < q</tt> holds. For floats any
 * comparison involving a NaN is false, so if either input is a NaN the
 * result is @c q.
 *
 * @param p   The first value to compare.
 * @param q   The second value to compare.
 *
 * @return The smaller value.
 */
template<typename T>
static inline T min(T p, T q)
{
    if (p < q)
    {
        return p;
    }

    return q;
}

/**
 * @brief Return the smallest of three values.
 *
 * Evaluated as the two-value minimum of (@c p, @c q), then of that result
 * and @c r; the NaN behaviour of the two-value form applies at each step.
 *
 * @param p   The first value to compare.
 * @param q   The second value to compare.
 * @param r   The third value to compare.
 *
 * @return The smallest value.
 */
template<typename T>
static inline T min(T p, T q, T r)
{
    T smallest_pq = min(p, q);
    return min(smallest_pq, r);
}

/**
 * @brief Return the smallest of four values.
 *
 * Evaluated as the two-value minimum of the pair minimums (@c p, @c q) and
 * (@c r, @c s); the NaN behaviour of the two-value form applies at each step.
 *
 * @param p   The first value to compare.
 * @param q   The second value to compare.
 * @param r   The third value to compare.
 * @param s   The fourth value to compare.
 *
 * @return The smallest value.
 */
template<typename T>
static inline T min(T p, T q, T r, T s)
{
    T smallest_pq = min(p, q);
    T smallest_rs = min(r, s);
    return min(smallest_pq, smallest_rs);
}
186 | |
/**
 * @brief Return the larger of two values.
 *
 * The result is @c p only when <tt>p > q</tt> holds. For floats any
 * comparison involving a NaN is false, so if either input is a NaN the
 * result is @c q.
 *
 * @param p   The first value to compare.
 * @param q   The second value to compare.
 *
 * @return The larger value.
 */
template<typename T>
static inline T max(T p, T q)
{
    if (p > q)
    {
        return p;
    }

    return q;
}

/**
 * @brief Return the largest of three values.
 *
 * Evaluated as the two-value maximum of (@c p, @c q), then of that result
 * and @c r; the NaN behaviour of the two-value form applies at each step.
 *
 * @param p   The first value to compare.
 * @param q   The second value to compare.
 * @param r   The third value to compare.
 *
 * @return The largest value.
 */
template<typename T>
static inline T max(T p, T q, T r)
{
    T largest_pq = max(p, q);
    return max(largest_pq, r);
}

/**
 * @brief Return the largest of four values.
 *
 * Evaluated as the two-value maximum of the pair maximums (@c p, @c q) and
 * (@c r, @c s); the NaN behaviour of the two-value form applies at each step.
 *
 * @param p   The first value to compare.
 * @param q   The second value to compare.
 * @param r   The third value to compare.
 * @param s   The fourth value to compare.
 *
 * @return The largest value.
 */
template<typename T>
static inline T max(T p, T q, T r, T s)
{
    T largest_pq = max(p, q);
    T largest_rs = max(r, s);
    return max(largest_pq, largest_rs);
}
237 | |
/**
 * @brief Clamp a value to the inclusive range [@c mn, @c mx].
 *
 * For floats, a NaN input is turned into @c mn.
 *
 * @param v    The value to clamp.
 * @param mn   The minimum value (inclusive).
 * @param mx   The maximum value (inclusive).
 *
 * @return The clamped value.
 */
template<typename T>
inline T clamp(T v, T mn, T mx)
{
    // The order of the tests matters: every comparison against a NaN is
    // false, so a NaN input fails both tests and falls through to mn.
    return (v > mx) ? mx : ((v > mn) ? v : mn);
}
258 | |
259 | /** |
260 | * @brief Clamp a float value between 0.0f and 1.0f. |
261 | * |
262 | * NaNs are turned into 0.0f. |
263 | * |
264 | * @param v The value to clamp. |
265 | * |
266 | * @return The clamped value. |
267 | */ |
268 | static inline float clamp1f(float v) |
269 | { |
270 | return astc::clamp(v, 0.0f, 1.0f); |
271 | } |
272 | |
273 | /** |
274 | * @brief Clamp a float value between 0.0f and 255.0f. |
275 | * |
276 | * NaNs are turned into 0.0f. |
277 | * |
278 | * @param v The value to clamp. |
279 | * |
280 | * @return The clamped value. |
281 | */ |
282 | static inline float clamp255f(float v) |
283 | { |
284 | return astc::clamp(v, 0.0f, 255.0f); |
285 | } |
286 | |
/**
 * @brief Round a single-precision float down to an integral value.
 *
 * Thin wrapper that forwards to @c std::floor, so the result is rounded
 * toward negative infinity and stays a float.
 *
 * @param v   The value to round.
 *
 * @return The rounded value.
 */
static inline float flt_rd(float v)
{
    const float floored = std::floor(v);
    return floored;
}
298 | |
/**
 * @brief Round a single-precision float to the nearest integer.
 *
 * Implemented as "add one half, then truncate". Because the conversion to
 * @c int truncates toward zero, this gives round-to-nearest only for
 * non-negative inputs; e.g. -1.7f becomes -1, not -2.
 *
 * @param v   The value to round.
 *
 * @return The rounded value.
 */
static inline int flt2int_rtn(float v)
{
    // Kept as a single expression so the float addition is evaluated exactly
    // as before.
    return static_cast<int>(0.5f + v);
}
311 | |
/**
 * @brief Convert a single-precision float to an integer by discarding the
 *        fractional part.
 *
 * The conversion truncates toward zero, which equals rounding down only for
 * non-negative inputs; e.g. -2.9f becomes -2.
 *
 * @param v   The value to convert.
 *
 * @return The converted value.
 */
static inline int flt2int_rd(float v)
{
    const int truncated = static_cast<int>(v);
    return truncated;
}
323 | |
/**
 * @brief Reinterpret the bits of a single-precision float as an integer.
 *
 * No numeric conversion is performed; the object representation of @c v is
 * copied unchanged into the result.
 *
 * @param v   The value to bitcast.
 *
 * @return The integer holding the same bit pattern as @c v.
 */
static inline int float_as_int(float v)
{
    static_assert(sizeof(int) == sizeof(float),
                  "float_as_int requires int and float to be the same size");

    // Copy the bytes rather than reading an inactive union member, which is
    // undefined behaviour in C++.
    int bits;
    std::memcpy(&bits, &v, sizeof(bits));
    return bits;
}
337 | |
/**
 * @brief Reinterpret the bits of an integer as a single-precision float.
 *
 * No numeric conversion is performed; the object representation of @c v is
 * copied unchanged into the result.
 *
 * @param v   The value to bitcast.
 *
 * @return The float holding the same bit pattern as @c v.
 */
static inline float int_as_float(int v)
{
    static_assert(sizeof(int) == sizeof(float),
                  "int_as_float requires int and float to be the same size");

    // Copy the bytes rather than reading an inactive union member, which is
    // undefined behaviour in C++.
    float value;
    std::memcpy(&value, &v, sizeof(value));
    return value;
}
351 | |
/**
 * @brief Compute the reciprocal square root, 1.0 / sqrt(v).
 *
 * This scalar version is not an approximation: it divides 1.0f by the result
 * of @c std::sqrt.
 *
 * @param v   The input value.
 *
 * @return The reciprocal square root of @c v.
 */
static inline float rsqrt(float v)
{
    const float root = std::sqrt(v);
    return 1.0f / root;
}
363 | |
/**
 * @brief Compute the square root of a single-precision float.
 *
 * This scalar version is not an approximation: it forwards to @c std::sqrt.
 *
 * @param v   The input value.
 *
 * @return The square root of @c v.
 */
static inline float sqrt(float v)
{
    const float root = std::sqrt(v);
    return root;
}
375 | |
/**
 * @brief Split a float into a mantissa and a power-of-two exponent.
 *
 * Works directly on the bit pattern: the 8-bit exponent field is read out and
 * re-biased, then overwritten so the returned value has a magnitude in
 * [0.5, 1.0) while keeping the sign and fraction bits of @c v. For normal
 * finite inputs this gives <tt>v == mantissa * 2^expo</tt>.
 *
 * There is no special handling for zero, denormal, infinite or NaN inputs, so
 * results for those differ from @c std::frexp (e.g. 0.0f yields 0.5f with an
 * exponent of -126).
 *
 * @param      v      The input value.
 * @param[out] expo   The output exponent.
 *
 * @return The mantissa.
 */
static inline float frexp(float v, int* expo)
{
    static_assert(sizeof(uint32_t) == sizeof(float),
                  "frexp requires a 32-bit float");

    // Copy the bytes rather than reading an inactive union member, which is
    // undefined behaviour in C++.
    uint32_t bits;
    std::memcpy(&bits, &v, sizeof(bits));

    // Convert the biased exponent field to int before removing the bias so
    // the subtraction is done in signed arithmetic.
    *expo = static_cast<int>((bits >> 23) & 0xFFu) - 126;

    // Keep sign and fraction bits; force the exponent field to 126 (2^-1).
    bits = (bits & 0x807FFFFFu) | 0x3F000000u;

    float mantissa;
    std::memcpy(&mantissa, &bits, sizeof(mantissa));
    return mantissa;
}
392 | |
/**
 * @brief Initialize the seed structure for a random number generator.
 *
 * Important note: For the purposes of ASTC we want sets of random numbers for
 * use by the codec, but we want the same seed value across instances and
 * threads to ensure that image output is stable across compressor runs and
 * across platforms. Every PRNG created by this call will therefore return the
 * same sequence of values.
 *
 * Declaration only; the definition is not part of this header.
 *
 * @param state   The two-element state structure to initialize.
 */
void rand_init(uint64_t state[2]);
405 | |
/**
 * @brief Return the next random number from the generator.
 *
 * This RNG is an implementation of the "xoroshiro128+ 1.0" PRNG, based on the
 * public-domain implementation given by David Blackman & Sebastiano Vigna at
 * http://vigna.di.unimi.it/xorshift/xoroshiro128plus.c
 *
 * Declaration only; the definition is not part of this header.
 *
 * @param state   The two-element state structure to use/update.
 *
 * @return The next 64-bit value in the sequence.
 */
uint64_t rand(uint64_t state[2]);
416 | |
417 | } |
418 | |
419 | /* ============================================================================ |
420 | Softfloat library with fp32 and fp16 conversion functionality. |
421 | ============================================================================ */ |
// These conversion routines are only declared when neither F16C nor NEON
// support is enabled.
// NOTE(review): presumably hardware conversions are used on the other paths;
// confirm in the vector library.
#if (ASTCENC_F16C == 0) && (ASTCENC_NEON == 0)
/*
 * fp32 <-> fp16 conversions, with the fp16 value carried as a 16-bit pattern.
 * float_to_sf16 narrows and sf16_to_float widens.
 * NOTE(review): rounding and special-value handling are defined by the
 * implementation, which is not visible here.
 */
uint16_t float_to_sf16(float val);
float sf16_to_float(uint16_t val);
#endif
427 | |
428 | /********************************* |
429 | Vector library |
430 | *********************************/ |
431 | #include "astcenc_vecmathlib.h" |
432 | |
433 | /********************************* |
434 | Declaration of line types |
435 | *********************************/ |
// Parametric line, 2D: points on the line are given by a + b * t.
// Both terms are stored as vfloat4 regardless of the line's dimensionality.

struct line2
{
    vfloat4 a;  // Constant term: the point on the line at t = 0.
    vfloat4 b;  // Term scaled by the parameter t.
};
443 | |
// Parametric line, 3D. Same layout as the 2D form, where points on the line
// are given by a + b * t.
struct line3
{
    vfloat4 a;  // Constant term: the point on the line at t = 0.
    vfloat4 b;  // Term scaled by the parameter t.
};
450 | |
// NOTE(review): presumably the 4D parametric line, following the naming of
// line2 and line3 (points given by a + b * t) - confirm against users.
struct line4
{
    vfloat4 a;  // Constant term: the point on the line at t = 0.
    vfloat4 b;  // Term scaled by the parameter t.
};
456 | |
457 | |
// NOTE(review): presumably a preprocessed form of line2; the exact meaning of
// the fields is not visible in this header - confirm where it is populated.
struct processed_line2
{
    vfloat4 amod;  // presumably derived from the line's "a" term - TODO confirm
    vfloat4 bs;    // presumably derived from the line's "b" term - TODO confirm
};
463 | |
// NOTE(review): presumably a preprocessed form of line3; the exact meaning of
// the fields is not visible in this header - confirm where it is populated.
struct processed_line3
{
    vfloat4 amod;  // presumably derived from the line's "a" term - TODO confirm
    vfloat4 bs;    // presumably derived from the line's "b" term - TODO confirm
};
469 | |
// NOTE(review): presumably a preprocessed form of line4; the exact meaning of
// the fields is not visible in this header - confirm where it is populated.
struct processed_line4
{
    vfloat4 amod;  // presumably derived from the line's "a" term - TODO confirm
    vfloat4 bs;    // presumably derived from the line's "b" term - TODO confirm
};
475 | |
476 | #endif |
477 | |