1// SPDX-License-Identifier: Apache-2.0
2// ----------------------------------------------------------------------------
3// Copyright 2011-2021 Arm Limited
4//
5// Licensed under the Apache License, Version 2.0 (the "License"); you may not
6// use this file except in compliance with the License. You may obtain a copy
7// of the License at:
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing, software
12// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
13// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
14// License for the specific language governing permissions and limitations
15// under the License.
16// ----------------------------------------------------------------------------
17
18/*
19 * This module implements a variety of mathematical data types and library
20 * functions used by the codec.
21 */
22
23#ifndef ASTC_MATHLIB_H_INCLUDED
24#define ASTC_MATHLIB_H_INCLUDED
25
#include <cassert>
#include <cmath>
#include <cstdint>
#include <cstring>
29
// ASTCENC_POPCNT: 1 when the compiler reports POPCNT support (__POPCNT__),
// otherwise 0. A definition made before this point (for example on the
// compiler command line) takes precedence over this auto-detection.
#if !defined(ASTCENC_POPCNT)
  #ifdef __POPCNT__
    #define ASTCENC_POPCNT 1
  #else
    #define ASTCENC_POPCNT 0
  #endif
#endif
37
// ASTCENC_F16C: 1 when the compiler reports F16C support (__F16C__),
// otherwise 0. A definition made before this point takes precedence over
// this auto-detection.
#if !defined(ASTCENC_F16C)
  #ifdef __F16C__
    #define ASTCENC_F16C 1
  #else
    #define ASTCENC_F16C 0
  #endif
#endif
45
// ASTCENC_SSE: the highest SSE level reported by the compiler, encoded as
// 42 (SSE4.2), 41 (SSE4.1), 20 (SSE2) or 0 (none). A definition made before
// this point takes precedence over this auto-detection.
#if !defined(ASTCENC_SSE)
  #ifdef __SSE4_2__
    #define ASTCENC_SSE 42
  #elif defined(__SSE4_1__)
    #define ASTCENC_SSE 41
  #elif defined(__SSE2__)
    #define ASTCENC_SSE 20
  #else
    #define ASTCENC_SSE 0
  #endif
#endif
57
// ASTCENC_AVX: the highest AVX level reported by the compiler, encoded as
// 2 (AVX2), 1 (AVX) or 0 (none). A definition made before this point takes
// precedence over this auto-detection.
#if !defined(ASTCENC_AVX)
  #ifdef __AVX2__
    #define ASTCENC_AVX 2
  #elif defined(__AVX__)
    #define ASTCENC_AVX 1
  #else
    #define ASTCENC_AVX 0
  #endif
#endif
67
// ASTCENC_NEON: 1 when compiling for AArch64 (__aarch64__), otherwise 0.
// A definition made before this point takes precedence over this
// auto-detection.
#if !defined(ASTCENC_NEON)
  #ifdef __aarch64__
    #define ASTCENC_NEON 1
  #else
    #define ASTCENC_NEON 0
  #endif
#endif
75
// ASTCENC_VECALIGN: 32 when any AVX level is enabled, 16 otherwise.
// NOTE(review): presumably the byte alignment used for vector data - confirm
// against the users of this macro.
#if ASTCENC_AVX == 0
  #define ASTCENC_VECALIGN 16
#else
  #define ASTCENC_VECALIGN 32
#endif
81
82#if ASTCENC_SSE != 0 || ASTCENC_AVX != 0 || ASTCENC_POPCNT != 0
83 #include <immintrin.h>
84#endif
85
86/* ============================================================================
87 Fast math library; note that many of the higher-order functions in this set
88 use approximations which are less accurate, but faster, than <cmath> standard
89 library equivalents.
90
91 Note: Many of these are not necessarily faster than simple C versions when
92 used on a single scalar value, but are included for testing purposes as most
93 have an option based on SSE intrinsics and therefore provide an obvious route
94 to future vectorization.
95============================================================================ */
96
// Overlay of three 32-bit views of the same storage, used to inspect or edit
// the bit pattern of a float.
union if32
{
    uint32_t u; // Unsigned integer view.
    int32_t s;  // Signed integer view.
    float f;    // Floating-point view.
};
104
105// These are namespaced to avoid colliding with C standard library functions.
106namespace astc
107{
108
// Single-precision constants for pi and pi / 2. Declared constexpr so that
// they are initialized at compile time and usable in constant expressions.
static constexpr float PI          = 3.14159265358979323846f;
static constexpr float PI_OVER_TWO = 1.57079632679489661923f;
111
/**
 * @brief Compute the absolute value of a single-precision float.
 *
 * Thin wrapper around @c std::fabs.
 *
 * @param v The input value.
 *
 * @return The magnitude of @c v.
 */
static inline float fabs(float v)
{
    const float magnitude = std::fabs(v);
    return magnitude;
}
123
/**
 * @brief Test whether a float value is a NaN.
 *
 * Relies on the property that a NaN never compares equal to itself.
 *
 * @param v The value to test.
 *
 * @return @c true if @c v is a NaN, @c false otherwise.
 */
static inline bool isnan(float v)
{
    const bool equals_itself = (v == v);
    return !equals_itself;
}
135
/**
 * @brief Return the smaller of two values.
 *
 * For floats, a comparison involving a NaN is false, so @c q is returned
 * whenever either argument is a NaN: a NaN in @c p is replaced by @c q, while
 * a NaN in @c q is returned as-is.
 *
 * @param p The first value to compare.
 * @param q The second value to compare.
 *
 * @return @c p if it is strictly less than @c q, otherwise @c q.
 */
template<typename T>
static inline T min(T p, T q)
{
    if (p < q)
    {
        return p;
    }

    return q;
}
151
/**
 * @brief Return the smallest of three values.
 *
 * Built from two applications of the two-value @c min, so it inherits that
 * function's NaN behavior: for floats, a NaN in @c p or @c q is replaced by a
 * later argument, while a NaN in @c r is returned as-is.
 *
 * @param p The first value to compare.
 * @param q The second value to compare.
 * @param r The third value to compare.
 *
 * @return The smallest value.
 */
template<typename T>
static inline T min(T p, T q, T r)
{
    T smallest_pq = min(p, q);
    return min(smallest_pq, r);
}
168
/**
 * @brief Return the smallest of four values.
 *
 * Built as the two-value @c min of the pairwise minima of (@c p, @c q) and
 * (@c r, @c s), so it inherits that function's NaN behavior: for floats, a
 * NaN in @c p, @c q or @c r is replaced by another argument, while a NaN in
 * @c s is returned as-is.
 *
 * @param p The first value to compare.
 * @param q The second value to compare.
 * @param r The third value to compare.
 * @param s The fourth value to compare.
 *
 * @return The smallest value.
 */
template<typename T>
static inline T min(T p, T q, T r, T s)
{
    T smallest_pq = min(p, q);
    T smallest_rs = min(r, s);
    return min(smallest_pq, smallest_rs);
}
186
/**
 * @brief Return the larger of two values.
 *
 * For floats, a comparison involving a NaN is false, so @c q is returned
 * whenever either argument is a NaN: a NaN in @c p is replaced by @c q, while
 * a NaN in @c q is returned as-is.
 *
 * @param p The first value to compare.
 * @param q The second value to compare.
 *
 * @return @c p if it is strictly greater than @c q, otherwise @c q.
 */
template<typename T>
static inline T max(T p, T q)
{
    if (p > q)
    {
        return p;
    }

    return q;
}
202
/**
 * @brief Return the largest of three values.
 *
 * Built from two applications of the two-value @c max, so it inherits that
 * function's NaN behavior: for floats, a NaN in @c p or @c q is replaced by a
 * later argument, while a NaN in @c r is returned as-is.
 *
 * @param p The first value to compare.
 * @param q The second value to compare.
 * @param r The third value to compare.
 *
 * @return The largest value.
 */
template<typename T>
static inline T max(T p, T q, T r)
{
    T largest_pq = max(p, q);
    return max(largest_pq, r);
}
219
/**
 * @brief Return the largest of four values.
 *
 * Built as the two-value @c max of the pairwise maxima of (@c p, @c q) and
 * (@c r, @c s), so it inherits that function's NaN behavior: for floats, a
 * NaN in @c p, @c q or @c r is replaced by another argument, while a NaN in
 * @c s is returned as-is.
 *
 * @param p The first value to compare.
 * @param q The second value to compare.
 * @param r The third value to compare.
 * @param s The fourth value to compare.
 *
 * @return The largest value.
 */
template<typename T>
static inline T max(T p, T q, T r, T s)
{
    T largest_pq = max(p, q);
    T largest_rs = max(r, s);
    return max(largest_pq, largest_rs);
}
237
/**
 * @brief Clamp a value to the range [@c mn, @c mx].
 *
 * For floats, a NaN input is turned into @c mn.
 *
 * @param v  The value to clamp.
 * @param mn The min value (inclusive).
 * @param mx The max value (inclusive).
 *
 * @return The clamped value.
 */
template<typename T>
inline T clamp(T v, T mn, T mx)
{
    // The order and direction of these comparisons must be kept: any
    // comparison against a NaN is false, so a NaN input fails both tests and
    // falls through to the "min" value.
    if (v > mx)
    {
        return mx;
    }

    return (v > mn) ? v : mn;
}
258
259/**
260 * @brief Clamp a float value between 0.0f and 1.0f.
261 *
262 * NaNs are turned into 0.0f.
263 *
264 * @param v The value to clamp.
265 *
266 * @return The clamped value.
267 */
268static inline float clamp1f(float v)
269{
270 return astc::clamp(v, 0.0f, 1.0f);
271}
272
273/**
274 * @brief Clamp a float value between 0.0f and 255.0f.
275 *
276 * NaNs are turned into 0.0f.
277 *
278 * @param v The value to clamp.
279 *
280 * @return The clamped value.
281 */
282static inline float clamp255f(float v)
283{
284 return astc::clamp(v, 0.0f, 255.0f);
285}
286
/**
 * @brief Round a single-precision float down to an integral value.
 *
 * Thin wrapper around @c std::floor; the result is still a float.
 *
 * @param v The value to round.
 *
 * @return The largest integral value that is not greater than @c v.
 */
static inline float flt_rd(float v)
{
    const float floored = std::floor(v);
    return floored;
}
298
/**
 * @brief SP float round-to-nearest and convert to integer.
 *
 * Ties are rounded up, i.e. the result is floor(v + 0.5). Results for
 * non-negative inputs are the same as a plain truncating conversion of
 * (v + 0.5); negative inputs are also rounded to the nearest integer rather
 * than being truncated toward zero.
 *
 * The rounded value must be representable as an @c int; NaN and out-of-range
 * inputs are not handled.
 *
 * @param v The value to round.
 *
 * @return The rounded value.
 */
static inline int flt2int_rtn(float v)
{
    const float biased = v + 0.5f;
    const int truncated = static_cast<int>(biased);

    // The cast truncates toward zero, which is one too high when the biased
    // value is negative and not integral; step down by one in that case.
    return truncated - static_cast<int>(static_cast<float>(truncated) > biased);
}
311
/**
 * @brief SP float round down and convert to integer.
 *
 * Rounds toward negative infinity, matching @c flt_rd. Results for
 * non-negative inputs are the same as a plain truncating conversion; negative
 * non-integral inputs are rounded down rather than being truncated toward
 * zero.
 *
 * The rounded value must be representable as an @c int; NaN and out-of-range
 * inputs are not handled.
 *
 * @param v The value to round.
 *
 * @return The rounded value.
 */
static inline int flt2int_rd(float v)
{
    const int truncated = static_cast<int>(v);

    // The cast truncates toward zero, which is one too high for negative
    // non-integral values; step down by one in that case.
    return truncated - static_cast<int>(static_cast<float>(truncated) > v);
}
323
/**
 * @brief SP float bit-interpreted as an integer.
 *
 * The object representation is copied with @c std::memcpy, which is the
 * well-defined way to reinterpret bits in C++; reading a union member other
 * than the one last written is undefined behavior.
 *
 * @param v The value to bitcast.
 *
 * @return The bit pattern of @c v as an @c int.
 */
static inline int float_as_int(float v)
{
    static_assert(sizeof(int) == sizeof(float), "int and float must have the same size");

    int bits;
    std::memcpy(&bits, &v, sizeof(bits));
    return bits;
}
337
/**
 * @brief Integer bit-interpreted as an SP float.
 *
 * The object representation is copied with @c std::memcpy, which is the
 * well-defined way to reinterpret bits in C++; reading a union member other
 * than the one last written is undefined behavior.
 *
 * @param v The value to bitcast.
 *
 * @return The float whose bit pattern is @c v.
 */
static inline float int_as_float(int v)
{
    static_assert(sizeof(float) == sizeof(int), "int and float must have the same size");

    float value;
    std::memcpy(&value, &v, sizeof(value));
    return value;
}
351
/**
 * @brief Compute the reciprocal square root, 1.0 / sqrt(v).
 *
 * This implementation divides by @c std::sqrt rather than using an
 * approximation.
 *
 * @param v The input value.
 *
 * @return The reciprocal of the square root of @c v.
 */
static inline float rsqrt(float v)
{
    const float root = std::sqrt(v);
    return 1.0f / root;
}
363
/**
 * @brief Compute the square root of a single-precision float.
 *
 * Thin wrapper around @c std::sqrt; no approximation is used.
 *
 * @param v The input value.
 *
 * @return The square root of @c v.
 */
static inline float sqrt(float v)
{
    const float root = std::sqrt(v);
    return root;
}
375
/**
 * @brief Extract mantissa and exponent of a float value.
 *
 * Works directly on the IEEE 754 single-precision bit pattern, so that
 * v == mantissa * 2^exponent with the mantissa magnitude in [0.5, 1) for
 * normal inputs. Unlike @c std::frexp there is no special-casing: zero,
 * denormals, infinities and NaNs are processed with the same bit operations
 * (for example 0.0f yields a mantissa of 0.5f and an exponent of -126).
 *
 * @param      v    The input value.
 * @param[out] expo The output exponent.
 *
 * @return The mantissa, carrying the sign of @c v.
 */
static inline float frexp(float v, int* expo)
{
    static_assert(sizeof(uint32_t) == sizeof(float), "float must be 32 bits wide");

    // Copy the bits out with memcpy; reading an inactive union member is
    // undefined behavior in C++.
    uint32_t bits;
    std::memcpy(&bits, &v, sizeof(bits));

    // Convert the biased exponent field to int before removing the bias so
    // that the subtraction is done in signed arithmetic and cannot wrap.
    const int biased_exponent = static_cast<int>((bits >> 23) & 0xFFu);
    *expo = biased_exponent - 126;

    // Keep the sign and fraction bits and force the exponent field to 126,
    // which places the magnitude of the result in [0.5, 1).
    bits = (bits & 0x807FFFFFu) | 0x3F000000u;

    float mantissa;
    std::memcpy(&mantissa, &bits, sizeof(mantissa));
    return mantissa;
}
392
/**
 * @brief Initialize the seed structure for a random number generator.
 *
 * Important note: For the purposes of ASTC we want sets of random numbers for
 * use in the codec, but we want the same seed value across instances and
 * threads to ensure that image output is stable across compressor runs and
 * across platforms. Every PRNG created by this call will therefore return the
 * same sequence of values.
 *
 * Only the declaration is visible here; the implementation lives elsewhere.
 *
 * @param[out] state The state structure to initialize (two 64-bit words).
 */
void rand_init(uint64_t state[2]);
405
/**
 * @brief Return the next random number from the generator.
 *
 * This RNG is an implementation of the "xoroshiro128+ 1.0" PRNG, based on the
 * public-domain implementation given by David Blackman & Sebastiano Vigna at
 * http://vigna.di.unimi.it/xorshift/xoroshiro128plus.c
 *
 * Only the declaration is visible here; the implementation lives elsewhere.
 *
 * @param[in,out] state The state structure to use/update (two 64-bit words).
 *
 * @return The next 64-bit value in the sequence.
 */
uint64_t rand(uint64_t state[2]);
416
417}
418
/* ============================================================================
  Softfloat library with fp32 and fp16 conversion functionality.

  These software conversion routines are only declared when neither
  ASTCENC_F16C nor ASTCENC_NEON is enabled.
============================================================================ */
#if (ASTCENC_F16C == 0) && (ASTCENC_NEON == 0)
    /* Narrowing conversion: fp32 value to a 16-bit fp16 bit pattern. */
    /* NOTE(review): rounding mode is not visible here - confirm in the implementation. */
    uint16_t float_to_sf16(float val);
    /* Widening conversion: 16-bit fp16 bit pattern to an fp32 value. */
    float sf16_to_float(uint16_t val);
#endif
427
428/*********************************
429 Vector library
430*********************************/
431#include "astcenc_vecmathlib.h"
432
433/*********************************
434 Declaration of line types
435*********************************/
// Parametric line, 2D: points on the line are given by line = a + b * t.
// NOTE(review): presumably only the first two lanes of each vector are
// meaningful for the 2D case - confirm against the users of this type.
struct line2
{
    vfloat4 a; // Point on the line (the value at t = 0).
    vfloat4 b; // Direction vector, scaled by the parameter t.
};
443
// Parametric line, 3D.
// NOTE(review): presumably uses the same a + b * t form as line2, with the
// first three lanes of each vector meaningful - confirm against users.
struct line3
{
    vfloat4 a; // Presumably a point on the line (the value at t = 0).
    vfloat4 b; // Presumably the direction vector, scaled by t.
};
450
// NOTE(review): presumably the 4D counterpart of line2 and line3 (a
// parametric line of the form a + b * t) - confirm against users.
struct line4
{
    vfloat4 a; // Presumably a point on the line (the value at t = 0).
    vfloat4 b; // Presumably the direction vector, scaled by t.
};
456
457
// NOTE(review): presumably a preprocessed form of line2; how amod and bs are
// derived is not visible in this header - confirm against the code that
// fills them in.
struct processed_line2
{
    vfloat4 amod; // Presumably derived from the line's a term - TODO confirm.
    vfloat4 bs;   // Presumably derived from the line's b term - TODO confirm.
};
463
// NOTE(review): presumably a preprocessed form of line3; how amod and bs are
// derived is not visible in this header - confirm against the code that
// fills them in.
struct processed_line3
{
    vfloat4 amod; // Presumably derived from the line's a term - TODO confirm.
    vfloat4 bs;   // Presumably derived from the line's b term - TODO confirm.
};
469
// NOTE(review): presumably a preprocessed form of line4; how amod and bs are
// derived is not visible in this header - confirm against the code that
// fills them in.
struct processed_line4
{
    vfloat4 amod; // Presumably derived from the line's a term - TODO confirm.
    vfloat4 bs;   // Presumably derived from the line's b term - TODO confirm.
};
475
476#endif
477