1/*
2Copyright (c) 2012, Broadcom Europe Ltd
3All rights reserved.
4
5Redistribution and use in source and binary forms, with or without
6modification, are permitted provided that the following conditions are met:
7 * Redistributions of source code must retain the above copyright
8 notice, this list of conditions and the following disclaimer.
9 * Redistributions in binary form must reproduce the above copyright
10 notice, this list of conditions and the following disclaimer in the
11 documentation and/or other materials provided with the distribution.
12 * Neither the name of the copyright holder nor the
13 names of its contributors may be used to endorse or promote products
14 derived from this software without specific prior written permission.
15
16THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
20DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26*/
27
28#ifndef KHRN_INT_UTIL_H
29#define KHRN_INT_UTIL_H
30
31#include <ctype.h>
32#include <float.h>
33#include <math.h>
34
35#include "interface/khronos/common/khrn_int_common.h"
#if !defined(__VIDEOCORE__) && !defined(WIN32) // threadsx/nucleus define LONG which clashes
37#include "interface/vcos/vcos.h"
38#endif
39
40/******************************************************************************
41replacements for videocore intrinsics
42******************************************************************************/
43
44#ifdef _VIDEOCORE
45#include <vc/intrinsics.h>
/* on videocore the float min/max helpers are spelled in terms of _min/_max
   with both arguments cast to float (presumably the intrinsics pulled in by
   the include above -- confirm) */
#define _minf(x,y) _min((float)(x),(float)(y))
#define _maxf(x,y) _max((float)(x),(float)(y))
48#else
/*
   keep the low (y & 31) bits of x and clear all the others

   the mask is built in unsigned arithmetic: for a bit count of 31 the signed
   form (1 << 31) overflows int, which is undefined behaviour in c
*/
static INLINE int32_t _bmask(int32_t x, int32_t y)
{
   uint32_t mask = ((uint32_t)1 << (y & 0x1f)) - 1u;
   /* mask never has bit 31 set, so the result always fits in int32_t */
   return (int32_t)((uint32_t)x & mask);
}
53
/* smaller of two signed 32-bit values */
static INLINE int32_t _min(int32_t x, int32_t y)
{
   if (y <= x) {
      return y;
   }
   return x;
}
58
/* larger of two signed 32-bit values */
static INLINE int32_t _max(int32_t x, int32_t y)
{
   if (y >= x) {
      return y;
   }
   return x;
}
63
/*
 _msb(x): index (0..31) of the most significant set bit of x
 the msvc, gcc and portable variants below return -1 when x is 0
 one variant is selected per compiler
*/
#if defined(_MSC_VER)
static INLINE int32_t _msb(uint32_t x)
{
 int32_t l = -1; /* stays -1 when x is 0 */

 /* bsr is only executed for non-zero x; its result is copied out through eax */
 if (x)
 __asm {
 bsr eax, x
 mov l, eax
 }

 return l;
}
#elif defined __CC_ARM
static INLINE int32_t _msb(uint32_t x)
{
 /* NOTE(review): gives -1 for x == 0 only if __clz(0) is 32 -- confirm against the armcc documentation */
 return 31 - __clz(x);
}
#elif defined(__GNUC__)
static INLINE int32_t _msb(uint32_t x)
{
 /* x == 0 is tested first, so __builtin_clz is never called with 0 */
 return x ? (31 - __builtin_clz(x)) : -1;
}
#else
static INLINE int32_t _msb(uint32_t x) /* unsigned to get lsr */
{
 /* portable fallback: count how many right shifts it takes to reach 0 */
 int32_t msb = -1;
 while (x != 0) {
 ++msb;
 x >>= 1;
 }
 return msb;
}
#endif
98
/* number of set bits in x */
static INLINE uint32_t _count(uint32_t x)
{
   uint32_t bits;
   /* every pass clears the lowest set bit, so the loop runs once per set bit */
   for (bits = 0; x != 0; ++bits) {
      x &= x - 1;
   }
   return bits;
}
108
/*
   reverse the order of the low y bits of x: bit i of x ends up as bit
   (y - 1 - i) of the result, and bits of x at position y and above are
   dropped. y is expected to be in [0, 32]; y == 0 gives 0
*/
#if defined __CC_ARM && __TARGET_ARCH_THUMB >= 4
static INLINE uint32_t _bitrev(uint32_t x, uint32_t y)
{
   /* a shift by 32 is undefined in c, so y == 0 is handled explicitly and
      returns the same 0 as the portable version below */
   return (y != 0) ? (__rbit(x) >> (32 - y)) : 0;
}
#else
static INLINE uint32_t _bitrev(uint32_t x, uint32_t y)
{
   uint32_t bitrev = 0;
   uint32_t i;
   for (i = 0; i != y; ++i) {
      /* move bit i to the mirrored position within the y-bit field */
      bitrev |= ((x >> i) & 1) << (y - i - 1);
   }
   return bitrev;
}
#endif
125
/*
   _adds(x, y): x + y, saturated to [INT32_MIN, INT32_MAX]
   _subs(x, y): x - y, saturated to [INT32_MIN, INT32_MAX]
   _ror(x, y):  x rotated right by y bits (y is taken modulo 32 in the
                portable version)
*/
#ifdef __CC_ARM
static INLINE int32_t _adds(int32_t x, int32_t y)
{
   return __qadd(x, y);
}

static INLINE int32_t _subs(int32_t x, int32_t y)
{
   return __qsub(x, y);
}

static INLINE uint32_t _ror(uint32_t x, uint32_t y)
{
   return __ror(x, y);
}
#else
static INLINE int32_t _adds(int32_t x, int32_t y)
{
   /* do the sum in 64 bits: detecting overflow after a wrapped 32-bit add
      relies on signed overflow, which is undefined behaviour */
   int64_t z = (int64_t)x + (int64_t)y;
   if (z > 0x7fffffff) { return 0x7fffffff; }
   if (z < -0x7fffffff - 1) { return -0x7fffffff - 1; }
   return (int32_t)z;
}

static INLINE int32_t _subs(int32_t x, int32_t y)
{
   /* same approach as _adds: widen first, then clamp */
   int64_t z = (int64_t)x - (int64_t)y;
   if (z > 0x7fffffff) { return 0x7fffffff; }
   if (z < -0x7fffffff - 1) { return -0x7fffffff - 1; }
   return (int32_t)z;
}

static INLINE uint32_t _ror(uint32_t x, uint32_t y)
{
   /* mask both shift counts so that y == 0 (and y >= 32) never shifts by
      the full width, which is undefined in c */
   y &= 31;
   return (x >> y) | (x << ((32 - y) & 31));
}
#endif // __CC_ARM
159
/*
   absolute value of x

   the negation is done in unsigned arithmetic because -x overflows (undefined
   behaviour) for the most negative int32_t. that one input has no positive
   counterpart and is returned unchanged on two's complement targets
*/
static INLINE int32_t _abs(int32_t x)
{
   return (x < 0) ? (int32_t)(0u - (uint32_t)x) : x;
}
164
/* smaller of two floats; y is returned whenever (x < y) is false, which
   includes the case where either argument is a nan */
static INLINE float _minf(float x, float y)
{
   if (x < y) {
      return x;
   }
   return y;
}
169
/* larger of two floats; y is returned whenever (x > y) is false, which
   includes the case where either argument is a nan */
static INLINE float _maxf(float x, float y)
{
   if (x > y) {
      return x;
   }
   return y;
}
174
175#endif // !_VIDEOCORE
176
177
178/******************************************************************************
179misc stuff
180******************************************************************************/
181
/* number of elements in ARR: its total size divided by the size of its first
   element. ARR must be a real array -- a pointer gives a meaningless result */
#define ARR_COUNT(ARR) (sizeof(ARR) / sizeof(*(ARR)))
183
/*
   sign-extend a 16-bit value so the result lies in [-0x4000, 0xbfff]

   the value is moved down by 0x4000, narrowed to int16_t (which does the
   wrapping), then moved back up by the same amount
*/
static INLINE int32_t s_ext_off16(int32_t x)
{
   int16_t narrowed = (int16_t)(x - 0x4000);
   return (int32_t)narrowed + 0x4000;
}
189
/* true iff exactly one bit of x is set (so 0 is not a power of 2) */
static INLINE bool is_power_of_2(uint32_t x)
{
   bool nonzero = (x != 0);
   /* clearing the lowest set bit leaves nothing iff at most one bit was set */
   bool at_most_one_bit = ((x & (x - 1)) == 0);
   return nonzero && at_most_one_bit;
}
194
/*
   smallest power of 2 that is >= x

   x == 0 gives 1 (with the _msb replacements in this file, where _msb(0) is
   -1). the answer for x > 0x80000000 does not fit in 32 bits; 0 is returned
   in that case rather than shifting by 32, which is undefined in c. the shift
   is done on an unsigned 1 because (1 << 31) overflows a signed int
*/
static INLINE uint32_t next_power_of_2(uint32_t x)
{
   int32_t msb;

   if (is_power_of_2(x)) {
      return x;
   }

   msb = _msb(x);
   if (msb >= 31) {
      return 0; /* 2^32 is not representable */
   }
   return (uint32_t)1 << (msb + 1);
}
199
/* round x up to the next multiple of y; y must be a power of 2 (asserted) */
static INLINE uint32_t round_up(uint32_t x, uint32_t y)
{
   uint32_t low_bits = y - 1;
   vcos_assert(is_power_of_2(y));
   /* step past the boundary, then clear the bits below it */
   return (x + low_bits) & ~low_bits;
}
205
/* round the address x up to the next multiple of y; y must be a power of 2
   (asserted) */
static INLINE void *round_up_ptr(void *x, uint32_t y)
{
   uintptr_t low_bits = (uintptr_t)(y - 1);
   uintptr_t addr = (uintptr_t)x;
   vcos_assert(is_power_of_2(y));
   /* step past the boundary, then clear the bits below it */
   return (void *)((addr + low_bits) & ~low_bits);
}
211
/* x modulo y with a non-negative result for positive y (c's % takes the sign
   of the dividend, so a negative remainder is moved up by y) */
static INLINE uint32_t mod(int32_t x, int32_t y)
{
   int32_t rem = x % y;
   if (rem < 0) {
      rem += y;
   }
   return rem;
}
217
/* defined elsewhere. presumably returns the size in bytes of one element of
   the given GLenum type -- TODO confirm against the definition */
extern int khrn_get_type_size(int type /* GLenum*/);
219
/*
   largest value in an array of unsigned indices

   count    number of indices to scan
   size     size of one index in bytes: 1 (uint8_t) or 2 (uint16_t). any other
            value hits UNREACHABLE()
   indices  the index array. it is only read, so it is accessed through
            const-qualified pointers rather than casting the const away

   returns the largest index, or -1 if count <= 0
*/
static INLINE int find_max(int count, int size, const void *indices)
{
   int i;
   int32_t max = -1;

   switch (size) {
   case 1:
   {
      const uint8_t *u = (const uint8_t *)indices;

      for (i = 0; i < count; i++) {
         if ((int32_t)u[i] > max) {
            max = (int32_t)u[i];
         }
      }

      break;
   }
   case 2:
   {
      const uint16_t *u = (const uint16_t *)indices;

      for (i = 0; i < count; i++) {
         if ((int32_t)u[i] > max) {
            max = (int32_t)u[i];
         }
      }

      break;
   }
   default:
      UNREACHABLE();
      break;
   }

   return (int)max;
}
251
252/******************************************************************************
253for poking around inside floats (we assume ieee-754)
254******************************************************************************/
255
/* one 32-bit word that can be viewed either as a float or as its raw bits */
typedef union {
   float f;
   uint32_t bits;
} KHRN_FLOAT_BITS_T;

/* raw 32-bit pattern of f (no numeric conversion takes place) */
static INLINE uint32_t float_to_bits(float f)
{
   KHRN_FLOAT_BITS_T pun;
   pun.f = f;
   return pun.bits;
}

/* the float whose raw 32-bit pattern is bits (inverse of float_to_bits) */
static INLINE float float_from_bits(uint32_t bits)
{
   KHRN_FLOAT_BITS_T pun;
   pun.bits = bits;
   return pun.f;
}
274
275/******************************************************************************
276input cleaning stuff
277******************************************************************************/
278
279#include "interface/khronos/common/khrn_int_util_cr.h"
280
/* run clean_float over count floats from src, storing the results in dst in
   the same order */
static INLINE void clean_floats(float *dst, const float *src, uint32_t count)
{
   const float *end = src + count;
   while (src != end) {
      *dst++ = clean_float(*src++);
   }
}
288
289/******************************************************************************
290float to int conversions
291******************************************************************************/
292
/*
   add a rounding bias to f for a conversion that uses the given shift

   the bias is the float with exponent field 127 - (shift + 2) and an all-ones
   mantissa. going by the name, this turns a round-to-negative-infinity
   conversion into round-to-nearest -- confirm against the callers

   shift must be in [-129, 124] so that the exponent field stays in [1, 254]
*/
static INLINE float r2ni_to_r2n_bias(float f, int32_t shift)
{
   uint32_t bias_bits;
   vcos_assert((shift >= -129) && (shift <= 124));
   bias_bits = ((127 - (shift + 2)) << 23) | 0x7fffff;
   return f + float_from_bits(bias_bits);
}
298
/*
   convert float to integer value with shift
   saturating, round to nearest

   the result is f * 2^shift rounded to the nearest integer and clamped to
   [INT32_MIN, INT32_MAX]

   on videocore, we support shifts in [-32, 31]. we only need to support shifts
   of 0 and 16 for client-side code (the client-side path accepts [0, 31])

   the bit patterns and the scale factor are built from unsigned constants:
   (1 << 31) overflows a signed int, which is undefined behaviour in c
*/

static INLINE int32_t float_to_int_shift(float f, int32_t shift)
{
#ifdef _VIDEOCORE
   /* floattouint is wrapping, round to negative infinity. shift should be in [-32, 31] */
   vcos_assert((shift >= -32) && (shift <= 31));
   f = r2ni_to_r2n_bias(f, shift);
   /* saturate before converting, since the conversion itself wraps */
   if (f < float_from_bits(0x80000000u | ((127 + (31 - shift)) << 23))) { return (int32_t)0x80000000; }
   if (f > float_from_bits(((127 + (30 - shift)) << 23) | 0x7fffff)) { return 0x7fffffff; }
   return _floattouint(f, shift);
#else
   vcos_assert((shift >= 0) && (shift <= 31));
   f *= (float)((uint32_t)1 << shift);
   /* bias by just under a half, away from zero, so truncation rounds to nearest */
   f += (f < 0.0f) ? -0.49999997f : 0.49999997f; /* assume float -> int conversion is round to zero */
   if (f < -2.14748365e9f) { return (int32_t)0x80000000; }
   if (f > 2.14748352e9f) { return 0x7fffffff; }
   return (int32_t)f;
#endif
}
325
/*
 convert float to 48-bit integer value with shift
 saturating, round to nearest

 this is only supported on videocore. shift should be in [-16, 31]

 the result is clamped to [-0x800000000000, 0x7fffffffffff] and otherwise
 assembled from two _floattouint conversions, one at (shift - 16) for the
 upper part and one at shift for the lower part
*/

#ifdef _VIDEOCORE
static INLINE int64_t float_to_int48_shift(float f, int32_t shift)
{
 /* floattouint is wrapping, round to negative infinity. shift should be in [-32, 31] */
 /* (the call with shift - 16 below is why this function only accepts shift >= -16) */
 vcos_assert((shift >= -16) && (shift <= 31));
 f = r2ni_to_r2n_bias(f, shift);
 /* saturate first: the comparisons are against floats built from raw bit patterns */
 if (f < float_from_bits((1 << 31) | ((127 + (47 - shift)) << 23))) { return 0xffff800000000000ll; }
 if (f > float_from_bits(((127 + (46 - shift)) << 23) | 0x7fffff)) { return 0x00007fffffffffffll; }
 /* NOTE(review): the | assumes _floattouint(f, shift) does not sign-extend into the upper bits -- confirm its return type */
 return ((int64_t)(int32_t)_floattouint(f, shift - 16) << 16) | _floattouint(f, shift);
}
#endif
344
/*
   convert float to integer value
   saturating, round to nearest

   this is float_to_int_shift with a shift of 0
*/

static INLINE int32_t float_to_int(float f)
{
   const int32_t no_shift = 0;
   return float_to_int_shift(f, no_shift);
}
354
/*
   convert float to integer value
   saturating, round to negative inf
*/

static INLINE int32_t float_to_int_floor(float f)
{
   /*
      special-case handling of small negative floats
      this is so we return -1 for negative denormals (which the vg cts requires)
      (we shouldn't need this if the fp library/hw properly handle denormals)

      the test reads: sign bit set, not negative zero (u + u drops the sign
      bit), and greater than -1. the sign bit is tested with an unsigned
      constant because (1 << 31) overflows a signed int
   */

   uint32_t u = float_to_bits(f);
   if (((u & 0x80000000u) && (u + u)) && (f > -1.0f)) {
      return -1;
   }

   f = floorf(f); /* assume float -> int conversion is round to zero */
   if (f < -2.14748365e9f) { return (int32_t)0x80000000; }
   if (f > 2.14748352e9f) { return 0x7fffffff; }
   return (int32_t)f;
}
378
/*
   convert float to integer value
   saturating, round to zero

   values below/above the two limits clamp to the most negative/positive
   int32_t; everything in between is truncated by the cast
*/

static INLINE int32_t float_to_int_zero(float f)
{
   int32_t result;

   /* assume float -> int conversion is round to zero */
   if (f < -2.14748365e9f) {
      result = 0x80000000;
   } else if (f > 2.14748352e9f) {
      result = 0x7fffffff;
   } else {
      result = (int32_t)f;
   }
   return result;
}
391
/*
   convert float to 16.16 fixed point value
   saturating, round to nearest

   Khronos documentation:

   If a value is so large in magnitude that it cannot be represented with the
   requested type, then the nearest value representable using the requested type
   is returned.

   this is float_to_int_shift with a shift of 16 (the number of fraction bits)
*/

static INLINE int32_t float_to_fixed(float f)
{
   const int32_t fraction_bits = 16;
   return float_to_int_shift(f, fraction_bits);
}
407
408/******************************************************************************
409exact float tests (in case fp library/hw don't handle denormals correctly)
410******************************************************************************/
411
/* true iff x and y have exactly the same bit pattern (so +0 and -0 differ,
   and a nan is identical to itself) */
static INLINE bool floats_identical(float x, float y)
{
   uint32_t x_bits = float_to_bits(x);
   uint32_t y_bits = float_to_bits(y);
   return x_bits == y_bits;
}
416
/* true iff f is +0 or -0, decided from the bit pattern alone: shifting the
   sign bit out leaves 0 only when exponent and mantissa are both 0 */
static INLINE bool is_zero(float f)
{
   uint32_t without_sign = float_to_bits(f) << 1;
   return without_sign == 0;
}
422
/*
   true iff the sign bit of f is set or f is +0, decided from the bit pattern
   alone (so -0 and negative denormals count as <= 0)

   the sign bit is tested with an unsigned constant because (1 << 31)
   overflows a signed int, which is undefined behaviour in c
*/
static INLINE bool is_le_zero(float f)
{
   uint32_t u = float_to_bits(f);
   return (u & 0x80000000u) || !u;
}
428
429/******************************************************************************
430alignment stuff
431******************************************************************************/
432
/* alignof(T): alignment requirement of type T, via the compiler's own
   operator where one is used below, otherwise derived from struct layout */
#ifdef _MSC_VER
 #define alignof(T) __alignof(T)
#elif defined(__CC_ARM)
 #define alignof(T) __alignof__(T)
#else
 /* the trailing char forces padding up to T's alignment, so the struct is
    exactly one alignment unit larger than T */
 #define alignof(T) (sizeof(struct { T t; char ch; }) - sizeof(T))
#endif
440
/*
   must use both ALIGNED and ALIGN_TO...
   ALIGNED(16) int align_me[10];
   ALIGN_TO(align_me, 16);

   each compiler needs only one of the two, and the other expands to nothing:
   msvc and gcc take the alignment on the declaration (ALIGNED), high c takes
   it in a separate pragma (ALIGN_TO)
*/

#ifdef _MSC_VER
   #define ALIGNED(ALIGNMENT) __declspec(align(ALIGNMENT))
   #define ALIGN_TO(X, ALIGNMENT)
#elif defined(__GNUC__)
   #define ALIGNED(ALIGNMENT) __attribute__ ((aligned(ALIGNMENT)))
   #define ALIGN_TO(X, ALIGNMENT)
#elif defined(__HIGHC__)
   #define ALIGNED(ALIGNMENT)
   #define ALIGN_TO(X, ALIGNMENT) pragma Align_to(ALIGNMENT, X)
#else
   /* leave undefined (will get error on use) */
#endif
459
460/******************************************************************************
461range/rect intersect stuff
462******************************************************************************/
463
/*
 clipping helpers, defined elsewhere

 NOTE(review): going by the names and signatures, each one presumably clips
 the range/rect passed by pointer (the ...0 arguments) against the one passed
 by value (the ...1 arguments), and the "2" variants keep a pair of
 ranges/rects (a and b) in step -- confirm against the definitions
*/
extern void khrn_clip_range(
 int32_t *x0, int32_t *l0,
 int32_t x1, int32_t l1);

extern void khrn_clip_range2(
 int32_t *ax0, int32_t *bx0, int32_t *l0,
 int32_t ax1, int32_t al1,
 int32_t bx1, int32_t bl1);

extern void khrn_clip_rect(
 int32_t *x0, int32_t *y0, int32_t *w0, int32_t *h0,
 int32_t x1, int32_t y1, int32_t w1, int32_t h1);

extern void khrn_clip_rect2(
 int32_t *ax0, int32_t *ay0, int32_t *bx0, int32_t *by0, int32_t *w0, int32_t *h0,
 int32_t ax1, int32_t ay1, int32_t aw1, int32_t ah1,
 int32_t bx1, int32_t by1, int32_t bw1, int32_t bh1);
481
/*
   true iff the half-open ranges [x0, x0 + l0) and [x1, x1 + l1) overlap

   the end points are computed in 64 bits: x + l can exceed the int32_t range
   for large extents, and a wrapped (and formally undefined) 32-bit sum would
   make overlapping ranges look disjoint
*/
static INLINE bool khrn_ranges_intersect(
   int32_t x0, int32_t l0,
   int32_t x1, int32_t l1)
{
   int64_t end0 = (int64_t)x0 + (int64_t)l0;
   int64_t end1 = (int64_t)x1 + (int64_t)l1;
   return ((int64_t)x0 < end1) && ((int64_t)x1 < end0);
}
488
/* true iff the two rects overlap, i.e. their x ranges intersect and their y
   ranges intersect (the y test is only made when the x test passes) */
static INLINE bool khrn_rects_intersect(
   int32_t x0, int32_t y0, int32_t w0, int32_t h0,
   int32_t x1, int32_t y1, int32_t w1, int32_t h1)
{
   bool overlap = khrn_ranges_intersect(x0, w0, x1, w1);
   if (overlap) {
      overlap = khrn_ranges_intersect(y0, h0, y1, h1);
   }
   return overlap;
}
495
496/******************************************************************************
497memory barrier
498******************************************************************************/
499
/* khrn_barrier(): an empty inline function in single-threaded builds, an
   out-of-line function on videocore, and deliberately not declared otherwise */
#ifdef KHRN_SINGLE_THREADED
 /* everything is done in one thread, no need for barriers */
 static INLINE void khrn_barrier(void) {}
#elif defined(_VIDEOCORE)
 /* don't need a real memory barrier
 * extern function should do as a compiler barrier, but todo: is there a better way? */
 extern void khrn_barrier(void);
#else
 /* leave undefined (will get error on use) */
#endif
510
511#endif
512