khrn_int_util.h source code [PiUserland/interface/khronos/common/khrn_int_util.h]

1	/*
2	Copyright (c) 2012, Broadcom Europe Ltd
3	All rights reserved.
4
5	Redistribution and use in source and binary forms, with or without
6	modification, are permitted provided that the following conditions are met:
7	* Redistributions of source code must retain the above copyright
8	notice, this list of conditions and the following disclaimer.
9	* Redistributions in binary form must reproduce the above copyright
10	notice, this list of conditions and the following disclaimer in the
11	documentation and/or other materials provided with the distribution.
12	* Neither the name of the copyright holder nor the
13	names of its contributors may be used to endorse or promote products
14	derived from this software without specific prior written permission.
15
16	THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17	ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18	WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19	DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
20	DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21	(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22	LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23	ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24	(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25	SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26	*/
27
28	#ifndef KHRN_INT_UTIL_H
29	#define KHRN_INT_UTIL_H
30
31	#include <ctype.h>
32	#include <float.h>
33	#include <math.h>
34
35	#include "interface/khronos/common/khrn_int_common.h"
36	#if !defined(__VIDEOCORE__) && !defined(WIN32) // threadsx/nucleus define LONG which clashses
37	#include "interface/vcos/vcos.h"
38	#endif
39
40	/******************************************************************************
41	replacements for videocore intrinsics
42	******************************************************************************/
43
44	#ifdef _VIDEOCORE
45	#include <vc/intrinsics.h>
46	#define _minf(x,y) _min((float)(x),(float)(y))
47	#define _maxf(x,y) _max((float)(x),(float)(y))
48	#else
/*
   keep the low (y & 31) bits of x and clear all the others

   the mask is built in unsigned arithmetic so a bit count of 31 does not shift
   a signed 1 into the sign bit (undefined behaviour in c)
*/
static INLINE int32_t _bmask(int32_t x, int32_t y)
{
   uint32_t mask = ((uint32_t)1 << (y & 0x1f)) - 1;
   return (int32_t)((uint32_t)x & mask);
}
53
/* signed 32-bit minimum: returns x when x < y, otherwise y */
static INLINE int32_t _min(int32_t x, int32_t y)
{
   if (x < y) {
      return x;
   }
   return y;
}
58
/* signed 32-bit maximum: returns x when x > y, otherwise y */
static INLINE int32_t _max(int32_t x, int32_t y)
{
   if (x > y) {
      return x;
   }
   return y;
}
63
/*
   _msb: index of the most significant set bit of x (bit 0 is the lsb)

   the msvc, gcc and portable variants return -1 when x is 0. the armcc variant
   returns 31 - __clz(x) (presumably __clz(0) is 32, which also gives -1 --
   confirm against the armcc documentation)
*/
#if defined(_MSC_VER)
static INLINE int32_t _msb(uint32_t x)
{
   int32_t l = -1;

   /* bsr leaves its destination undefined for a zero source, so skip it */
   if (x)
      __asm {
         bsr eax, x
         mov l, eax
      }

   return l;
}
#elif defined __CC_ARM
static INLINE int32_t _msb(uint32_t x)
{
   return 31 - __clz(x);
}
#elif defined(__GNUC__)
static INLINE int32_t _msb(uint32_t x)
{
   /* __builtin_clz(0) is undefined, so zero is handled explicitly */
   return x ? (31 - __builtin_clz(x)) : -1;
}
#else
static INLINE int32_t _msb(uint32_t x) /* unsigned to get lsr */
{
   int32_t msb = -1;
   while (x != 0) {
      ++msb;
      x >>= 1;
   }
   return msb;
}
#endif
98
/* number of set bits in x */
static INLINE uint32_t _count(uint32_t x)
{
   uint32_t count = 0;
   while (x != 0) {
      x &= x - 1; /* clears the lowest set bit, so we loop once per set bit */
      ++count;
   }
   return count;
}
108
/*
   _bitrev: reverse the order of the low y bits of x

   bit i of x (0 <= i < y) ends up in bit (y - 1 - i) of the result; all higher
   result bits are 0. y == 0 gives 0. y is expected to be at most 32
*/
#if defined __CC_ARM && __TARGET_ARCH_THUMB >= 4
static INLINE uint32_t _bitrev(uint32_t x, uint32_t y)
{
   /* a shift by 32 is undefined in c, so y == 0 is handled explicitly */
   return y ? (__rbit(x) >> (32 - y)) : 0;
}
#else
static INLINE uint32_t _bitrev(uint32_t x, uint32_t y)
{
   uint32_t bitrev = 0;
   uint32_t i;
   for (i = 0; i != y; ++i) {
      bitrev |= ((x >> i) & 1) << (y - i - 1);
   }
   return bitrev;
}
#endif
125
/*
   _adds / _subs: saturating signed 32-bit add / subtract (results clamp to
   [-0x80000000, 0x7fffffff] instead of wrapping)
   _ror: rotate x right by y bits

   the armcc variants map straight onto compiler intrinsics. the portable
   variants do the arithmetic in 64 bits, so no signed overflow (undefined
   behaviour in c) is ever evaluated, and _ror reduces the count modulo 32 so
   that a count of 0 does not turn into a shift by 32
*/
#ifdef __CC_ARM
static INLINE int32_t _adds(int32_t x, int32_t y)
{
   return __qadd(x, y);
}

static INLINE int32_t _subs(int32_t x, int32_t y)
{
   return __qsub(x, y);
}

static INLINE uint32_t _ror(uint32_t x, uint32_t y)
{
   return __ror(x, y);
}
#else
static INLINE int32_t _adds(int32_t x, int32_t y)
{
   int64_t z = (int64_t)x + (int64_t)y;
   if (z > 0x7fffffff) { return 0x7fffffff; }
   if (z < -0x7fffffff - 1) { return -0x7fffffff - 1; }
   return (int32_t)z;
}

static INLINE int32_t _subs(int32_t x, int32_t y)
{
   int64_t z = (int64_t)x - (int64_t)y;
   if (z > 0x7fffffff) { return 0x7fffffff; }
   if (z < -0x7fffffff - 1) { return -0x7fffffff - 1; }
   return (int32_t)z;
}

static INLINE uint32_t _ror(uint32_t x, uint32_t y)
{
   y &= 31;
   return (x >> y) | (x << ((32 - y) & 31));
}
#endif // __CC_ARM
159
/*
   absolute value of x

   the negation is done in unsigned arithmetic so x == -0x80000000 does not
   overflow (undefined behaviour in c). that input has no positive counterpart
   in 32 bits and comes back unchanged on two's complement targets
*/
static INLINE int32_t _abs(int32_t x)
{
   return x > 0 ? x : (int32_t)(0u - (uint32_t)x);
}
164
/* float minimum: returns x when x < y compares true, otherwise y */
static INLINE float _minf(float x, float y)
{
   if (x < y) {
      return x;
   }
   return y;
}
169
/* float maximum: returns x when x > y compares true, otherwise y */
static INLINE float _maxf(float x, float y)
{
   if (x > y) {
      return x;
   }
   return y;
}
174
175	#endif // !_VIDEOCORE
176
177
178	/******************************************************************************
179	misc stuff
180	******************************************************************************/
181
182	#define ARR_COUNT(ARR) (sizeof(ARR) / sizeof(*(ARR)))
183
/*
   sign-extend 16-bit value with range [-0x4000, 0xbfff]

   the low 16 bits of x are kept and the result is the value with those low
   bits that lies in [-0x4000, 0xbfff]. relies on the conversion to int16_t
   wrapping modulo 0x10000 (implementation-defined, but true of the supported
   compilers)
*/
static INLINE int32_t s_ext_off16(int32_t x)
{
   return ((int32_t)(int16_t)(x - 0x4000)) + 0x4000;
}
189
/* true iff exactly one bit of x is set (0 is not a power of 2) */
static INLINE bool is_power_of_2(uint32_t x)
{
   /* x & (x - 1) clears the lowest set bit; nothing is left for a power of 2 */
   return (x != 0) && ((x & (x - 1)) == 0);
}
194
/*
   smallest power of 2 that is >= x

   x == 0 gives 1. for x > 0x80000000 the answer does not fit in 32 bits and 0
   is returned. everything is done in unsigned arithmetic, so no signed shift
   into or past the sign bit is ever evaluated
*/
static INLINE uint32_t next_power_of_2(uint32_t x)
{
   if (x == 0) {
      return 1;
   }

   /* copy the top set bit of (x - 1) into every lower position, then add 1 */
   --x;
   x |= x >> 1;
   x |= x >> 2;
   x |= x >> 4;
   x |= x >> 8;
   x |= x >> 16;
   return x + 1;
}
199
200	static INLINE uint32_t round_up(uint32_t x, uint32_t y)
201	{
202	vcos_assert(is_power_of_2(y));
203	return (x + (y - `1`)) & ~(y - `1`);
204	}
205
206	static INLINE void round_up_ptr(void* *x, uint32_t y)
207	{
208	vcos_assert(is_power_of_2(y));
209	return (void *)(((uintptr_t)x + (uintptr_t)(y - `1`)) & ~(uintptr_t)(y - `1`));
210	}
211
/*
   x modulo y, with a negative remainder moved up by y

   for y > 0 the result is in [0, y). y must not be 0
*/
static INLINE uint32_t mod(int32_t x, int32_t y)
{
   int32_t m = x % y;
   return (m < 0) ? (m + y) : m;
}
217
extern int khrn_get_type_size(int type /* GLenum */);
219
/*
   largest value in an array of count unsigned indices

   size is the width of one index in bytes: 1 (uint8_t) or 2 (uint16_t). any
   other size hits UNREACHABLE(). returns -1 when count is <= 0
*/
static INLINE int find_max(int count, int size, const void *indices)
{
   int i;
   int32_t max = -1;

   switch (size) {
   case 1:
   {
      const uint8_t *u = (const uint8_t *)indices;

      for (i = 0; i < count; i++) {
         if ((int32_t)u[i] > max) {
            max = (int32_t)u[i];
         }
      }

      break;
   }
   case 2:
   {
      const uint16_t *u = (const uint16_t *)indices;

      for (i = 0; i < count; i++) {
         if ((int32_t)u[i] > max) {
            max = (int32_t)u[i];
         }
      }

      break;
   }
   default:
      UNREACHABLE();
      break;
   }

   return (int) max;
}
251
252	/******************************************************************************
253	for poking around inside floats (we assume ieee-754)
254	******************************************************************************/
255
/* overlays a float and a 32-bit word so either view can be read back */
typedef union {
   float f;
   uint32_t bits;
} KHRN_FLOAT_BITS_T;

/* raw 32-bit pattern that represents f */
static INLINE uint32_t float_to_bits(float f)
{
   KHRN_FLOAT_BITS_T pun;
   pun.f = f;
   return pun.bits;
}

/* float whose raw 32-bit pattern is bits */
static INLINE float float_from_bits(uint32_t bits)
{
   KHRN_FLOAT_BITS_T pun;
   pun.bits = bits;
   return pun.f;
}
274
275	/******************************************************************************
276	input cleaning stuff
277	******************************************************************************/
278
279	#include "interface/khronos/common/khrn_int_util_cr.h"
280
281	static INLINE void clean_floats(float dst, const* float *src, uint32_t count)
282	{
283	uint32_t i;
284	for (i = `0`; i != count; ++i) {
285	dst[i] = clean_float(src[i]);
286	}
287	}
288
289	/******************************************************************************
290	float to int conversions
291	******************************************************************************/
292
293	static INLINE float r2ni_to_r2n_bias(float f, int32_t shift)
294	{
295	vcos_assert((shift >= -`129`) && (shift <= `124`));
296	return f + float_from_bits(((`127` - (shift + `2`)) << `23`) \| `0x7fffff`);
297	}
298
299	/*
300	convert float to integer value with shift
301	saturating, round to nearest
302
303	on videocore, we support shifts in [-32, 31]. we only need to support shifts
304	of 0 and 16 for client-side code
305	*/
306
/*
   returns f * 2^shift as an int32_t, rounded to nearest and clamped to
   [-0x80000000, 0x7fffffff]

   on videocore shift must be in [-32, 31]; elsewhere it must be in [0, 31].
   sign-bit constants are built with unsigned shifts so that 1 << 31 is never
   evaluated on a signed int (undefined behaviour in c)
*/
static INLINE int32_t float_to_int_shift(float f, int32_t shift)
{
#ifdef _VIDEOCORE
   /* floattouint is wrapping, round to negative infinity. shift should be in [-32, 31] */
   vcos_assert((shift >= -32) && (shift <= 31));
   f = r2ni_to_r2n_bias(f, shift);
   if (f < float_from_bits(((uint32_t)1 << 31) | ((uint32_t)(127 + (31 - shift)) << 23))) { return (int32_t)0x80000000; }
   if (f > float_from_bits(((127 + (30 - shift)) << 23) | 0x7fffff)) { return 0x7fffffff; }
   return _floattouint(f, shift);
#else
   vcos_assert((shift >= 0) && (shift <= 31));
   f *= (float)((uint32_t)1 << shift);
   f += (f < 0.0f) ? -0.49999997f : 0.49999997f; /* assume float -> int conversion is round to zero */
   if (f < -2.14748365e9f) { return (int32_t)0x80000000; }
   if (f > 2.14748352e9f) { return 0x7fffffff; }
   return (int32_t)f;
#endif
}
325
326	/*
327	convert float to 48-bit integer value with shift
328	saturating, round to nearest
329
330	this is only supported on videocore. shift should be in [-16, 31]
331	*/
332
333	#ifdef _VIDEOCORE
334	static INLINE int64_t float_to_int48_shift(float f, int32_t shift)
335	{
336	/ floattouint is wrapping, round to negative infinity. shift should be in [-32, 31] /
337	vcos_assert((shift >= -`16`) && (shift <= `31`));
338	f = r2ni_to_r2n_bias(f, shift);
339	if (f < float_from_bits((`1` << `31`) \| ((`127` + (`47` - shift)) << `23`))) { return `0xffff800000000000ll`; }
340	if (f > float_from_bits(((`127` + (`46` - shift)) << `23`) \| `0x7fffff`)) { return `0x00007fffffffffffll`; }
341	return ((int64_t)(int32_t)_floattouint(f, shift - `16`) << `16`) \| _floattouint(f, shift);
342	}
343	#endif
344
345	/*
346	convert float to integer value
347	saturating, round to nearest
348	*/
349
350	static INLINE int32_t float_to_int(float f)
351	{
352	return float_to_int_shift(f, `0`);
353	}
354
355	/*
356	convert float to integer value
357	saturating, round to negative inf
358	*/
359
360	static INLINE int32_t float_to_int_floor(float f)
361	{
362	/*
363	special-case handling of small negative floats
364	this is so we return -1 for negative denormals (which the vg cts requires)
365	(we shouldn't need this if the fp library/hw properly handle denormals)
366	*/
367
368	uint32_t u = float_to_bits(f);
369	if (((u & (`1` << `31`)) && (u + u)) && (f > -`1.0f`)) {
370	return -`1`;
371	}
372
373	f = floorf(f); / assume float -> int conversion is round to zero /
374	if (f < -`2.14748365e9f`) { return `0x80000000`; }
375	if (f > `2.14748352e9f`) { return `0x7fffffff`; }
376	return (int32_t)f;
377	}
378
379	/*
380	convert float to integer value
381	saturating, round to zero
382	*/
383
/*
   float -> int32_t, round to zero, clamped to [-0x80000000, 0x7fffffff]

   the two thresholds are the floats -2^31 and 2^31 - 128 (the largest float
   below 2^31), so anything that reaches the final cast fits in an int32_t
*/
static INLINE int32_t float_to_int_zero(float f)
{
   /* assume float -> int conversion is round to zero */
   if (f < -2.14748365e9f) { return (int32_t)0x80000000; }
   if (f > 2.14748352e9f) { return 0x7fffffff; }
   return (int32_t)f;
}
391
392	/*
393	convert float to 16.16 fixed point value
394	saturating, round to nearest
395
396	Khronos documentation:
397
398	If a value is so large in magnitude that it cannot be represented with the
399	requested type, then the nearest value representable using the requested type
400	is returned.
401	*/
402
403	static INLINE int32_t float_to_fixed(float f)
404	{
405	return float_to_int_shift(f, `16`);
406	}
407
408	/******************************************************************************
409	exact float tests (in case fp library/hw don't handle denormals correctly)
410	******************************************************************************/
411
412	static INLINE bool floats_identical(float x, float y)
413	{
414	return float_to_bits(x) == float_to_bits(y);
415	}
416
417	static INLINE bool is_zero(float f)
418	{
419	uint32_t u = float_to_bits(f);
420	return !(u + u);
421	}
422
423	static INLINE bool is_le_zero(float f)
424	{
425	uint32_t u = float_to_bits(f);
426	return (u & (`1` << `31`)) \|\| !u;
427	}
428
429	/******************************************************************************
430	alignment stuff
431	******************************************************************************/
432
433	#ifdef _MSC_VER
434	#define alignof(T) __alignof(T)
435	#elif defined(__CC_ARM)
436	#define alignof(T) __alignof__(T)
437	#else
438	#define alignof(T) (sizeof(struct { T t; char ch; }) - sizeof(T))
439	#endif
440
441	/*
442	must use both ALIGNED and ALIGN_TO...
443	ALIGNED(16) int align_me[10];
444	ALIGN_TO(align_me, 16);
445	*/
446
447	#ifdef _MSC_VER
448	#define ALIGNED(ALIGNMENT) __declspec(align(ALIGNMENT))
449	#define ALIGN_TO(X, ALIGNMENT)
450	#elif defined(__GNUC__)
451	#define ALIGNED(ALIGNMENT) __attribute__ ((aligned(ALIGNMENT)))
452	#define ALIGN_TO(X, ALIGNMENT)
453	#elif defined(__HIGHC__)
454	#define ALIGNED(ALIGMENT)
455	#define ALIGN_TO(X, ALIGNMENT) pragma Align_to(ALIGNMENT, X)
456	#else
/* leave undefined (will get error on use) */
458	#endif
459
460	/******************************************************************************
461	range/rect intersect stuff
462	******************************************************************************/
463
/*
   clipping helpers, defined elsewhere

   the pointer parameters are in/out: presumably the range/rect they describe
   is clipped in place against the range/rect passed by value -- confirm
   against the definitions
*/
extern void khrn_clip_range(
   int32_t *x0, int32_t *l0,
   int32_t x1, int32_t l1);

extern void khrn_clip_range2(
   int32_t *ax0, int32_t *bx0, int32_t *l0,
   int32_t ax1, int32_t al1,
   int32_t bx1, int32_t bl1);

extern void khrn_clip_rect(
   int32_t *x0, int32_t *y0, int32_t *w0, int32_t *h0,
   int32_t x1, int32_t y1, int32_t w1, int32_t h1);

extern void khrn_clip_rect2(
   int32_t *ax0, int32_t *ay0, int32_t *bx0, int32_t *by0, int32_t *w0, int32_t *h0,
   int32_t ax1, int32_t ay1, int32_t aw1, int32_t ah1,
   int32_t bx1, int32_t by1, int32_t bw1, int32_t bh1);
481
/*
   true iff the half-open ranges [x0, x0 + l0) and [x1, x1 + l1) overlap

   the end points are computed in 64 bits so a start + length beyond
   0x7fffffff does not overflow (undefined behaviour in c)
*/
static INLINE bool khrn_ranges_intersect(
   int32_t x0, int32_t l0,
   int32_t x1, int32_t l1)
{
   int64_t end0 = (int64_t)x0 + (int64_t)l0;
   int64_t end1 = (int64_t)x1 + (int64_t)l1;
   return ((int64_t)x0 < end1) && ((int64_t)x1 < end0);
}
488
489	static INLINE bool khrn_rects_intersect(
490	int32_t x0, int32_t y0, int32_t w0, int32_t h0,
491	int32_t x1, int32_t y1, int32_t w1, int32_t h1)
492	{
493	return khrn_ranges_intersect(x0, w0, x1, w1) && khrn_ranges_intersect(y0, h0, y1, h1);
494	}
495
496	/******************************************************************************
497	memory barrier
498	******************************************************************************/
499
#ifdef KHRN_SINGLE_THREADED
   /* everything is done in one thread, no need for barriers */
   static INLINE void khrn_barrier(void) {}
#elif defined(_VIDEOCORE)
   /* don't need a real memory barrier
    * extern function should do as a compiler barrier, but todo: is there a better way? */
   extern void khrn_barrier(void);
#else
   /* leave undefined (will get error on use) */
#endif
510
511	#endif
512

Browse the source code of PiUserland/interface/khronos/common/khrn_int_util.h