blsupport_p.h source code [Blend2d/src/blend2d/blsupport_p.h]

1	// [Blend2D]
2	// 2D Vector Graphics Powered by a JIT Compiler.
3	//
4	// [License]
5	// Zlib - See LICENSE.md file in the package.
6
7	#ifndef BLEND2D_BLSUPPORT_P_H
8	#define BLEND2D_BLSUPPORT_P_H
9
10	#include "./blapi-internal_p.h"
11
12	//! \cond INTERNAL
13	//! \addtogroup blend2d_internal
14	//! \{
15
16	// ============================================================================
17	// [StdInt]
18	// ============================================================================
19
//! Tells at compile time whether `T` is an unsigned type, as reported by
//! `std::is_unsigned<T>`.
template<typename T>
static constexpr bool blIsUnsigned() noexcept {
  typedef std::is_unsigned<T> Trait;
  return Trait::value;
}
22
23	//! Cast an integer `x` to a fixed-width type as defined by <stdint.h>
24	//!
25	//! This can help when specializing some functions for a particular type. Since
26	//! some C/C++ types may overlap (like `long` vs `long long`) it's easier to
27	//! just cast to a type as defined by <stdint.h> and specialize for it.
28	template<typename T>
29	static constexpr typename BLInternal::StdInt<sizeof(T), blIsUnsigned<T>()>::Type blAsStdInt(T x) noexcept {
30	return (typename BLInternal::StdInt<sizeof(T), blIsUnsigned<T>()>::Type)x;
31	}
32
33	//! Cast an integer `x` to a fixed-width unsigned type as defined by <stdint.h>
34	template<typename T>
35	static constexpr typename BLInternal::StdInt<sizeof(T), `1`>::Type blAsStdUInt(T x) noexcept {
36	return (typename BLInternal::StdInt<sizeof(T), `1`>::Type)x;
37	}
38
39	//! Cast an integer `x` to either `int32_t`, uint32_t`, `int64_t`, or `uint64_t`.
40	//!
41	//! Used to keep a signedness of `T`, but to promote it to at least 32-bit type.
42	template<typename T>
43	static constexpr typename BLInternal::StdInt<blMax<size_t>(sizeof(T), `4`), blIsUnsigned<T>()>::Type blAsIntLeast32(T x) noexcept {
44	typedef typename BLInternal::StdInt<blMax<size_t>(sizeof(T), `4`), blIsUnsigned<T>()>::Type Result;
45	return Result(x);
46	}
47
48	//! Cast an integer `x` to either `uint32_t` or `uint64_t`.
49	template<typename T>
50	static constexpr typename BLInternal::StdInt<blMax<size_t>(sizeof(T), `4`), `1`>::Type blAsUIntLeast32(T x) noexcept {
51	typedef typename BLInternal::StdInt<blMax<size_t>(sizeof(T), `4`), `1`>::Type Result;
52	typedef typename std::make_unsigned<T>::type U;
53	return Result(U(x));
54	}
55
56	// ============================================================================
57	// [MisalignedInt]
58	// ============================================================================
59
60	static const constexpr bool BL_UNALIGNED_IO_16 = BL_TARGET_ARCH_X86 != `0`;
61	static const constexpr bool BL_UNALIGNED_IO_32 = BL_TARGET_ARCH_X86 != `0`;
62	static const constexpr bool BL_UNALIGNED_IO_64 = BL_TARGET_ARCH_X86 != `0`;
63
// Declares NAME with an explicit alignment of BYTES bytes by using a compiler
// specific attribute (the C++11 'alignas' keyword does not allow this).
// Compilers that are neither GNU-compatible nor MSVC get NAME unchanged.
#ifdef __GNUC__
  #define BL_MISALIGN_TYPE(NAME, BYTES) __attribute__((__aligned__(BYTES))) NAME
#elif defined(_MSC_VER)
  #define BL_MISALIGN_TYPE(NAME, BYTES) __declspec(align(BYTES)) NAME
#else
  #define BL_MISALIGN_TYPE(NAME, BYTES) NAME
#endif

//! Maps an unsigned integer type and an alignment to a type `T` that may be
//! less aligned than its size.
//!
//! The primary template is intentionally empty, only the combinations that
//! are specialized below provide the nested `T`.
template<typename T, size_t Alignment>
struct BLMisalignedUInt {};

// 8-bit.
template<>
struct BLMisalignedUInt<uint8_t, 1> {
  typedef uint8_t T;
};

// 16-bit.
template<>
struct BLMisalignedUInt<uint16_t, 1> {
  typedef uint16_t BL_MISALIGN_TYPE(T, 1);
};

template<>
struct BLMisalignedUInt<uint16_t, 2> {
  typedef uint16_t T;
};

// 32-bit.
template<>
struct BLMisalignedUInt<uint32_t, 1> {
  typedef uint32_t BL_MISALIGN_TYPE(T, 1);
};

template<>
struct BLMisalignedUInt<uint32_t, 2> {
  typedef uint32_t BL_MISALIGN_TYPE(T, 2);
};

template<>
struct BLMisalignedUInt<uint32_t, 4> {
  typedef uint32_t T;
};

// 64-bit.
template<>
struct BLMisalignedUInt<uint64_t, 1> {
  typedef uint64_t BL_MISALIGN_TYPE(T, 1);
};

template<>
struct BLMisalignedUInt<uint64_t, 2> {
  typedef uint64_t BL_MISALIGN_TYPE(T, 2);
};

template<>
struct BLMisalignedUInt<uint64_t, 4> {
  typedef uint64_t BL_MISALIGN_TYPE(T, 4);
};

template<>
struct BLMisalignedUInt<uint64_t, 8> {
  typedef uint64_t T;
};

#undef BL_MISALIGN_TYPE
89
90	// ============================================================================
91	// [Numeric Limits]
92	// ============================================================================
93
//! Returns `std::numeric_limits<T>::infinity()`.
template<typename T>
static constexpr T blInf() noexcept {
  typedef std::numeric_limits<T> Limits;
  return Limits::infinity();
}

//! Returns `std::numeric_limits<T>::quiet_NaN()`.
template<typename T>
static constexpr T blNaN() noexcept {
  typedef std::numeric_limits<T> Limits;
  return Limits::quiet_NaN();
}

//! Returns the lowest finite value of `T` (`lowest()`, not `min()`, so the
//! result is the most negative value for floating point types as well).
template<typename T>
static constexpr T blMinValue() noexcept {
  typedef std::numeric_limits<T> Limits;
  return Limits::lowest();
}

//! Returns the greatest finite value of `T`.
template<typename T>
static constexpr T blMaxValue() noexcept {
  typedef std::numeric_limits<T> Limits;
  return Limits::max();
}
98
99	// ============================================================================
100	// [Bit Utilities]
101	// ============================================================================
102
103	namespace {
104
//! Returns a value of type `T` that has all bits set.
template<typename T>
constexpr T blBitOnes() noexcept {
  return static_cast<T>(~static_cast<T>(0));
}
107
//! Computes `0 - x` without undefined behavior for both signed and unsigned
//! `T`.
//!
//! The subtraction is carried out on the unsigned counterpart of `T`, where
//! wrap-around is well defined, and the result is converted back to `T`.
template<typename T>
constexpr T blNegate(const T& x) noexcept {
  typedef typename std::make_unsigned<T>::type UnsignedT;
  return static_cast<T>(static_cast<UnsignedT>(0) - static_cast<UnsignedT>(x));
}
114
//! Returns the size of `T` in bits, computed as `sizeof(T) * 8`.
template<typename T>
constexpr uint32_t blBitSizeOf() noexcept {
  return static_cast<uint32_t>(sizeof(T) * 8u);
}

//! Returns how many words of type `T` are required to hold `nBits` bits
//! (the bit count is rounded up to a whole word).
template<typename T>
constexpr size_t blBitWordCountFromBitCount(size_t nBits) noexcept {
  return (nBits + blBitSizeOf<T>() - 1)
       / blBitSizeOf<T>();
}
122
123	//! Bit-cast `x` of `In` type to the given `Out` type.
124	//!
125	//! Useful to bit-cast between integers and floating points. The size of `Out`
126	//! and `In` must be the same otherwise the compilation would fail. Bit casting
127	//! is used by `blEquals` to implement bit equality for floating point types.
128	template<typename Out, typename In>
129	BL_INLINE Out blBitCast(const In& x) noexcept {
130	static_assert(sizeof(Out) == sizeof(In), "The size of In and Out must match");
131	union { In in; Out out; } u;
132	u.in = x;
133	return u.out;
134	}
135
//! Logical shift left - returns `x << y`.
//!
//! The shift is performed on the unsigned counterpart of `X` and the result
//! is converted back to `X`.
template<typename X, typename Y>
constexpr X blBitShl(const X& x, const Y& y) noexcept {
  typedef typename std::make_unsigned<X>::type UnsignedX;
  return static_cast<X>(static_cast<UnsignedX>(x) << y);
}
142
//! Logical shift right - returns `x >> y` with zeros shifted in from the top.
//!
//! The shift is performed on the unsigned counterpart of `X` and the result
//! is converted back to `X`.
template<typename X, typename Y>
constexpr X blBitShr(const X& x, const Y& y) noexcept {
  typedef typename std::make_unsigned<X>::type UnsignedX;
  return static_cast<X>(static_cast<UnsignedX>(x) >> y);
}
149
//! Arithmetic shift right - returns `x >> y` computed on the signed
//! counterpart of `X`, the result is converted back to `X`.
template<typename X, typename Y>
constexpr X blBitSar(const X& x, const Y& y) noexcept {
  typedef typename std::make_signed<X>::type SignedX;
  return static_cast<X>(static_cast<SignedX>(x) >> y);
}
156
157	template<typename T>
158	BL_INLINE T blBitRolImpl(const T& x, unsigned n) noexcept {
159	return blBitShl(x, n % unsigned(sizeof(T) * `8u`)) \| blBitShr(x, (`0u` - n) % unsigned(sizeof(T) * `8u`)) ;
160	}
161
162	template<typename T>
163	BL_INLINE T blBitRorImpl(const T& x, unsigned n) noexcept {
164	return blBitShr(x, n % unsigned(sizeof(T) * `8u`)) \| blBitShl(x, (`0u` - n) % unsigned(sizeof(T) * `8u`)) ;
165	}
166
167	// MSVC is unable to emit `rol\|ror` instruction when `n` is not constant so we
168	// have to help it a bit. This, however, prevents us from using `constexpr`.
169	#if defined(_MSC_VER)
170	template<> BL_INLINE uint8_t blBitRolImpl(const uint8_t& x, unsigned n) noexcept { return uint8_t(_rotl8(x, uint8_t(n))); }
171	template<> BL_INLINE uint8_t blBitRorImpl(const uint8_t& x, unsigned n) noexcept { return uint8_t(_rotr8(x, uint8_t(n))); }
172	template<> BL_INLINE uint16_t blBitRolImpl(const uint16_t& x, unsigned n) noexcept { return uint16_t(_rotl16(x, uint8_t(n))); }
173	template<> BL_INLINE uint16_t blBitRorImpl(const uint16_t& x, unsigned n) noexcept { return uint16_t(_rotr16(x, uint8_t(n))); }
174	template<> BL_INLINE uint32_t blBitRolImpl(const uint32_t& x, unsigned n) noexcept { return uint32_t(_rotl(x, int(n))); }
175	template<> BL_INLINE uint32_t blBitRorImpl(const uint32_t& x, unsigned n) noexcept { return uint32_t(_rotr(x, int(n))); }
176	template<> BL_INLINE uint64_t blBitRolImpl(const uint64_t& x, unsigned n) noexcept { return uint64_t(_rotl64(x, int(n))); }
177	template<> BL_INLINE uint64_t blBitRorImpl(const uint64_t& x, unsigned n) noexcept { return uint64_t(_rotr64(x, int(n))); }
178	#endif
179
180	template<typename X, typename Y>
181	BL_INLINE X blBitRol(const X& x, const Y& n) noexcept { return X(blBitRolImpl(blAsUIntLeast32(x), unsigned(n))); }
182
183	template<typename X, typename Y>
184	BL_INLINE X blBitRor(const X& x, const Y& n) noexcept { return X(blBitRorImpl(blAsUIntLeast32(x), unsigned(n))); }
185
//! Returns `x | (x >> y)` where the shift is logical (performed on the
//! unsigned counterpart of `X`) - helper used by some bit manipulation
//! helpers.
template<typename X, typename Y>
constexpr X blBitShrOr(const X& x, const Y& y) noexcept {
  typedef typename std::make_unsigned<X>::type UnsignedX;
  return X(x | X(UnsignedX(x) >> y));
}

//! Applies `blBitShrOr()` repeatedly, once per shift amount from left to
//! right, each step operating on the result of the previous one.
template<typename X, typename Y, typename... Args>
constexpr X blBitShrOr(const X& x, const Y& y, Args... args) noexcept {
  return blBitShrOr(blBitShrOr(x, y), args...);
}
192
193	//! Fill all trailing bits right from the first most significant bit set.
194	template<typename T>
195	constexpr T blFillTrailingBits(const T& x) noexcept {
196	typedef typename BLInternal::StdInt<sizeof(T), `1`>::Type U;
197	return T(blFillTrailingBits(U(x)));
198	}
199
200	template<> constexpr uint8_t blFillTrailingBits(const uint8_t& x) noexcept { return blBitShrOr(x, `1`, `2`, `4`); }
201	template<> constexpr uint16_t blFillTrailingBits(const uint16_t& x) noexcept { return blBitShrOr(x, `1`, `2`, `4`, `8`); }
202	template<> constexpr uint32_t blFillTrailingBits(const uint32_t& x) noexcept { return blBitShrOr(x, `1`, `2`, `4`, `8`, `16`); }
203	template<> constexpr uint64_t blFillTrailingBits(const uint64_t& x) noexcept { return blBitShrOr(x, `1`, `2`, `4`, `8`, `16`, `32`); }
204
//! Returns a bit-mask of type `T` in which only the bit at index `x` is set.
template<typename T, typename Arg>
constexpr T blBitMask(Arg x) noexcept {
  return static_cast<T>(static_cast<T>(1u) << x);
}

//! Returns a bit-mask of type `T` in which the bits at all given indexes
//! (`x` and every value in `args`) are set.
template<typename T, typename Arg, typename... Args>
constexpr T blBitMask(Arg x, Args... args) noexcept {
  return static_cast<T>(blBitMask<T>(x) | blBitMask<T>(args...));
}
212
//! Expands a boolean into a full bit-mask: all bits set when `x` is 1 and
//! zero when `x` is 0.
//!
//! The result is `0 - T(x)` computed on the unsigned counterpart of `T`, so
//! `x` must be either 0 or 1 - any other value produces invalid output.
template<typename T, typename B>
constexpr T blBitMaskFromBool(const B& x) noexcept {
  typedef typename std::make_unsigned<T>::type UnsignedT;
  return T(UnsignedT(0) - UnsignedT(T(x)));
}
218
//! Tests whether the bit at index `i` is set in `x`.
//!
//! The test is performed on the unsigned counterpart of `T`.
template<typename T, typename I>
constexpr bool blBitTest(const T& x, const I& i) noexcept {
  typedef typename std::make_unsigned<T>::type UnsignedT;
  return (static_cast<UnsignedT>(x) & (static_cast<UnsignedT>(1) << i)) != 0;
}
225
//! Tests whether every bit that is set in `y` is set in `x` as well.
template<typename X, typename Y>
constexpr bool blBitMatch(const X& x, const Y& y) noexcept {
  return (x & y) == y;
}
229
//! Portable count-trailing-zeros for a 32-bit value.
//!
//! The argument is expected to be `x & -x` (see `blBitCtzStatic()`), which
//! has only the lowest set bit of the original value. Each mask selects the
//! bit positions whose index has a particular binary digit cleared, so every
//! hit lowers the result by the weight of that digit.
constexpr uint32_t blBitCtzFallback(uint32_t xAndNegX) noexcept {
  return 31 - ( ((xAndNegX & 0x0000FFFFu) ? 16 : 0)
              + ((xAndNegX & 0x00FF00FFu) ?  8 : 0)
              + ((xAndNegX & 0x0F0F0F0Fu) ?  4 : 0)
              + ((xAndNegX & 0x33333333u) ?  2 : 0)
              + ((xAndNegX & 0x55555555u) ?  1 : 0) );
}

//! Portable count-trailing-zeros for a 64-bit value, see the 32-bit overload
//! for the description of the algorithm.
constexpr uint32_t blBitCtzFallback(uint64_t xAndNegX) noexcept {
  return 63 - ( ((xAndNegX & 0x00000000FFFFFFFFu) ? 32 : 0)
              + ((xAndNegX & 0x0000FFFF0000FFFFu) ? 16 : 0)
              + ((xAndNegX & 0x00FF00FF00FF00FFu) ?  8 : 0)
              + ((xAndNegX & 0x0F0F0F0F0F0F0F0Fu) ?  4 : 0)
              + ((xAndNegX & 0x3333333333333333u) ?  2 : 0)
              + ((xAndNegX & 0x5555555555555555u) ?  1 : 0) );
}
246
//! Counts trailing zeros of `x` in a `constexpr` friendly way.
//!
//! `x` is widened to at least 32 bits, its lowest set bit is isolated by
//! `x & -x`, and the position of that bit is resolved by `blBitCtzFallback()`.
template<typename T>
constexpr uint32_t blBitCtzStatic(const T& x) noexcept {
  return blBitCtzFallback(
    blAsUIntLeast32(x) & blNegate(blAsUIntLeast32(x)));
}
249
250	template<typename T>
251	BL_INLINE uint32_t blBitCtzImpl(const T& x) noexcept { return blBitCtzStatic(x); }
252
253	#if !defined(BL_BUILD_NO_INTRINSICS)
254	# if defined(__GNUC__)
255	template<> BL_INLINE uint32_t blBitCtzImpl(const uint32_t& x) noexcept { return uint32_t(__builtin_ctz(x)); }
256	template<> BL_INLINE uint32_t blBitCtzImpl(const uint64_t& x) noexcept { return uint32_t(__builtin_ctzll(x)); }
257	# elif defined(_MSC_VER)
258	template<> BL_INLINE uint32_t blBitCtzImpl(const uint32_t& x) noexcept { unsigned long i; _BitScanForward(&i, x); return uint32_t(i); }
259	# if BL_TARGET_ARCH_X86 == 64 \|\| BL_TARGET_ARCH_ARM == 64
260	template<> BL_INLINE uint32_t blBitCtzImpl(const uint64_t& x) noexcept { unsigned long i; _BitScanForward64(&i, x); return uint32_t(i); }
261	# endif
262	# endif
263	#endif
264
265	//! Count trailing zeros in `x` (returns a position of a first bit set in `x`).
266	//!
267	//! \note The input MUST NOT be zero, otherwise the result is undefined.
268	template<typename T>
269	static BL_INLINE uint32_t blBitCtz(T x) noexcept { return blBitCtzImpl(blAsUIntLeast32(x)); }
270
//! Generates a bit-mask of type `T` that has `n` least significant (trailing)
//! bits set.
template<typename T, typename N>
constexpr T blTrailingBitMask(const N& n) noexcept {
  typedef typename std::make_unsigned<T>::type U;
  return (sizeof(U) >= sizeof(uintptr_t))
    // Shifting more bits than the type provides is UNDEFINED BEHAVIOR.
    // In such case the result is trashed by ORing it with a mask that has
    // all bits set (`0 - 1`), which discards the UNDEFINED RESULT of the
    // shift. The mask is zero when `n` is less than the bit-width of `T`.
    ? T(((U(1) << n) - U(1u)) | U(U(0) - U(n >= N(uint32_t(sizeof(T) * 8u)))))
    // Types narrower than `uintptr_t` are handled by shifting `uintptr_t`
    // and truncating the result.
    : T(U((uintptr_t(1) << n) - uintptr_t(1)));
}
282
//! Tests whether `x` is non-zero and all of its set bits form a single
//! uninterrupted run.
//!
//! Adding the lowest set bit to `x` clears the lowest run of ones and carries
//! into the next bit. The XOR of that sum with `x` is compared against `x` to
//! decide whether any set bits remained above the run.
template<typename T>
constexpr bool blIsBitMaskConsecutive(const T& x) noexcept {
  typedef typename std::make_unsigned<T>::type UnsignedT;
  return x != 0 &&
         (UnsignedT(x) ^ (UnsignedT(x) + (UnsignedT(x) & (~UnsignedT(x) + 1u)))) >= UnsignedT(x);
}
288
//! Returns the index of the lowest bit set in `x`, computed by the
//! `constexpr` capable `blBitCtzStatic()`.
template<typename T>
constexpr uint32_t blBitShiftOf(const T& x) noexcept {
  return blBitCtzStatic(x);
}
291
292	} // {anonymous}
293
294	// ============================================================================
295	// [ByteSwap]
296	// ============================================================================
297
298	namespace {
299
300	template<typename T>
301	static BL_INLINE T blByteSwap32(const T& x) noexcept {
302	#if defined(__GNUC__) && !defined(BL_BUILD_NO_INTRINSICS)
303	return T(uint32_t(__builtin_bswap32(uint32_t(x))));
304	#elif defined(_MSC_VER) && !defined(BL_BUILD_NO_INTRINSICS)
305	return T(uint32_t(_byteswap_ulong(uint32_t(x))));
306	#else
307	return T((uint32_t(x) << `24`) \| (uint32_t(x) >> `24`) \| ((uint32_t(x) << `8`) & `0x00FF0000u`) \| ((uint32_t(x) >> `8`) & `0x0000FF00`));
308	#endif
309	}
310
311	template<typename T>
312	static BL_INLINE T blByteSwap24(const T& x) noexcept {
313	// This produces always much better code than trying to do a real 24-bit byteswap.
314	return T(blByteSwap32(uint32_t(x)) >> `8`);
315	}
316
317	template<typename T>
318	static BL_INLINE T blByteSwap16(const T& x) noexcept {
319	#if defined(_MSC_VER) && !defined(BL_BUILD_NO_INTRINSICS)
320	return T(uint16_t(_byteswap_ushort(uint16_t(x))));
321	#else
322	return T((uint16_t(x) << `8`) \| (uint16_t(x) >> `8`));
323	#endif
324	}
325
326	template<typename T>
327	static BL_INLINE T blByteSwap64(const T& x) noexcept {
328	#if defined(__GNUC__) && !defined(BL_BUILD_NO_INTRINSICS)
329	return T(uint64_t(__builtin_bswap64(uint64_t(x))));
330	#elif defined(_MSC_VER) && !defined(BL_BUILD_NO_INTRINSICS)
331	return T(uint64_t(_byteswap_uint64(uint64_t(x))));
332	#else
333	return T( (uint64_t(blByteSwap32(uint32_t(uint64_t(x) >> `32` ))) ) \|
334	(uint64_t(blByteSwap32(uint32_t(uint64_t(x) & `0xFFFFFFFFu`))) << `32`) );
335	#endif
336	}
337
338	// TODO: [GLOBAL] REMOVE?
339	template<typename T> static BL_INLINE T blByteSwap16LE(T x) noexcept { return BL_BYTE_ORDER_NATIVE == BL_BYTE_ORDER_LE ? T(x) : T(blByteSwap16(int16_t(x))); }
340	template<typename T> static BL_INLINE T blByteSwap24LE(T x) noexcept { return BL_BYTE_ORDER_NATIVE == BL_BYTE_ORDER_LE ? T(x) : T(blByteSwap24(uint32_t(x))); }
341	template<typename T> static BL_INLINE T blByteSwap32LE(T x) noexcept { return BL_BYTE_ORDER_NATIVE == BL_BYTE_ORDER_LE ? T(x) : T(blByteSwap32(uint32_t(x))); }
342	template<typename T> static BL_INLINE T blByteSwap64LE(T x) noexcept { return BL_BYTE_ORDER_NATIVE == BL_BYTE_ORDER_LE ? T(x) : T(blByteSwap64(uint64_t(x))); }
343
344	template<typename T> static BL_INLINE T blByteSwap16BE(T x) noexcept { return BL_BYTE_ORDER_NATIVE == BL_BYTE_ORDER_BE ? T(x) : T(blByteSwap16(uint16_t(x))); }
345	template<typename T> static BL_INLINE T blByteSwap24BE(T x) noexcept { return BL_BYTE_ORDER_NATIVE == BL_BYTE_ORDER_BE ? T(x) : T(blByteSwap24(uint32_t(x))); }
346	template<typename T> static BL_INLINE T blByteSwap32BE(T x) noexcept { return BL_BYTE_ORDER_NATIVE == BL_BYTE_ORDER_BE ? T(x) : T(blByteSwap32(uint32_t(x))); }
347	template<typename T> static BL_INLINE T blByteSwap64BE(T x) noexcept { return BL_BYTE_ORDER_NATIVE == BL_BYTE_ORDER_BE ? T(x) : T(blByteSwap64(uint64_t(x))); }
348
349	} // {anonymous}
350
351	// ============================================================================
352	// [Alignment]
353	// ============================================================================
354
355	template<typename X, typename Y>
356	static constexpr bool blIsAligned(const X& base, const Y& alignment) noexcept {
357	typedef typename BLInternal::StdInt<sizeof(X), `1`>::Type U;
358	return ((U)base % (U)alignment) == `0`;
359	}
360
//! Tests whether `x` is a power of two, which means that exactly one bit is
//! set. Zero is not considered a power of two.
template<typename T>
static constexpr bool blIsPowerOf2(const T& x) noexcept {
  typedef typename std::make_unsigned<T>::type UnsignedT;
  return x != 0 && (UnsignedT(x) & (UnsignedT(x) - UnsignedT(1))) == 0;
}
367
368	template<typename X, typename Y>
369	static constexpr X blAlignUp(const X& x, const Y& alignment) noexcept {
370	typedef typename BLInternal::StdInt<sizeof(X), `1`>::Type U;
371	return (X)( ((U)x + ((U)(alignment) - `1u`)) & ~((U)(alignment) - `1u`) );
372	}
373
374	template<typename T>
375	static constexpr T blAlignUpPowerOf2(const T& x) noexcept {
376	typedef typename BLInternal::StdInt<sizeof(T), `1`>::Type U;
377	return (T)(blFillTrailingBits(U(x) - `1u`) + `1u`);
378	}
379
380	//! Returns zero or a positive difference between `base` and `base` aligned to `alignment`.
381	template<typename X, typename Y>
382	static constexpr X blAlignUpDiff(const X& base, const Y& alignment) noexcept {
383	typedef typename BLInternal::StdInt<sizeof(X), `1`>::Type U;
384	return blAlignUp(U(base), alignment) - U(base);
385	}
386
387	template<typename X, typename Y>
388	static constexpr X blAlignDown(const X& x, const Y& alignment) noexcept {
389	typedef typename BLInternal::StdInt<sizeof(X), `1`>::Type U;
390	return (X)( (U)x & ~((U)(alignment) - `1u`) );
391	}
392
393	// ============================================================================
394	// [Pointer Utilities]
395	// ============================================================================
396
//! Returns `ptr` moved by `offset` BYTES (not elements); the offset may be
//! negative. The pointer type is preserved.
template<typename T, typename Offset>
static constexpr T* blOffsetPtr(T* ptr, Offset offset) noexcept {
  return (T*)(
    (uintptr_t)(ptr) + (uintptr_t)(intptr_t)offset);
}

//! Returns `ptr` moved by `offset` BYTES (not elements) and converted to a
//! pointer to `T`, which has to be specified explicitly.
template<typename T, typename P, typename Offset>
static constexpr T* blOffsetPtr(P* ptr, Offset offset) noexcept {
  return (T*)(
    (uintptr_t)(ptr) + (uintptr_t)(intptr_t)offset);
}
402
403	// ============================================================================
404	// [ClampTo]
405	// ============================================================================
406
//! Clamps `x` to the range `[0, y]` and returns it as `DstT`.
//!
//! A single unsigned comparison detects values that are in range, because
//! negative values compare as large unsigned numbers. Out-of-range values
//! clamp to `y` when positive and to zero when negative.
//!
//! The sign of `x` is tested directly. A sign test based on `0 - x` would
//! misclassify the most negative value of `SrcT` (its negation is still
//! negative), which would clamp it to `y` instead of zero.
template<typename SrcT, typename DstT>
static constexpr DstT blClampToImpl(const SrcT& x, const DstT& y) noexcept {
  typedef typename std::make_unsigned<SrcT>::type U;
  return U(x) <= U(y) ? DstT(x) :
         x > SrcT(0)  ? DstT(y) : DstT(0);
}

//! Clamp a value `x` to a byte (unsigned 8-bit type).
template<typename T>
static constexpr uint8_t blClampToByte(const T& x) noexcept {
  return blClampToImpl<T, uint8_t>(x, uint8_t(0xFFu));
}

//! Clamp a value `x` to a word (unsigned 16-bit type).
template<typename T>
static constexpr uint16_t blClampToWord(const T& x) noexcept {
  return blClampToImpl<T, uint16_t>(x, uint16_t(0xFFFFu));
}
425
426	// ============================================================================
427	// [Arithmetic]
428	// ============================================================================
429
//! Accumulator of overflow conditions - the overflow-aware arithmetic helpers
//! OR a non-zero value into it when an operation overflows.
using BLOverflowFlag = unsigned char;
431
432	namespace BLInternal {
433	template<typename T>
434	BL_INLINE T addOverflowFallback(T x, T y, BLOverflowFlag* of) noexcept {
435	typedef typename std::make_unsigned<T>::type U;
436
437	U result = U(x) + U(y);
438	*of \|= BLOverflowFlag(blIsUnsigned<T>() ? result < U(x) : T((U(x) ^ ~U(y)) & (U(x) ^ result)) < `0`);
439	return T(result);
440	}
441
442	template<typename T>
443	BL_INLINE T subOverflowFallback(T x, T y, BLOverflowFlag* of) noexcept {
444	typedef typename std::make_unsigned<T>::type U;
445
446	U result = U(x) - U(y);
447	*of \|= BLOverflowFlag(blIsUnsigned<T>() ? result > U(x) : T((U(x) ^ U(y)) & (U(x) ^ result)) < `0`);
448	return T(result);
449	}
450
451	template<typename T>
452	BL_INLINE T mulOverflowFallback(T x, T y, BLOverflowFlag* of) noexcept {
453	typedef typename BLInternal::StdInt<sizeof(T) * `2`, blIsUnsigned<T>()>::Type I;
454	typedef typename std::make_unsigned<I>::type U;
455
456	U mask = U(blMaxValue<typename std::make_unsigned<T>::type>());
457	if (std::is_signed<T>::value) {
458	U prod = U(I(x)) * U(I(y));
459	*of \|= BLOverflowFlag(I(prod) < I(blMinValue<T>()) \|\| I(prod) > I(blMaxValue<T>()));
460	return T(I(prod & mask));
461	}
462	else {
463	U prod = U(x) * U(y);
464	*of \|= BLOverflowFlag((prod & ~mask) != `0`);
465	return T(prod & mask);
466	}
467	}
468
469	template<>
470	BL_INLINE int64_t mulOverflowFallback(int64_t x, int64_t y, BLOverflowFlag* of) noexcept {
471	int64_t result = int64_t(uint64_t(x) * uint64_t(y));
472	*of \|= BLOverflowFlag(x && (result / x != y));
473	return result;
474	}
475
476	template<>
477	BL_INLINE uint64_t mulOverflowFallback(uint64_t x, uint64_t y, BLOverflowFlag* of) noexcept {
478	uint64_t result = x * y;
479	*of \|= BLOverflowFlag(y != `0` && blMaxValue<uint64_t>() / y < x);
480	return result;
481	}
482
483	// These can be specialized.
484	template<typename T> BL_INLINE T addOverflowImpl(const T& x, const T& y, BLOverflowFlag* of) noexcept { return addOverflowFallback(x, y, of); }
485	template<typename T> BL_INLINE T subOverflowImpl(const T& x, const T& y, BLOverflowFlag* of) noexcept { return subOverflowFallback(x, y, of); }
486	template<typename T> BL_INLINE T mulOverflowImpl(const T& x, const T& y, BLOverflowFlag* of) noexcept { return mulOverflowFallback(x, y, of); }
487
488	#if defined(__GNUC__) && !defined(BL_BUILD_NO_INTRINSICS)
489	#if defined(__clang__) \|\| __GNUC__ >= 5
490	#define BL_ARITH_OVERFLOW_SPECIALIZE(FUNC, T, RESULT_T, BUILTIN) \
491	template<> \
492	BL_INLINE T FUNC(const T& x, const T& y, BLOverflowFlag* of) noexcept { \
493	RESULT_T result; \
494	*of \|= BLOverflowFlag(BUILTIN((RESULT_T)x, (RESULT_T)y, &result)); \
495	return T(result); \
496	}
497	BL_ARITH_OVERFLOW_SPECIALIZE(addOverflowImpl, int32_t , int , __builtin_sadd_overflow )
498	BL_ARITH_OVERFLOW_SPECIALIZE(addOverflowImpl, uint32_t, unsigned int , __builtin_uadd_overflow )
499	BL_ARITH_OVERFLOW_SPECIALIZE(addOverflowImpl, int64_t , long long , __builtin_saddll_overflow)
500	BL_ARITH_OVERFLOW_SPECIALIZE(addOverflowImpl, uint64_t, unsigned long long, __builtin_uaddll_overflow)
501	BL_ARITH_OVERFLOW_SPECIALIZE(subOverflowImpl, int32_t , int , __builtin_ssub_overflow )
502	BL_ARITH_OVERFLOW_SPECIALIZE(subOverflowImpl, uint32_t, unsigned int , __builtin_usub_overflow )
503	BL_ARITH_OVERFLOW_SPECIALIZE(subOverflowImpl, int64_t , long long , __builtin_ssubll_overflow)
504	BL_ARITH_OVERFLOW_SPECIALIZE(subOverflowImpl, uint64_t, unsigned long long, __builtin_usubll_overflow)
505	BL_ARITH_OVERFLOW_SPECIALIZE(mulOverflowImpl, int32_t , int , __builtin_smul_overflow )
506	BL_ARITH_OVERFLOW_SPECIALIZE(mulOverflowImpl, uint32_t, unsigned int , __builtin_umul_overflow )
507	BL_ARITH_OVERFLOW_SPECIALIZE(mulOverflowImpl, int64_t , long long , __builtin_smulll_overflow)
508	BL_ARITH_OVERFLOW_SPECIALIZE(mulOverflowImpl, uint64_t, unsigned long long, __builtin_umulll_overflow)
509	#undef BL_ARITH_OVERFLOW_SPECIALIZE
510	#endif
511	#endif
512
// There is a bug in MSVC that makes these specializations unusable, maybe in
// the future... The whole section is compiled out by the `&& 0` below.
//
// NOTE(review): these specializations take `x` and `y` by value whereas the
// primary templates take them by const reference, and ARCH_BITS is not
// defined anywhere in this file - verify both before enabling the section.
#if defined(_MSC_VER) && 0
#define BL_ARITH_OVERFLOW_SPECIALIZE(NAME, VALUE_T, NATIVE_T, INTRINSIC)   \
  template<>                                                               \
  BL_INLINE VALUE_T NAME(VALUE_T x, VALUE_T y, BLOverflowFlag* of) noexcept { \
    NATIVE_T result;                                                       \
    *of |= BLOverflowFlag(INTRINSIC(0, (NATIVE_T)x, (NATIVE_T)y, &result)); \
    return VALUE_T(result);                                                \
  }

BL_ARITH_OVERFLOW_SPECIALIZE(addOverflowImpl, uint32_t, unsigned int    , _addcarry_u32 )
BL_ARITH_OVERFLOW_SPECIALIZE(subOverflowImpl, uint32_t, unsigned int    , _subborrow_u32)

#if ARCH_BITS >= 64
BL_ARITH_OVERFLOW_SPECIALIZE(addOverflowImpl, uint64_t, unsigned __int64, _addcarry_u64 )
BL_ARITH_OVERFLOW_SPECIALIZE(subOverflowImpl, uint64_t, unsigned __int64, _subborrow_u64)
#endif

#undef BL_ARITH_OVERFLOW_SPECIALIZE
#endif
530	} // {BLInternal}
531
532	template<typename T>
533	static BL_INLINE T blAddOverflow(const T& x, const T& y, BLOverflowFlag* of) noexcept { return T(BLInternal::addOverflowImpl(blAsStdInt(x), blAsStdInt(y), of)); }
534
535	template<typename T>
536	static BL_INLINE T blSubOverflow(const T& x, const T& y, BLOverflowFlag* of) noexcept { return T(BLInternal::subOverflowImpl(blAsStdInt(x), blAsStdInt(y), of)); }
537
538	template<typename T>
539	static BL_INLINE T blMulOverflow(const T& x, const T& y, BLOverflowFlag* of) noexcept { return T(BLInternal::mulOverflowImpl(blAsStdInt(x), blAsStdInt(y), of)); }
540
541	template<typename T>
542	static BL_INLINE T blUAddSaturate(const T& x, const T& y) noexcept {
543	BLOverflowFlag of = `0`;
544	T result = blAddOverflow(x, y, &of);
545	return T(result \| blBitMaskFromBool<T>(of));
546	}
547
548	template<typename T>
549	static BL_INLINE T blUSubSaturate(const T& x, const T& y) noexcept {
550	BLOverflowFlag of = `0`;
551	T result = blSubOverflow(x, y, &of);
552	return T(result & blBitMaskFromBool<T>(!of));
553	}
554
555	template<typename T>
556	static BL_INLINE T blUMulSaturate(const T& x, const T& y) noexcept {
557	BLOverflowFlag of = `0`;
558	T result = blMulOverflow(x, y, &of);
559	return T(result \| blBitMaskFromBool<T>(of));
560	}
561
562	// ============================================================================
563	// [Udiv255]
564	// ============================================================================
565
//! Integer division by 255, compatible with `(x + (x >> 8)) >> 8` used by SIMD.
//!
//! Computed as `((x + 128) * 257) >> 16`.
static constexpr uint32_t blUdiv255(uint32_t x) noexcept {
  return ((x + 128u) * 257u) >> 16;
}
568
569	// ============================================================================
570	// [blMemRead]
571	// ============================================================================
572
573	namespace {
574
575	BL_INLINE uint32_t blMemReadU8(const void* p) noexcept { return uint32_t(static_cast<const uint8_t*>(p)[`0`]); }
576	BL_INLINE int32_t blMemReadI8(const void* p) noexcept { return int32_t(static_cast<const int8_t*>(p)[`0`]); }
577
578	template<uint32_t ByteOrder, size_t Alignment>
579	BL_INLINE uint32_t blMemReadU16(const void* p) noexcept {
580	if (ByteOrder == BL_BYTE_ORDER_NATIVE && (BL_UNALIGNED_IO_16 \|\| Alignment >= `2`)) {
581	typedef typename BLMisalignedUInt<uint16_t, Alignment>::T U16AlignedToN;
582	return uint32_t(static_cast<const U16AlignedToN*>(p)[`0`]);
583	}
584	else {
585	uint32_t hi = blMemReadU8(static_cast<const uint8_t*>(p) + (ByteOrder == BL_BYTE_ORDER_LE ? `1` : `0`));
586	uint32_t lo = blMemReadU8(static_cast<const uint8_t*>(p) + (ByteOrder == BL_BYTE_ORDER_LE ? `0` : `1`));
587	return blBitShl(hi, `8`) \| lo;
588	}
589	}
590
591	template<uint32_t ByteOrder, size_t Alignment>
592	BL_INLINE int32_t blMemReadI16(const void* p) noexcept {
593	if (ByteOrder == BL_BYTE_ORDER_NATIVE && (BL_UNALIGNED_IO_16 \|\| Alignment >= `2`)) {
594	typedef typename BLMisalignedUInt<uint16_t, Alignment>::T U16AlignedToN;
595	return int32_t(int16_t(static_cast<const U16AlignedToN*>(p)[`0`]));
596	}
597	else {
598	int32_t hi = int32_t(blMemReadI8(static_cast<const uint8_t*>(p) + (ByteOrder == BL_BYTE_ORDER_LE ? `1` : `0`)));
599	int32_t lo = int32_t(blMemReadU8(static_cast<const uint8_t*>(p) + (ByteOrder == BL_BYTE_ORDER_LE ? `0` : `1`)));
600	return blBitShl(hi, `8`) \| lo;
601	}
602	}
603
604	template<uint32_t ByteOrder = BL_BYTE_ORDER_NATIVE>
605	BL_INLINE uint32_t blMemReadU24u(const void* p) noexcept {
606	uint32_t b0 = blMemReadU8(static_cast<const uint8_t*>(p) + (ByteOrder == BL_BYTE_ORDER_LE ? `2` : `0`));
607	uint32_t b1 = blMemReadU8(static_cast<const uint8_t*>(p) + (ByteOrder == BL_BYTE_ORDER_LE ? `1` : `1`));
608	uint32_t b2 = blMemReadU8(static_cast<const uint8_t*>(p) + (ByteOrder == BL_BYTE_ORDER_LE ? `0` : `2`));
609	return blBitShl(b0, `16`) \| blBitShl(b1, `8`) \| b2;
610	}
611
612	template<uint32_t ByteOrder, size_t Alignment>
613	BL_INLINE uint32_t blMemReadU32(const void* p) noexcept {
614	if (BL_UNALIGNED_IO_32 \|\| Alignment >= `4`) {
615	typedef typename BLMisalignedUInt<uint32_t, Alignment>::T U32AlignedToN;
616	uint32_t x = static_cast<const U32AlignedToN*>(p)[`0`];
617	return ByteOrder == BL_BYTE_ORDER_NATIVE ? x : blByteSwap32(x);
618	}
619	else {
620	uint32_t hi = blMemReadU16<ByteOrder, Alignment >= `2` ? size_t(`2`) : Alignment>(static_cast<const uint8_t*>(p) + (ByteOrder == BL_BYTE_ORDER_LE ? `2` : `0`));
621	uint32_t lo = blMemReadU16<ByteOrder, Alignment >= `2` ? size_t(`2`) : Alignment>(static_cast<const uint8_t*>(p) + (ByteOrder == BL_BYTE_ORDER_LE ? `0` : `2`));
622	return blBitShl(hi, `16`) \| lo;
623	}
624	}
625
626	template<uint32_t ByteOrder, size_t Alignment>
627	BL_INLINE uint64_t blMemReadU64(const void* p) noexcept {
628	if (ByteOrder == BL_BYTE_ORDER_NATIVE && (BL_UNALIGNED_IO_64 \|\| Alignment >= `8`)) {
629	typedef typename BLMisalignedUInt<uint64_t, Alignment>::T U64AlignedToN;
630	return static_cast<const U64AlignedToN*>(p)[`0`];
631	}
632	else {
633	uint32_t hi = blMemReadU32<ByteOrder, Alignment >= `4` ? size_t(`4`) : Alignment>(static_cast<const uint8_t*>(p) + (ByteOrder == BL_BYTE_ORDER_LE ? `4` : `0`));
634	uint32_t lo = blMemReadU32<ByteOrder, Alignment >= `4` ? size_t(`4`) : Alignment>(static_cast<const uint8_t*>(p) + (ByteOrder == BL_BYTE_ORDER_LE ? `0` : `4`));
635	return blBitShl(uint64_t(hi), `32`) \| lo;
636	}
637	}
638
639	template<uint32_t ByteOrder, size_t Alignment>
640	BL_INLINE int32_t blMemReadI32(const void* p) noexcept { return int32_t(blMemReadU32<ByteOrder, Alignment>(p)); }
641
642	template<uint32_t ByteOrder, size_t Alignment>
643	BL_INLINE int64_t blMemReadI64(const void* p) noexcept { return int64_t(blMemReadU64<ByteOrder, Alignment>(p)); }
644
645	BL_INLINE int32_t blMemReadI16a(const void* p) noexcept { return blMemReadI16<BL_BYTE_ORDER_NATIVE, `2`>(p); }
646	BL_INLINE int32_t blMemReadI16u(const void* p) noexcept { return blMemReadI16<BL_BYTE_ORDER_NATIVE, `1`>(p); }
647	BL_INLINE uint32_t blMemReadU16a(const void* p) noexcept { return blMemReadU16<BL_BYTE_ORDER_NATIVE, `2`>(p); }
648	BL_INLINE uint32_t blMemReadU16u(const void* p) noexcept { return blMemReadU16<BL_BYTE_ORDER_NATIVE, `1`>(p); }
649
650	BL_INLINE int32_t blMemReadI16aLE(const void* p) noexcept { return blMemReadI16<BL_BYTE_ORDER_LE, `2`>(p); }
651	BL_INLINE int32_t blMemReadI16uLE(const void* p) noexcept { return blMemReadI16<BL_BYTE_ORDER_LE, `1`>(p); }
652	BL_INLINE uint32_t blMemReadU16aLE(const void* p) noexcept { return blMemReadU16<BL_BYTE_ORDER_LE, `2`>(p); }
653	BL_INLINE uint32_t blMemReadU16uLE(const void* p) noexcept { return blMemReadU16<BL_BYTE_ORDER_LE, `1`>(p); }
654
655	BL_INLINE int32_t blMemReadI16aBE(const void* p) noexcept { return blMemReadI16<BL_BYTE_ORDER_BE, `2`>(p); }
656	BL_INLINE int32_t blMemReadI16uBE(const void* p) noexcept { return blMemReadI16<BL_BYTE_ORDER_BE, `1`>(p); }
657	BL_INLINE uint32_t blMemReadU16aBE(const void* p) noexcept { return blMemReadU16<BL_BYTE_ORDER_BE, `2`>(p); }
658	BL_INLINE uint32_t blMemReadU16uBE(const void* p) noexcept { return blMemReadU16<BL_BYTE_ORDER_BE, `1`>(p); }
659
660	BL_INLINE uint32_t blMemReadU24uLE(const void* p) noexcept { return blMemReadU24u<BL_BYTE_ORDER_LE>(p); }
661	BL_INLINE uint32_t blMemReadU24uBE(const void* p) noexcept { return blMemReadU24u<BL_BYTE_ORDER_BE>(p); }
662
663	BL_INLINE int32_t blMemReadI32a(const void* p) noexcept { return blMemReadI32<BL_BYTE_ORDER_NATIVE, `4`>(p); }
664	BL_INLINE int32_t blMemReadI32u(const void* p) noexcept { return blMemReadI32<BL_BYTE_ORDER_NATIVE, `1`>(p); }
665	BL_INLINE uint32_t blMemReadU32a(const void* p) noexcept { return blMemReadU32<BL_BYTE_ORDER_NATIVE, `4`>(p); }
666	BL_INLINE uint32_t blMemReadU32u(const void* p) noexcept { return blMemReadU32<BL_BYTE_ORDER_NATIVE, `1`>(p); }
667
668	BL_INLINE int32_t blMemReadI32aLE(const void* p) noexcept { return blMemReadI32<BL_BYTE_ORDER_LE, `4`>(p); }
669	BL_INLINE int32_t blMemReadI32uLE(const void* p) noexcept { return blMemReadI32<BL_BYTE_ORDER_LE, `1`>(p); }
670	BL_INLINE uint32_t blMemReadU32aLE(const void* p) noexcept { return blMemReadU32<BL_BYTE_ORDER_LE, `4`>(p); }
671	BL_INLINE uint32_t blMemReadU32uLE(const void* p) noexcept { return blMemReadU32<BL_BYTE_ORDER_LE, `1`>(p); }
672
673	BL_INLINE int32_t blMemReadI32aBE(const void* p) noexcept { return blMemReadI32<BL_BYTE_ORDER_BE, `4`>(p); }
674	BL_INLINE int32_t blMemReadI32uBE(const void* p) noexcept { return blMemReadI32<BL_BYTE_ORDER_BE, `1`>(p); }
675	BL_INLINE uint32_t blMemReadU32aBE(const void* p) noexcept { return blMemReadU32<BL_BYTE_ORDER_BE, `4`>(p); }
676	BL_INLINE uint32_t blMemReadU32uBE(const void* p) noexcept { return blMemReadU32<BL_BYTE_ORDER_BE, `1`>(p); }
677
678	BL_INLINE int64_t blMemReadI64a(const void* p) noexcept { return blMemReadI64<BL_BYTE_ORDER_NATIVE, `8`>(p); }
679	BL_INLINE int64_t blMemReadI64u(const void* p) noexcept { return blMemReadI64<BL_BYTE_ORDER_NATIVE, `1`>(p); }
680	BL_INLINE uint64_t blMemReadU64a(const void* p) noexcept { return blMemReadU64<BL_BYTE_ORDER_NATIVE, `8`>(p); }
681	BL_INLINE uint64_t blMemReadU64u(const void* p) noexcept { return blMemReadU64<BL_BYTE_ORDER_NATIVE, `1`>(p); }
682
683	BL_INLINE int64_t blMemReadI64aLE(const void* p) noexcept { return blMemReadI64<BL_BYTE_ORDER_LE, `8`>(p); }
684	BL_INLINE int64_t blMemReadI64uLE(const void* p) noexcept { return blMemReadI64<BL_BYTE_ORDER_LE, `1`>(p); }
685	BL_INLINE uint64_t blMemReadU64aLE(const void* p) noexcept { return blMemReadU64<BL_BYTE_ORDER_LE, `8`>(p); }
686	BL_INLINE uint64_t blMemReadU64uLE(const void* p) noexcept { return blMemReadU64<BL_BYTE_ORDER_LE, `1`>(p); }
687
688	BL_INLINE int64_t blMemReadI64aBE(const void* p) noexcept { return blMemReadI64<BL_BYTE_ORDER_BE, `8`>(p); }
689	BL_INLINE int64_t blMemReadI64uBE(const void* p) noexcept { return blMemReadI64<BL_BYTE_ORDER_BE, `1`>(p); }
690	BL_INLINE uint64_t blMemReadU64aBE(const void* p) noexcept { return blMemReadU64<BL_BYTE_ORDER_BE, `8`>(p); }
691	BL_INLINE uint64_t blMemReadU64uBE(const void* p) noexcept { return blMemReadU64<BL_BYTE_ORDER_BE, `1`>(p); }
692
693	} // {anonymous}
694
695	// ============================================================================
696	// [blMemWrite]
697	// ============================================================================
698
699	namespace {
700
701	BL_INLINE void blMemWriteU8(void* p, uint32_t x) noexcept { static_cast<uint8_t*>(p)[`0`] = uint8_t(x & `0xFFu`); }
702	BL_INLINE void blMemWriteI8(void* p, int32_t x) noexcept { static_cast<uint8_t*>(p)[`0`] = uint8_t(x & `0xFF`); }
703
704	template<uint32_t ByteOrder, size_t Alignment>
705	BL_INLINE void blMemWriteU16(void* p, uint32_t x) noexcept {
706	if (ByteOrder == BL_BYTE_ORDER_NATIVE && (BL_UNALIGNED_IO_16 \|\| Alignment >= `2`)) {
707	typedef typename BLMisalignedUInt<uint16_t, Alignment>::T U16AlignedToN;
708	static_cast<U16AlignedToN*>(p)[`0`] = uint16_t(x & `0xFFFFu`);
709	}
710	else {
711	static_cast<uint8_t*>(p)[`0`] = uint8_t((x >> (ByteOrder == BL_BYTE_ORDER_LE ? `0` : `8`)) & `0xFFu`);
712	static_cast<uint8_t*>(p)[`1`] = uint8_t((x >> (ByteOrder == BL_BYTE_ORDER_LE ? `8` : `0`)) & `0xFFu`);
713	}
714	}
715
716	template<uint32_t ByteOrder = BL_BYTE_ORDER_NATIVE>
717	BL_INLINE void blMemWriteU24u(void* p, uint32_t v) noexcept {
718	static_cast<uint8_t*>(p)[`0`] = uint8_t((v >> (ByteOrder == BL_BYTE_ORDER_LE ? `0` : `16`)) & `0xFFu`);
719	static_cast<uint8_t*>(p)[`1`] = uint8_t((v >> (ByteOrder == BL_BYTE_ORDER_LE ? `8` : `8`)) & `0xFFu`);
720	static_cast<uint8_t*>(p)[`2`] = uint8_t((v >> (ByteOrder == BL_BYTE_ORDER_LE ? `16` : `0`)) & `0xFFu`);
721	}
722
723	template<uint32_t ByteOrder, size_t Alignment>
724	BL_INLINE void blMemWriteU32(void* p, uint32_t x) noexcept {
725	if (BL_UNALIGNED_IO_32 \|\| Alignment >= `4`) {
726	typedef typename BLMisalignedUInt<uint32_t, Alignment>::T U32AlignedToN;
727	static_cast<U32AlignedToN*>(p)[`0`] = (ByteOrder == BL_BYTE_ORDER_NATIVE) ? x : blByteSwap32(x);
728	}
729	else {
730	blMemWriteU16<ByteOrder, Alignment >= `2` ? size_t(`2`) : Alignment>(static_cast<uint8_t*>(p) + `0`, x >> (ByteOrder == BL_BYTE_ORDER_LE ? `0` : `16`));
731	blMemWriteU16<ByteOrder, Alignment >= `2` ? size_t(`2`) : Alignment>(static_cast<uint8_t*>(p) + `2`, x >> (ByteOrder == BL_BYTE_ORDER_LE ? `16` : `0`));
732	}
733	}
734
735	template<uint32_t ByteOrder, size_t Alignment>
736	BL_INLINE void blMemWriteU64(void* p, uint64_t x) noexcept {
737	if (ByteOrder == BL_BYTE_ORDER_NATIVE && (BL_UNALIGNED_IO_64 \|\| Alignment >= `8`)) {
738	typedef typename BLMisalignedUInt<uint64_t, Alignment>::T U64AlignedToN;
739	static_cast<U64AlignedToN*>(p)[`0`] = x;
740	}
741	else {
742	blMemWriteU32<ByteOrder, Alignment >= `4` ? size_t(`4`) : Alignment>(static_cast<uint8_t*>(p) + `0`, uint32_t((x >> (ByteOrder == BL_BYTE_ORDER_LE ? `0` : `32`)) & `0xFFFFFFFFu`));
743	blMemWriteU32<ByteOrder, Alignment >= `4` ? size_t(`4`) : Alignment>(static_cast<uint8_t*>(p) + `4`, uint32_t((x >> (ByteOrder == BL_BYTE_ORDER_LE ? `32` : `0`)) & `0xFFFFFFFFu`));
744	}
745	}
746
747	template<uint32_t ByteOrder, size_t Alignment> BL_INLINE void blMemWriteI16(void* p, int32_t x) noexcept { blMemWriteU16<ByteOrder, Alignment>(p, uint32_t(x)); }
748	template<uint32_t ByteOrder, size_t Alignment> BL_INLINE void blMemWriteI32(void* p, int32_t x) noexcept { blMemWriteU32<ByteOrder, Alignment>(p, uint32_t(x)); }
749	template<uint32_t ByteOrder, size_t Alignment> BL_INLINE void blMemWriteI64(void* p, int64_t x) noexcept { blMemWriteU64<ByteOrder, Alignment>(p, uint64_t(x)); }
750
751	BL_INLINE void blMemWriteI16a(void* p, int32_t x) noexcept { blMemWriteI16<BL_BYTE_ORDER_NATIVE, `2`>(p, x); }
752	BL_INLINE void blMemWriteI16u(void* p, int32_t x) noexcept { blMemWriteI16<BL_BYTE_ORDER_NATIVE, `1`>(p, x); }
753	BL_INLINE void blMemWriteU16a(void* p, uint32_t x) noexcept { blMemWriteU16<BL_BYTE_ORDER_NATIVE, `2`>(p, x); }
754	BL_INLINE void blMemWriteU16u(void* p, uint32_t x) noexcept { blMemWriteU16<BL_BYTE_ORDER_NATIVE, `1`>(p, x); }
755
756	BL_INLINE void blMemWriteI16aLE(void* p, int32_t x) noexcept { blMemWriteI16<BL_BYTE_ORDER_LE, `2`>(p, x); }
757	BL_INLINE void blMemWriteI16uLE(void* p, int32_t x) noexcept { blMemWriteI16<BL_BYTE_ORDER_LE, `1`>(p, x); }
758	BL_INLINE void blMemWriteU16aLE(void* p, uint32_t x) noexcept { blMemWriteU16<BL_BYTE_ORDER_LE, `2`>(p, x); }
759	BL_INLINE void blMemWriteU16uLE(void* p, uint32_t x) noexcept { blMemWriteU16<BL_BYTE_ORDER_LE, `1`>(p, x); }
760
761	BL_INLINE void blMemWriteI16aBE(void* p, int32_t x) noexcept { blMemWriteI16<BL_BYTE_ORDER_BE, `2`>(p, x); }
762	BL_INLINE void blMemWriteI16uBE(void* p, int32_t x) noexcept { blMemWriteI16<BL_BYTE_ORDER_BE, `1`>(p, x); }
763	BL_INLINE void blMemWriteU16aBE(void* p, uint32_t x) noexcept { blMemWriteU16<BL_BYTE_ORDER_BE, `2`>(p, x); }
764	BL_INLINE void blMemWriteU16uBE(void* p, uint32_t x) noexcept { blMemWriteU16<BL_BYTE_ORDER_BE, `1`>(p, x); }
765
766	BL_INLINE void blMemWriteU24uLE(void* p, uint32_t v) noexcept { blMemWriteU24u<BL_BYTE_ORDER_LE>(p, v); }
767	BL_INLINE void blMemWriteU24uBE(void* p, uint32_t v) noexcept { blMemWriteU24u<BL_BYTE_ORDER_BE>(p, v); }
768
769	BL_INLINE void blMemWriteI32a(void* p, int32_t x) noexcept { blMemWriteI32<BL_BYTE_ORDER_NATIVE, `4`>(p, x); }
770	BL_INLINE void blMemWriteI32u(void* p, int32_t x) noexcept { blMemWriteI32<BL_BYTE_ORDER_NATIVE, `1`>(p, x); }
771	BL_INLINE void blMemWriteU32a(void* p, uint32_t x) noexcept { blMemWriteU32<BL_BYTE_ORDER_NATIVE, `4`>(p, x); }
772	BL_INLINE void blMemWriteU32u(void* p, uint32_t x) noexcept { blMemWriteU32<BL_BYTE_ORDER_NATIVE, `1`>(p, x); }
773
774	BL_INLINE void blMemWriteI32aLE(void* p, int32_t x) noexcept { blMemWriteI32<BL_BYTE_ORDER_LE, `4`>(p, x); }
775	BL_INLINE void blMemWriteI32uLE(void* p, int32_t x) noexcept { blMemWriteI32<BL_BYTE_ORDER_LE, `1`>(p, x); }
776	BL_INLINE void blMemWriteU32aLE(void* p, uint32_t x) noexcept { blMemWriteU32<BL_BYTE_ORDER_LE, `4`>(p, x); }
777	BL_INLINE void blMemWriteU32uLE(void* p, uint32_t x) noexcept { blMemWriteU32<BL_BYTE_ORDER_LE, `1`>(p, x); }
778
779	BL_INLINE void blMemWriteI32aBE(void* p, int32_t x) noexcept { blMemWriteI32<BL_BYTE_ORDER_BE, `4`>(p, x); }
780	BL_INLINE void blMemWriteI32uBE(void* p, int32_t x) noexcept { blMemWriteI32<BL_BYTE_ORDER_BE, `1`>(p, x); }
781	BL_INLINE void blMemWriteU32aBE(void* p, uint32_t x) noexcept { blMemWriteU32<BL_BYTE_ORDER_BE, `4`>(p, x); }
782	BL_INLINE void blMemWriteU32uBE(void* p, uint32_t x) noexcept { blMemWriteU32<BL_BYTE_ORDER_BE, `1`>(p, x); }
783
784	BL_INLINE void blMemWriteI64a(void* p, int64_t x) noexcept { blMemWriteI64<BL_BYTE_ORDER_NATIVE, `8`>(p, x); }
785	BL_INLINE void blMemWriteI64u(void* p, int64_t x) noexcept { blMemWriteI64<BL_BYTE_ORDER_NATIVE, `1`>(p, x); }
786	BL_INLINE void blMemWriteU64a(void* p, uint64_t x) noexcept { blMemWriteU64<BL_BYTE_ORDER_NATIVE, `8`>(p, x); }
787	BL_INLINE void blMemWriteU64u(void* p, uint64_t x) noexcept { blMemWriteU64<BL_BYTE_ORDER_NATIVE, `1`>(p, x); }
788
789	BL_INLINE void blMemWriteI64aLE(void* p, int64_t x) noexcept { blMemWriteI64<BL_BYTE_ORDER_LE, `8`>(p, x); }
790	BL_INLINE void blMemWriteI64uLE(void* p, int64_t x) noexcept { blMemWriteI64<BL_BYTE_ORDER_LE, `1`>(p, x); }
791	BL_INLINE void blMemWriteU64aLE(void* p, uint64_t x) noexcept { blMemWriteU64<BL_BYTE_ORDER_LE, `8`>(p, x); }
792	BL_INLINE void blMemWriteU64uLE(void* p, uint64_t x) noexcept { blMemWriteU64<BL_BYTE_ORDER_LE, `1`>(p, x); }
793
794	BL_INLINE void blMemWriteI64aBE(void* p, int64_t x) noexcept { blMemWriteI64<BL_BYTE_ORDER_BE, `8`>(p, x); }
795	BL_INLINE void blMemWriteI64uBE(void* p, int64_t x) noexcept { blMemWriteI64<BL_BYTE_ORDER_BE, `1`>(p, x); }
796	BL_INLINE void blMemWriteU64aBE(void* p, uint64_t x) noexcept { blMemWriteU64<BL_BYTE_ORDER_BE, `8`>(p, x); }
797	BL_INLINE void blMemWriteU64uBE(void* p, uint64_t x) noexcept { blMemWriteU64<BL_BYTE_ORDER_BE, `1`>(p, x); }
798
799	} // {anonymous}
800
801	// ============================================================================
802	// [blMemCopySmall]
803	// ============================================================================
804
805	namespace {
806
807	template<typename T>
808	static BL_INLINE void blMemCopyInlineT(T* dst, const T* src, size_t count) noexcept {
809	for (size_t i = `0`; i < count; i++)
810	dst[i] = src[i];
811	}
812
813	//! Copies `n` bytes from `src` to `dst` optimized for small buffers.
814	static BL_INLINE void blMemCopyInline(void* dst, const void* src, size_t n) noexcept {
815	#if defined(__GNUC__) && BL_TARGET_ARCH_X86
816	size_t unused;
817	__asm__ __volatile__(
818	"rep movsb" : "=&D"(dst), "=&S"(src), "=&c"(unused)
819	: "0"(dst), "1"(src), "2"(n)
820	: "memory"
821	);
822	#elif defined(_MSC_VER) && BL_TARGET_ARCH_X86
823	__movsb(static_cast<unsigned char >(dst), static_cast<const* unsigned char *>(src), n);
824	#else
825	blMemCopyInlineT<uint8_t>(static_cast<uint8_t>(dst), static_cast<const* uint8_t*>(src), n);
826	#endif
827	}
828
829	} // {anonymous}
830
831	// ============================================================================
832	// [BLWrap<T>]
833	// ============================================================================
834
835	//! Wrapper to control construction & destruction of `T`.
836	template<typename T>
837	struct alignas(alignof(T)) BLWrap {
838	//! Storage required to instantiate `T`.
839	char _data[sizeof(T)];
840
841	//! \name Init / Destroy
842	//! \{
843
844	//! Placement new constructor.
845	BL_INLINE T* init() noexcept {
846	#if defined(_MSC_VER) && !defined(__clang__)
847	BL_ASSUME(_data != nullptr);
848	#endif
849	return new(static_cast<void*>(_data)) T;
850	}
851
852	//! Placement new constructor with arguments.
853	template<typename... Args>
854	BL_INLINE T* init(Args&&... args) noexcept {
855	#if defined(_MSC_VER) && !defined(__clang__)
856	BL_ASSUME(_data != nullptr);
857	#endif
858	return new(static_cast<void*>(_data)) T(std::forward<Args>(args)...);
859	}
860
861	//! Placement delete destructor.
862	BL_INLINE void destroy() noexcept {
863	#if defined(_MSC_VER) && !defined(__clang__)
864	BL_ASSUME(_data != nullptr);
865	#endif
866	static_cast<T>(static_cast<void**>(_data))->~T();
867	}
868
869	//! \}
870
871	//! \name Accessors
872	//! \{
873
874	BL_INLINE T* p() noexcept { return static_cast<T>(static_cast<void**>(_data)); }
875	BL_INLINE const T* p() const noexcept { return static_cast<const T>(static_cast<const* void*>(_data)); }
876
877	BL_INLINE operator T&() noexcept { return *p(); }
878	BL_INLINE operator const T&() const noexcept { return *p(); }
879
880	BL_INLINE T& operator()() noexcept { return *p(); }
881	BL_INLINE const T& operator()() const noexcept { return *p(); }
882
883	BL_INLINE T* operator&() noexcept { return p(); }
884	BL_INLINE const T* operator&() const noexcept { return p(); }
885
886	BL_INLINE T* operator->() noexcept { return p(); }
887	BL_INLINE T const* operator->() const noexcept { return p(); }
888
889	//! \}
890	};
891
892	// ============================================================================
893	// [BLScopedAllocator]
894	// ============================================================================
895
896	//! A simple allocator that can be used to remember allocated memory so it can
897	//! be then freed in one go. Typically used in areas where some heap allocation
898	//! is required and at the end of the work it will all be freed.
899	class BLScopedAllocator {
900	public:
901	BL_NONCOPYABLE(BLScopedAllocator)
902	struct Link { Link* next; };
903
904	Link* links;
905	uint8_t* poolPtr;
906	uint8_t* poolMem;
907	uint8_t* poolEnd;
908
909	BL_INLINE BLScopedAllocator() noexcept
910	: links(nullptr),
911	poolPtr(nullptr),
912	poolMem(nullptr),
913	poolEnd(nullptr) {}
914
915	BL_INLINE BLScopedAllocator(void* poolMem, size_t poolSize) noexcept
916	: links(nullptr),
917	poolPtr(static_cast<uint8_t*>(poolMem)),
918	poolMem(static_cast<uint8_t*>(poolMem)),
919	poolEnd(static_cast<uint8_t*>(poolMem) + poolSize) {}
920
921	BL_INLINE ~BLScopedAllocator() noexcept { reset(); }
922
923	void* alloc(size_t size, size_t alignment = `1`) noexcept;
924	void reset() noexcept;
925	};
926
927	// ============================================================================
928	// [BLMemBuffer]
929	// ============================================================================
930
931	//! Memory buffer.
932	//!
933	//! Memory buffer is a helper class which holds pointer to an allocated memory
934	//! block, which will be released automatically by `BLMemBuffer` destructor or by
935	//! `reset()` call.
936	class BLMemBuffer {
937	public:
938	BL_NONCOPYABLE(BLMemBuffer)
939
940	void* _mem;
941	void* _buf;
942	size_t _capacity;
943
944	BL_INLINE BLMemBuffer() noexcept
945	: _mem(nullptr),
946	_buf(nullptr),
947	_capacity(`0`) {}
948
949	BL_INLINE ~BLMemBuffer() noexcept {
950	_reset();
951	}
952
953	protected:
954	BL_INLINE BLMemBuffer(void* mem, void* buf, size_t capacity) noexcept
955	: _mem(mem),
956	_buf(buf),
957	_capacity(capacity) {}
958
959	public:
960	BL_INLINE void* get() const noexcept { return _mem; }
961	BL_INLINE size_t capacity() const noexcept { return _capacity; }
962
963	BL_INLINE void* alloc(size_t size) noexcept {
964	if (size <= _capacity)
965	return _mem;
966
967	if (_mem != _buf)
968	free(_mem);
969
970	_mem = malloc(size);
971	_capacity = size;
972
973	return _mem;
974	}
975
976	BL_INLINE void _reset() noexcept {
977	if (_mem != _buf)
978	free(_mem);
979	}
980
981	BL_INLINE void reset() noexcept {
982	_reset();
983
984	_mem = nullptr;
985	_capacity = `0`;
986	}
987	};
988
989	// ============================================================================
990	// [BLMemBufferTmp<>]
991	// ============================================================================
992
993	//! Memory buffer (temporary).
994	//!
995	//! This template is for fast routines that need to use memory allocated on
996	//! the stack, but the memory requirement is not known at compile time. The
997	//! number of bytes allocated on the stack is described by `N` parameter.
998	template<size_t N>
999	class BLMemBufferTmp : public BLMemBuffer {
1000	public:
1001	BL_NONCOPYABLE(BLMemBufferTmp<N>)
1002
1003	uint8_t _storage[N];
1004
1005	BL_INLINE BLMemBufferTmp() noexcept
1006	: BLMemBuffer(_storage, _storage, N) {}
1007
1008	BL_INLINE ~BLMemBufferTmp() noexcept {}
1009
1010	using BLMemBuffer::alloc;
1011
1012	BL_INLINE void reset() noexcept {
1013	_reset();
1014	_mem = _buf;
1015	_capacity = N;
1016	}
1017	};
1018
1019	//! \}
1020	//! \endcond
1021
1022	#endif // BLEND2D_BLSUPPORT_P_H
1023

Browse the source code of Blend2d/src/blend2d/blsupport_p.h