1// [Blend2D]
2// 2D Vector Graphics Powered by a JIT Compiler.
3//
4// [License]
5// Zlib - See LICENSE.md file in the package.
6
7#ifndef BLEND2D_BLSUPPORT_P_H
8#define BLEND2D_BLSUPPORT_P_H
9
#include "./blapi-internal_p.h"
#include <string.h>
11
12//! \cond INTERNAL
13//! \addtogroup blend2d_internal
14//! \{
15
16// ============================================================================
17// [StdInt]
18// ============================================================================
19
//! Tells at compile time whether `T` is an unsigned arithmetic type
//! (thin wrapper around `std::is_unsigned`).
template<typename T>
static constexpr bool blIsUnsigned() noexcept {
  return std::is_unsigned<T>();
}
22
23//! Cast an integer `x` to a fixed-width type as defined by <stdint.h>
24//!
25//! This can help when specializing some functions for a particular type. Since
26//! some C/C++ types may overlap (like `long` vs `long long`) it's easier to
27//! just cast to a type as defined by <stdint.h> and specialize for it.
28template<typename T>
29static constexpr typename BLInternal::StdInt<sizeof(T), blIsUnsigned<T>()>::Type blAsStdInt(T x) noexcept {
30 return (typename BLInternal::StdInt<sizeof(T), blIsUnsigned<T>()>::Type)x;
31}
32
33//! Cast an integer `x` to a fixed-width unsigned type as defined by <stdint.h>
34template<typename T>
35static constexpr typename BLInternal::StdInt<sizeof(T), 1>::Type blAsStdUInt(T x) noexcept {
36 return (typename BLInternal::StdInt<sizeof(T), 1>::Type)x;
37}
38
39//! Cast an integer `x` to either `int32_t`, uint32_t`, `int64_t`, or `uint64_t`.
40//!
41//! Used to keep a signedness of `T`, but to promote it to at least 32-bit type.
42template<typename T>
43static constexpr typename BLInternal::StdInt<blMax<size_t>(sizeof(T), 4), blIsUnsigned<T>()>::Type blAsIntLeast32(T x) noexcept {
44 typedef typename BLInternal::StdInt<blMax<size_t>(sizeof(T), 4), blIsUnsigned<T>()>::Type Result;
45 return Result(x);
46}
47
48//! Cast an integer `x` to either `uint32_t` or `uint64_t`.
49template<typename T>
50static constexpr typename BLInternal::StdInt<blMax<size_t>(sizeof(T), 4), 1>::Type blAsUIntLeast32(T x) noexcept {
51 typedef typename BLInternal::StdInt<blMax<size_t>(sizeof(T), 4), 1>::Type Result;
52 typedef typename std::make_unsigned<T>::type U;
53 return Result(U(x));
54}
55
56// ============================================================================
57// [MisalignedInt]
58// ============================================================================
59
60static const constexpr bool BL_UNALIGNED_IO_16 = BL_TARGET_ARCH_X86 != 0;
61static const constexpr bool BL_UNALIGNED_IO_32 = BL_TARGET_ARCH_X86 != 0;
62static const constexpr bool BL_UNALIGNED_IO_64 = BL_TARGET_ARCH_X86 != 0;
63
// BL_MISALIGN_TYPE(NAME, ALIGN) declares `NAME` with an explicit alignment of
// `ALIGN` bytes through a compiler-specific attribute (this is not allowed by
// the C++11 `alignas` keyword). Compilers that offer neither form get plain
// `NAME`.
#if defined(__GNUC__)
  #define BL_MISALIGN_TYPE(NAME, ALIGN) __attribute__((__aligned__(ALIGN))) NAME
#elif defined(_MSC_VER)
  #define BL_MISALIGN_TYPE(NAME, ALIGN) __declspec(align(ALIGN)) NAME
#else
  #define BL_MISALIGN_TYPE(NAME, ALIGN) NAME
#endif

//! An integer type that has possibly less alignment than its size.
//!
//! Only the specializations below provide the member typedef `T`; any other
//! combination of type and alignment is intentionally left empty.
template<typename T, size_t Alignment>
struct BLMisalignedUInt {};

// 8-bit.
template<>
struct BLMisalignedUInt<uint8_t, 1> {
  typedef uint8_t T;
};

// 16-bit.
template<>
struct BLMisalignedUInt<uint16_t, 1> {
  typedef uint16_t BL_MISALIGN_TYPE(T, 1);
};

template<>
struct BLMisalignedUInt<uint16_t, 2> {
  typedef uint16_t T;
};

// 32-bit.
template<>
struct BLMisalignedUInt<uint32_t, 1> {
  typedef uint32_t BL_MISALIGN_TYPE(T, 1);
};

template<>
struct BLMisalignedUInt<uint32_t, 2> {
  typedef uint32_t BL_MISALIGN_TYPE(T, 2);
};

template<>
struct BLMisalignedUInt<uint32_t, 4> {
  typedef uint32_t T;
};

// 64-bit.
template<>
struct BLMisalignedUInt<uint64_t, 1> {
  typedef uint64_t BL_MISALIGN_TYPE(T, 1);
};

template<>
struct BLMisalignedUInt<uint64_t, 2> {
  typedef uint64_t BL_MISALIGN_TYPE(T, 2);
};

template<>
struct BLMisalignedUInt<uint64_t, 4> {
  typedef uint64_t BL_MISALIGN_TYPE(T, 4);
};

template<>
struct BLMisalignedUInt<uint64_t, 8> {
  typedef uint64_t T;
};

#undef BL_MISALIGN_TYPE
89
90// ============================================================================
91// [Numeric Limits]
92// ============================================================================
93
//! Returns positive infinity of type `T` (`std::numeric_limits<T>::infinity()`).
template<typename T>
static constexpr T blInf() noexcept {
  return std::numeric_limits<T>::infinity();
}

//! Returns a quiet NaN of type `T` (`std::numeric_limits<T>::quiet_NaN()`).
template<typename T>
static constexpr T blNaN() noexcept {
  return std::numeric_limits<T>::quiet_NaN();
}

//! Returns the lowest finite value of `T` (`lowest()`, not `min()`, so the
//! result is the most negative value for floating point types as well).
template<typename T>
static constexpr T blMinValue() noexcept {
  return std::numeric_limits<T>::lowest();
}

//! Returns the greatest finite value of `T`.
template<typename T>
static constexpr T blMaxValue() noexcept {
  return std::numeric_limits<T>::max();
}
98
99// ============================================================================
100// [Bit Utilities]
101// ============================================================================
102
103namespace {
104
//! Returns a value of type `T` that has all bits set.
template<typename T>
constexpr T blBitOnes() noexcept {
  return static_cast<T>(~static_cast<T>(0));
}
107
//! Returns `0 - x` without undefined behavior, for both signed and unsigned `T`.
//!
//! The subtraction is carried out in the unsigned counterpart of `T`, where
//! wrap-around is well defined, and the result is converted back to `T`.
template<typename T>
constexpr T blNegate(const T& x) noexcept {
  using Unsigned = typename std::make_unsigned<T>::type;
  return static_cast<T>(Unsigned(0) - static_cast<Unsigned>(x));
}
114
//! Returns the size of `T` in bits (`sizeof(T) * 8`).
template<typename T>
constexpr uint32_t blBitSizeOf() noexcept {
  return static_cast<uint32_t>(sizeof(T) * 8u);
}

//! Returns how many words of type `T` are required to store `nBits` bits
//! (`nBits` divided by the bit size of `T`, rounded up).
template<typename T>
constexpr size_t blBitWordCountFromBitCount(size_t nBits) noexcept {
  return (nBits + (blBitSizeOf<T>() - 1)) / blBitSizeOf<T>();
}
122
123//! Bit-cast `x` of `In` type to the given `Out` type.
124//!
125//! Useful to bit-cast between integers and floating points. The size of `Out`
126//! and `In` must be the same otherwise the compilation would fail. Bit casting
127//! is used by `blEquals` to implement bit equality for floating point types.
128template<typename Out, typename In>
129BL_INLINE Out blBitCast(const In& x) noexcept {
130 static_assert(sizeof(Out) == sizeof(In), "The size of In and Out must match");
131 union { In in; Out out; } u;
132 u.in = x;
133 return u.out;
134}
135
//! Logical shift left: returns `x << y`, where the shift itself is performed
//! on the unsigned counterpart of `X` and the result is converted back to `X`.
template<typename X, typename Y>
constexpr X blBitShl(const X& x, const Y& y) noexcept {
  using UnsignedX = typename std::make_unsigned<X>::type;
  return static_cast<X>(static_cast<UnsignedX>(x) << y);
}

//! Logical shift right: returns `x >> y`, where the shift itself is performed
//! on the unsigned counterpart of `X`, so zeros are shifted in from the left.
template<typename X, typename Y>
constexpr X blBitShr(const X& x, const Y& y) noexcept {
  using UnsignedX = typename std::make_unsigned<X>::type;
  return static_cast<X>(static_cast<UnsignedX>(x) >> y);
}

//! Arithmetic shift right: returns `x >> y`, where the shift itself is
//! performed on the signed counterpart of `X`.
template<typename X, typename Y>
constexpr X blBitSar(const X& x, const Y& y) noexcept {
  using SignedX = typename std::make_signed<X>::type;
  return static_cast<X>(static_cast<SignedX>(x) >> y);
}
156
157template<typename T>
158BL_INLINE T blBitRolImpl(const T& x, unsigned n) noexcept {
159 return blBitShl(x, n % unsigned(sizeof(T) * 8u)) | blBitShr(x, (0u - n) % unsigned(sizeof(T) * 8u)) ;
160}
161
162template<typename T>
163BL_INLINE T blBitRorImpl(const T& x, unsigned n) noexcept {
164 return blBitShr(x, n % unsigned(sizeof(T) * 8u)) | blBitShl(x, (0u - n) % unsigned(sizeof(T) * 8u)) ;
165}
166
167// MSVC is unable to emit `rol|ror` instruction when `n` is not constant so we
168// have to help it a bit. This, however, prevents us from using `constexpr`.
169#if defined(_MSC_VER)
170template<> BL_INLINE uint8_t blBitRolImpl(const uint8_t& x, unsigned n) noexcept { return uint8_t(_rotl8(x, uint8_t(n))); }
171template<> BL_INLINE uint8_t blBitRorImpl(const uint8_t& x, unsigned n) noexcept { return uint8_t(_rotr8(x, uint8_t(n))); }
172template<> BL_INLINE uint16_t blBitRolImpl(const uint16_t& x, unsigned n) noexcept { return uint16_t(_rotl16(x, uint8_t(n))); }
173template<> BL_INLINE uint16_t blBitRorImpl(const uint16_t& x, unsigned n) noexcept { return uint16_t(_rotr16(x, uint8_t(n))); }
174template<> BL_INLINE uint32_t blBitRolImpl(const uint32_t& x, unsigned n) noexcept { return uint32_t(_rotl(x, int(n))); }
175template<> BL_INLINE uint32_t blBitRorImpl(const uint32_t& x, unsigned n) noexcept { return uint32_t(_rotr(x, int(n))); }
176template<> BL_INLINE uint64_t blBitRolImpl(const uint64_t& x, unsigned n) noexcept { return uint64_t(_rotl64(x, int(n))); }
177template<> BL_INLINE uint64_t blBitRorImpl(const uint64_t& x, unsigned n) noexcept { return uint64_t(_rotr64(x, int(n))); }
178#endif
179
180template<typename X, typename Y>
181BL_INLINE X blBitRol(const X& x, const Y& n) noexcept { return X(blBitRolImpl(blAsUIntLeast32(x), unsigned(n))); }
182
183template<typename X, typename Y>
184BL_INLINE X blBitRor(const X& x, const Y& n) noexcept { return X(blBitRorImpl(blAsUIntLeast32(x), unsigned(n))); }
185
//! Returns `x | (x >> y)` (logical shift) - helper used by some bit
//! manipulation helpers.
template<typename X, typename Y>
constexpr X blBitShrOr(const X& x, const Y& y) noexcept {
  return static_cast<X>(x | blBitShr(x, y));
}

//! Variadic form: applies `x = x | (x >> shift)` for every given shift, from
//! left to right, and returns the final value.
template<typename X, typename Y, typename... Args>
constexpr X blBitShrOr(const X& x, const Y& y, Args... args) noexcept {
  return blBitShrOr(blBitShrOr(x, y), args...);
}
192
193//! Fill all trailing bits right from the first most significant bit set.
194template<typename T>
195constexpr T blFillTrailingBits(const T& x) noexcept {
196 typedef typename BLInternal::StdInt<sizeof(T), 1>::Type U;
197 return T(blFillTrailingBits(U(x)));
198}
199
200template<> constexpr uint8_t blFillTrailingBits(const uint8_t& x) noexcept { return blBitShrOr(x, 1, 2, 4); }
201template<> constexpr uint16_t blFillTrailingBits(const uint16_t& x) noexcept { return blBitShrOr(x, 1, 2, 4, 8); }
202template<> constexpr uint32_t blFillTrailingBits(const uint32_t& x) noexcept { return blBitShrOr(x, 1, 2, 4, 8, 16); }
203template<> constexpr uint64_t blFillTrailingBits(const uint64_t& x) noexcept { return blBitShrOr(x, 1, 2, 4, 8, 16, 32); }
204
//! Returns a bit-mask of type `T` in which only bit `x` is set.
template<typename T, typename Arg>
constexpr T blBitMask(Arg x) noexcept {
  return static_cast<T>(static_cast<T>(1u) << x);
}

//! Returns a bit-mask of type `T` in which every listed bit index is set.
template<typename T, typename Arg, typename... Args>
constexpr T blBitMask(Arg x, Args... args) noexcept {
  return static_cast<T>(blBitMask<T>(args...) | blBitMask<T>(x));
}
212
//! Converts a boolean-like `x` into a bit-mask of type `T`: all bits set when
//! `x` is 1, zero when `x` is 0 (implemented as `0 - T(x)`).
//!
//! \note `x` must be either 0 or 1, any other value produces invalid output.
template<typename T, typename B>
constexpr T blBitMaskFromBool(const B& x) noexcept {
  return blNegate(static_cast<T>(x));
}
218
//! Tests whether bit `i` of `x` is set (the test is performed on the unsigned
//! counterpart of `T`).
template<typename T, typename I>
constexpr bool blBitTest(const T& x, const I& i) noexcept {
  using Unsigned = typename std::make_unsigned<T>::type;
  return static_cast<bool>(Unsigned(x) & (Unsigned(1) << i));
}
225
//! Tests whether every bit that is set in `y` is also set in `x`.
template<typename X, typename Y>
constexpr bool blBitMatch(const X& x, const Y& y) noexcept {
  return !((x & y) != y);
}
229
//! Portable count-trailing-zeros for an isolated bit (32-bit variant).
//!
//! `xAndNegX` is expected to be `x & -x`, i.e. a value with only the lowest
//! set bit of `x` kept. Each mask selects the bit positions whose index has a
//! particular binary digit cleared, so the matching power of two is removed
//! from the starting value of 31 whenever the isolated bit falls inside it.
constexpr uint32_t blBitCtzFallback(uint32_t xAndNegX) noexcept {
  return 31 - (((xAndNegX & 0x0000FFFFu) != 0) ? 16 : 0)
            - (((xAndNegX & 0x00FF00FFu) != 0) ?  8 : 0)
            - (((xAndNegX & 0x0F0F0F0Fu) != 0) ?  4 : 0)
            - (((xAndNegX & 0x33333333u) != 0) ?  2 : 0)
            - (((xAndNegX & 0x55555555u) != 0) ?  1 : 0);
}

//! Portable count-trailing-zeros for an isolated bit (64-bit variant), see the
//! 32-bit overload for details.
constexpr uint32_t blBitCtzFallback(uint64_t xAndNegX) noexcept {
  return 63 - (((xAndNegX & 0x00000000FFFFFFFFu) != 0) ? 32 : 0)
            - (((xAndNegX & 0x0000FFFF0000FFFFu) != 0) ? 16 : 0)
            - (((xAndNegX & 0x00FF00FF00FF00FFu) != 0) ?  8 : 0)
            - (((xAndNegX & 0x0F0F0F0F0F0F0F0Fu) != 0) ?  4 : 0)
            - (((xAndNegX & 0x3333333333333333u) != 0) ?  2 : 0)
            - (((xAndNegX & 0x5555555555555555u) != 0) ?  1 : 0);
}
246
//! Count trailing zeros of `x`, usable in constant expressions.
//!
//! The value is widened to an unsigned type of at least 32 bits, its lowest
//! set bit is isolated via `x & -x`, and the portable fallback does the rest.
template<typename T>
constexpr uint32_t blBitCtzStatic(const T& x) noexcept {
  return blBitCtzFallback(blAsUIntLeast32(x) & blNegate(blAsUIntLeast32(x)));
}
249
250template<typename T>
251BL_INLINE uint32_t blBitCtzImpl(const T& x) noexcept { return blBitCtzStatic(x); }
252
253#if !defined(BL_BUILD_NO_INTRINSICS)
254# if defined(__GNUC__)
255template<> BL_INLINE uint32_t blBitCtzImpl(const uint32_t& x) noexcept { return uint32_t(__builtin_ctz(x)); }
256template<> BL_INLINE uint32_t blBitCtzImpl(const uint64_t& x) noexcept { return uint32_t(__builtin_ctzll(x)); }
257# elif defined(_MSC_VER)
258template<> BL_INLINE uint32_t blBitCtzImpl(const uint32_t& x) noexcept { unsigned long i; _BitScanForward(&i, x); return uint32_t(i); }
259# if BL_TARGET_ARCH_X86 == 64 || BL_TARGET_ARCH_ARM == 64
260template<> BL_INLINE uint32_t blBitCtzImpl(const uint64_t& x) noexcept { unsigned long i; _BitScanForward64(&i, x); return uint32_t(i); }
261# endif
262# endif
263#endif
264
265//! Count trailing zeros in `x` (returns a position of a first bit set in `x`).
266//!
267//! \note The input MUST NOT be zero, otherwise the result is undefined.
268template<typename T>
269static BL_INLINE uint32_t blBitCtz(T x) noexcept { return blBitCtzImpl(blAsUIntLeast32(x)); }
270
//! Generate a trailing bit-mask that has `n` least significant (trailing) bits set.
//!
//! `n` may be anything from 0 up to (and including) the bit width of `T`; a
//! count equal to the width yields a mask with all bits set.
template<typename T, typename N>
constexpr T blTrailingBitMask(const N& n) noexcept {
  typedef typename std::make_unsigned<T>::type U;
  return (sizeof(U) < sizeof(uintptr_t))
    // `uintptr_t` is wider than `U`, so shifting it by up to the width of
    // `U` is always well defined.
    ? T(U((uintptr_t(1) << n) - uintptr_t(1)))
    // Shifting by the full width of the type is UNDEFINED BEHAVIOR (and is
    // rejected in constant expressions), so the shift count is reduced modulo
    // the width first. When `n` reaches the width the partial mask is ORed
    // with an all-ones value (`0 - 1`), which gives the expected full mask.
    : T(((U(1) << (n % N(sizeof(T) * 8u))) - U(1u)) | (U(0) - U(n >= N(sizeof(T) * 8u))));
}
282
//! Tests whether `x` is non-zero and all of its set bits form one
//! uninterrupted run.
//!
//! Adding the lowest set bit to `x` carries through the lowest run of ones;
//! the XOR of `x` with that sum is compared against `x` to tell a single run
//! from several.
template<typename T>
constexpr bool blIsBitMaskConsecutive(const T& x) noexcept {
  using U = typename std::make_unsigned<T>::type;
  return x == 0 ? false
                : (U(x) ^ (U(x) + (U(x) & (~U(x) + 1u)))) >= U(x);
}
288
//! Returns the shift of a bit-mask `x`, which is the index of its lowest set
//! bit (a constant-expression friendly alias of `blBitCtzStatic()`).
template<typename T>
constexpr uint32_t blBitShiftOf(const T& x) noexcept {
  return blBitCtzStatic(x);
}
291
292} // {anonymous}
293
294// ============================================================================
295// [ByteSwap]
296// ============================================================================
297
298namespace {
299
300template<typename T>
301static BL_INLINE T blByteSwap32(const T& x) noexcept {
302#if defined(__GNUC__) && !defined(BL_BUILD_NO_INTRINSICS)
303 return T(uint32_t(__builtin_bswap32(uint32_t(x))));
304#elif defined(_MSC_VER) && !defined(BL_BUILD_NO_INTRINSICS)
305 return T(uint32_t(_byteswap_ulong(uint32_t(x))));
306#else
307 return T((uint32_t(x) << 24) | (uint32_t(x) >> 24) | ((uint32_t(x) << 8) & 0x00FF0000u) | ((uint32_t(x) >> 8) & 0x0000FF00));
308#endif
309}
310
311template<typename T>
312static BL_INLINE T blByteSwap24(const T& x) noexcept {
313 // This produces always much better code than trying to do a real 24-bit byteswap.
314 return T(blByteSwap32(uint32_t(x)) >> 8);
315}
316
317template<typename T>
318static BL_INLINE T blByteSwap16(const T& x) noexcept {
319#if defined(_MSC_VER) && !defined(BL_BUILD_NO_INTRINSICS)
320 return T(uint16_t(_byteswap_ushort(uint16_t(x))));
321#else
322 return T((uint16_t(x) << 8) | (uint16_t(x) >> 8));
323#endif
324}
325
326template<typename T>
327static BL_INLINE T blByteSwap64(const T& x) noexcept {
328#if defined(__GNUC__) && !defined(BL_BUILD_NO_INTRINSICS)
329 return T(uint64_t(__builtin_bswap64(uint64_t(x))));
330#elif defined(_MSC_VER) && !defined(BL_BUILD_NO_INTRINSICS)
331 return T(uint64_t(_byteswap_uint64(uint64_t(x))));
332#else
333 return T( (uint64_t(blByteSwap32(uint32_t(uint64_t(x) >> 32 ))) ) |
334 (uint64_t(blByteSwap32(uint32_t(uint64_t(x) & 0xFFFFFFFFu))) << 32) );
335#endif
336}
337
338// TODO: [GLOBAL] REMOVE?
339template<typename T> static BL_INLINE T blByteSwap16LE(T x) noexcept { return BL_BYTE_ORDER_NATIVE == BL_BYTE_ORDER_LE ? T(x) : T(blByteSwap16(int16_t(x))); }
340template<typename T> static BL_INLINE T blByteSwap24LE(T x) noexcept { return BL_BYTE_ORDER_NATIVE == BL_BYTE_ORDER_LE ? T(x) : T(blByteSwap24(uint32_t(x))); }
341template<typename T> static BL_INLINE T blByteSwap32LE(T x) noexcept { return BL_BYTE_ORDER_NATIVE == BL_BYTE_ORDER_LE ? T(x) : T(blByteSwap32(uint32_t(x))); }
342template<typename T> static BL_INLINE T blByteSwap64LE(T x) noexcept { return BL_BYTE_ORDER_NATIVE == BL_BYTE_ORDER_LE ? T(x) : T(blByteSwap64(uint64_t(x))); }
343
344template<typename T> static BL_INLINE T blByteSwap16BE(T x) noexcept { return BL_BYTE_ORDER_NATIVE == BL_BYTE_ORDER_BE ? T(x) : T(blByteSwap16(uint16_t(x))); }
345template<typename T> static BL_INLINE T blByteSwap24BE(T x) noexcept { return BL_BYTE_ORDER_NATIVE == BL_BYTE_ORDER_BE ? T(x) : T(blByteSwap24(uint32_t(x))); }
346template<typename T> static BL_INLINE T blByteSwap32BE(T x) noexcept { return BL_BYTE_ORDER_NATIVE == BL_BYTE_ORDER_BE ? T(x) : T(blByteSwap32(uint32_t(x))); }
347template<typename T> static BL_INLINE T blByteSwap64BE(T x) noexcept { return BL_BYTE_ORDER_NATIVE == BL_BYTE_ORDER_BE ? T(x) : T(blByteSwap64(uint64_t(x))); }
348
349} // {anonymous}
350
351// ============================================================================
352// [Alignment]
353// ============================================================================
354
355template<typename X, typename Y>
356static constexpr bool blIsAligned(const X& base, const Y& alignment) noexcept {
357 typedef typename BLInternal::StdInt<sizeof(X), 1>::Type U;
358 return ((U)base % (U)alignment) == 0;
359}
360
//! Tests whether `x` is a power of two, i.e. exactly one bit is set.
//!
//! Clearing the lowest set bit (`x & (x - 1)`) leaves zero only when there
//! was a single bit to begin with; zero itself is rejected explicitly.
template<typename T>
static constexpr bool blIsPowerOf2(const T& x) noexcept {
  using Unsigned = typename std::make_unsigned<T>::type;
  return x != 0 && (Unsigned(x) & (Unsigned(x) - Unsigned(1))) == 0;
}
367
368template<typename X, typename Y>
369static constexpr X blAlignUp(const X& x, const Y& alignment) noexcept {
370 typedef typename BLInternal::StdInt<sizeof(X), 1>::Type U;
371 return (X)( ((U)x + ((U)(alignment) - 1u)) & ~((U)(alignment) - 1u) );
372}
373
374template<typename T>
375static constexpr T blAlignUpPowerOf2(const T& x) noexcept {
376 typedef typename BLInternal::StdInt<sizeof(T), 1>::Type U;
377 return (T)(blFillTrailingBits(U(x) - 1u) + 1u);
378}
379
380//! Returns zero or a positive difference between `base` and `base` aligned to `alignment`.
381template<typename X, typename Y>
382static constexpr X blAlignUpDiff(const X& base, const Y& alignment) noexcept {
383 typedef typename BLInternal::StdInt<sizeof(X), 1>::Type U;
384 return blAlignUp(U(base), alignment) - U(base);
385}
386
387template<typename X, typename Y>
388static constexpr X blAlignDown(const X& x, const Y& alignment) noexcept {
389 typedef typename BLInternal::StdInt<sizeof(X), 1>::Type U;
390 return (X)( (U)x & ~((U)(alignment) - 1u) );
391}
392
393// ============================================================================
394// [Pointer Utilities]
395// ============================================================================
396
//! Returns `ptr` moved by `offset` BYTES (not elements). The offset may be
//! negative; it is converted through `intptr_t` before it is added to the
//! address.
template<typename T, typename Offset>
static constexpr T* blOffsetPtr(T* ptr, Offset offset) noexcept {
  return reinterpret_cast<T*>(reinterpret_cast<uintptr_t>(ptr) + uintptr_t(intptr_t(offset)));
}

//! Returns `ptr` moved by `offset` BYTES and reinterpreted as a pointer to
//! `T` (`T` must be given explicitly).
template<typename T, typename P, typename Offset>
static constexpr T* blOffsetPtr(P* ptr, Offset offset) noexcept {
  return reinterpret_cast<T*>(reinterpret_cast<uintptr_t>(ptr) + uintptr_t(intptr_t(offset)));
}
402
403// ============================================================================
404// [ClampTo]
405// ============================================================================
406
//! Clamps `x` to the range `[0, y]` and returns it as `DstT`.
//!
//! `DstT` is expected to be an unsigned type and `y` its upper bound. Negative
//! inputs always clamp to zero, including the most negative value of `SrcT`
//! and signed sources that are not wider than `DstT`.
template<typename SrcT, typename DstT>
static constexpr DstT blClampToImpl(const SrcT& x, const DstT& y) noexcept {
  typedef typename std::make_unsigned<SrcT>::type U;
  // Compare in a type that can hold both a non-negative `x` and `y`, so the
  // bound is never truncated when `SrcT` is narrower than `DstT`.
  typedef typename std::common_type<U, DstT>::type W;
  return (std::is_signed<SrcT>::value && x < SrcT(0)) ? DstT(0)
       : (W(U(x)) <= W(y))                            ? DstT(x)
                                                      : y;
}

//! Clamp a value `x` to a byte (unsigned 8-bit type).
template<typename T>
static constexpr uint8_t blClampToByte(const T& x) noexcept {
  return blClampToImpl<T, uint8_t>(x, uint8_t(0xFFu));
}

//! Clamp a value `x` to a word (unsigned 16-bit type).
template<typename T>
static constexpr uint16_t blClampToWord(const T& x) noexcept {
  return blClampToImpl<T, uint16_t>(x, uint16_t(0xFFFFu));
}
425
426// ============================================================================
427// [Arithmetic]
428// ============================================================================
429
//! Sticky overflow indicator used by the overflow-aware arithmetic helpers -
//! they only ever OR a non-zero value into it, so a single flag can guard a
//! whole sequence of operations.
using BLOverflowFlag = unsigned char;
431
432namespace BLInternal {
433 template<typename T>
434 BL_INLINE T addOverflowFallback(T x, T y, BLOverflowFlag* of) noexcept {
435 typedef typename std::make_unsigned<T>::type U;
436
437 U result = U(x) + U(y);
438 *of |= BLOverflowFlag(blIsUnsigned<T>() ? result < U(x) : T((U(x) ^ ~U(y)) & (U(x) ^ result)) < 0);
439 return T(result);
440 }
441
442 template<typename T>
443 BL_INLINE T subOverflowFallback(T x, T y, BLOverflowFlag* of) noexcept {
444 typedef typename std::make_unsigned<T>::type U;
445
446 U result = U(x) - U(y);
447 *of |= BLOverflowFlag(blIsUnsigned<T>() ? result > U(x) : T((U(x) ^ U(y)) & (U(x) ^ result)) < 0);
448 return T(result);
449 }
450
451 template<typename T>
452 BL_INLINE T mulOverflowFallback(T x, T y, BLOverflowFlag* of) noexcept {
453 typedef typename BLInternal::StdInt<sizeof(T) * 2, blIsUnsigned<T>()>::Type I;
454 typedef typename std::make_unsigned<I>::type U;
455
456 U mask = U(blMaxValue<typename std::make_unsigned<T>::type>());
457 if (std::is_signed<T>::value) {
458 U prod = U(I(x)) * U(I(y));
459 *of |= BLOverflowFlag(I(prod) < I(blMinValue<T>()) || I(prod) > I(blMaxValue<T>()));
460 return T(I(prod & mask));
461 }
462 else {
463 U prod = U(x) * U(y);
464 *of |= BLOverflowFlag((prod & ~mask) != 0);
465 return T(prod & mask);
466 }
467 }
468
469 template<>
470 BL_INLINE int64_t mulOverflowFallback(int64_t x, int64_t y, BLOverflowFlag* of) noexcept {
471 int64_t result = int64_t(uint64_t(x) * uint64_t(y));
472 *of |= BLOverflowFlag(x && (result / x != y));
473 return result;
474 }
475
476 template<>
477 BL_INLINE uint64_t mulOverflowFallback(uint64_t x, uint64_t y, BLOverflowFlag* of) noexcept {
478 uint64_t result = x * y;
479 *of |= BLOverflowFlag(y != 0 && blMaxValue<uint64_t>() / y < x);
480 return result;
481 }
482
483 // These can be specialized.
484 template<typename T> BL_INLINE T addOverflowImpl(const T& x, const T& y, BLOverflowFlag* of) noexcept { return addOverflowFallback(x, y, of); }
485 template<typename T> BL_INLINE T subOverflowImpl(const T& x, const T& y, BLOverflowFlag* of) noexcept { return subOverflowFallback(x, y, of); }
486 template<typename T> BL_INLINE T mulOverflowImpl(const T& x, const T& y, BLOverflowFlag* of) noexcept { return mulOverflowFallback(x, y, of); }
487
488 #if defined(__GNUC__) && !defined(BL_BUILD_NO_INTRINSICS)
489 #if defined(__clang__) || __GNUC__ >= 5
490 #define BL_ARITH_OVERFLOW_SPECIALIZE(FUNC, T, RESULT_T, BUILTIN) \
491 template<> \
492 BL_INLINE T FUNC(const T& x, const T& y, BLOverflowFlag* of) noexcept { \
493 RESULT_T result; \
494 *of |= BLOverflowFlag(BUILTIN((RESULT_T)x, (RESULT_T)y, &result)); \
495 return T(result); \
496 }
497 BL_ARITH_OVERFLOW_SPECIALIZE(addOverflowImpl, int32_t , int , __builtin_sadd_overflow )
498 BL_ARITH_OVERFLOW_SPECIALIZE(addOverflowImpl, uint32_t, unsigned int , __builtin_uadd_overflow )
499 BL_ARITH_OVERFLOW_SPECIALIZE(addOverflowImpl, int64_t , long long , __builtin_saddll_overflow)
500 BL_ARITH_OVERFLOW_SPECIALIZE(addOverflowImpl, uint64_t, unsigned long long, __builtin_uaddll_overflow)
501 BL_ARITH_OVERFLOW_SPECIALIZE(subOverflowImpl, int32_t , int , __builtin_ssub_overflow )
502 BL_ARITH_OVERFLOW_SPECIALIZE(subOverflowImpl, uint32_t, unsigned int , __builtin_usub_overflow )
503 BL_ARITH_OVERFLOW_SPECIALIZE(subOverflowImpl, int64_t , long long , __builtin_ssubll_overflow)
504 BL_ARITH_OVERFLOW_SPECIALIZE(subOverflowImpl, uint64_t, unsigned long long, __builtin_usubll_overflow)
505 BL_ARITH_OVERFLOW_SPECIALIZE(mulOverflowImpl, int32_t , int , __builtin_smul_overflow )
506 BL_ARITH_OVERFLOW_SPECIALIZE(mulOverflowImpl, uint32_t, unsigned int , __builtin_umul_overflow )
507 BL_ARITH_OVERFLOW_SPECIALIZE(mulOverflowImpl, int64_t , long long , __builtin_smulll_overflow)
508 BL_ARITH_OVERFLOW_SPECIALIZE(mulOverflowImpl, uint64_t, unsigned long long, __builtin_umulll_overflow)
509 #undef BL_ARITH_OVERFLOW_SPECIALIZE
510 #endif
511 #endif
512
513 // There is a bug in MSVC that makes these specializations unusable, maybe in the future...
514 #if defined(_MSC_VER) && 0
515 #define BL_ARITH_OVERFLOW_SPECIALIZE(FUNC, T, ALT_T, BUILTIN) \
516 template<> \
517 BL_INLINE T FUNC(T x, T y, BLOverflowFlag* of) noexcept { \
518 ALT_T result; \
519 *of |= BLOverflowFlag(BUILTIN(0, (ALT_T)x, (ALT_T)y, &result)); \
520 return T(result); \
521 }
522 BL_ARITH_OVERFLOW_SPECIALIZE(addOverflowImpl, uint32_t, unsigned int , _addcarry_u32 )
523 BL_ARITH_OVERFLOW_SPECIALIZE(subOverflowImpl, uint32_t, unsigned int , _subborrow_u32)
524 #if ARCH_BITS >= 64
525 BL_ARITH_OVERFLOW_SPECIALIZE(addOverflowImpl, uint64_t, unsigned __int64 , _addcarry_u64 )
526 BL_ARITH_OVERFLOW_SPECIALIZE(subOverflowImpl, uint64_t, unsigned __int64 , _subborrow_u64)
527 #endif
528 #undef BL_ARITH_OVERFLOW_SPECIALIZE
529 #endif
530} // {BLInternal}
531
532template<typename T>
533static BL_INLINE T blAddOverflow(const T& x, const T& y, BLOverflowFlag* of) noexcept { return T(BLInternal::addOverflowImpl(blAsStdInt(x), blAsStdInt(y), of)); }
534
535template<typename T>
536static BL_INLINE T blSubOverflow(const T& x, const T& y, BLOverflowFlag* of) noexcept { return T(BLInternal::subOverflowImpl(blAsStdInt(x), blAsStdInt(y), of)); }
537
538template<typename T>
539static BL_INLINE T blMulOverflow(const T& x, const T& y, BLOverflowFlag* of) noexcept { return T(BLInternal::mulOverflowImpl(blAsStdInt(x), blAsStdInt(y), of)); }
540
541template<typename T>
542static BL_INLINE T blUAddSaturate(const T& x, const T& y) noexcept {
543 BLOverflowFlag of = 0;
544 T result = blAddOverflow(x, y, &of);
545 return T(result | blBitMaskFromBool<T>(of));
546}
547
548template<typename T>
549static BL_INLINE T blUSubSaturate(const T& x, const T& y) noexcept {
550 BLOverflowFlag of = 0;
551 T result = blSubOverflow(x, y, &of);
552 return T(result & blBitMaskFromBool<T>(!of));
553}
554
555template<typename T>
556static BL_INLINE T blUMulSaturate(const T& x, const T& y) noexcept {
557 BLOverflowFlag of = 0;
558 T result = blMulOverflow(x, y, &of);
559 return T(result | blBitMaskFromBool<T>(of));
560}
561
562// ============================================================================
563// [Udiv255]
564// ============================================================================
565
566//! Integer division by 255, compatible with `(x + (x >> 8)) >> 8` used by SIMD.
567static constexpr uint32_t blUdiv255(uint32_t x) noexcept { return ((x + 128) * 257) >> 16; }
568
569// ============================================================================
570// [blMemRead]
571// ============================================================================
572
573namespace {
574
575BL_INLINE uint32_t blMemReadU8(const void* p) noexcept { return uint32_t(static_cast<const uint8_t*>(p)[0]); }
576BL_INLINE int32_t blMemReadI8(const void* p) noexcept { return int32_t(static_cast<const int8_t*>(p)[0]); }
577
578template<uint32_t ByteOrder, size_t Alignment>
579BL_INLINE uint32_t blMemReadU16(const void* p) noexcept {
580 if (ByteOrder == BL_BYTE_ORDER_NATIVE && (BL_UNALIGNED_IO_16 || Alignment >= 2)) {
581 typedef typename BLMisalignedUInt<uint16_t, Alignment>::T U16AlignedToN;
582 return uint32_t(static_cast<const U16AlignedToN*>(p)[0]);
583 }
584 else {
585 uint32_t hi = blMemReadU8(static_cast<const uint8_t*>(p) + (ByteOrder == BL_BYTE_ORDER_LE ? 1 : 0));
586 uint32_t lo = blMemReadU8(static_cast<const uint8_t*>(p) + (ByteOrder == BL_BYTE_ORDER_LE ? 0 : 1));
587 return blBitShl(hi, 8) | lo;
588 }
589}
590
591template<uint32_t ByteOrder, size_t Alignment>
592BL_INLINE int32_t blMemReadI16(const void* p) noexcept {
593 if (ByteOrder == BL_BYTE_ORDER_NATIVE && (BL_UNALIGNED_IO_16 || Alignment >= 2)) {
594 typedef typename BLMisalignedUInt<uint16_t, Alignment>::T U16AlignedToN;
595 return int32_t(int16_t(static_cast<const U16AlignedToN*>(p)[0]));
596 }
597 else {
598 int32_t hi = int32_t(blMemReadI8(static_cast<const uint8_t*>(p) + (ByteOrder == BL_BYTE_ORDER_LE ? 1 : 0)));
599 int32_t lo = int32_t(blMemReadU8(static_cast<const uint8_t*>(p) + (ByteOrder == BL_BYTE_ORDER_LE ? 0 : 1)));
600 return blBitShl(hi, 8) | lo;
601 }
602}
603
604template<uint32_t ByteOrder = BL_BYTE_ORDER_NATIVE>
605BL_INLINE uint32_t blMemReadU24u(const void* p) noexcept {
606 uint32_t b0 = blMemReadU8(static_cast<const uint8_t*>(p) + (ByteOrder == BL_BYTE_ORDER_LE ? 2 : 0));
607 uint32_t b1 = blMemReadU8(static_cast<const uint8_t*>(p) + (ByteOrder == BL_BYTE_ORDER_LE ? 1 : 1));
608 uint32_t b2 = blMemReadU8(static_cast<const uint8_t*>(p) + (ByteOrder == BL_BYTE_ORDER_LE ? 0 : 2));
609 return blBitShl(b0, 16) | blBitShl(b1, 8) | b2;
610}
611
612template<uint32_t ByteOrder, size_t Alignment>
613BL_INLINE uint32_t blMemReadU32(const void* p) noexcept {
614 if (BL_UNALIGNED_IO_32 || Alignment >= 4) {
615 typedef typename BLMisalignedUInt<uint32_t, Alignment>::T U32AlignedToN;
616 uint32_t x = static_cast<const U32AlignedToN*>(p)[0];
617 return ByteOrder == BL_BYTE_ORDER_NATIVE ? x : blByteSwap32(x);
618 }
619 else {
620 uint32_t hi = blMemReadU16<ByteOrder, Alignment >= 2 ? size_t(2) : Alignment>(static_cast<const uint8_t*>(p) + (ByteOrder == BL_BYTE_ORDER_LE ? 2 : 0));
621 uint32_t lo = blMemReadU16<ByteOrder, Alignment >= 2 ? size_t(2) : Alignment>(static_cast<const uint8_t*>(p) + (ByteOrder == BL_BYTE_ORDER_LE ? 0 : 2));
622 return blBitShl(hi, 16) | lo;
623 }
624}
625
626template<uint32_t ByteOrder, size_t Alignment>
627BL_INLINE uint64_t blMemReadU64(const void* p) noexcept {
628 if (ByteOrder == BL_BYTE_ORDER_NATIVE && (BL_UNALIGNED_IO_64 || Alignment >= 8)) {
629 typedef typename BLMisalignedUInt<uint64_t, Alignment>::T U64AlignedToN;
630 return static_cast<const U64AlignedToN*>(p)[0];
631 }
632 else {
633 uint32_t hi = blMemReadU32<ByteOrder, Alignment >= 4 ? size_t(4) : Alignment>(static_cast<const uint8_t*>(p) + (ByteOrder == BL_BYTE_ORDER_LE ? 4 : 0));
634 uint32_t lo = blMemReadU32<ByteOrder, Alignment >= 4 ? size_t(4) : Alignment>(static_cast<const uint8_t*>(p) + (ByteOrder == BL_BYTE_ORDER_LE ? 0 : 4));
635 return blBitShl(uint64_t(hi), 32) | lo;
636 }
637}
638
639template<uint32_t ByteOrder, size_t Alignment>
640BL_INLINE int32_t blMemReadI32(const void* p) noexcept { return int32_t(blMemReadU32<ByteOrder, Alignment>(p)); }
641
642template<uint32_t ByteOrder, size_t Alignment>
643BL_INLINE int64_t blMemReadI64(const void* p) noexcept { return int64_t(blMemReadU64<ByteOrder, Alignment>(p)); }
644
645BL_INLINE int32_t blMemReadI16a(const void* p) noexcept { return blMemReadI16<BL_BYTE_ORDER_NATIVE, 2>(p); }
646BL_INLINE int32_t blMemReadI16u(const void* p) noexcept { return blMemReadI16<BL_BYTE_ORDER_NATIVE, 1>(p); }
647BL_INLINE uint32_t blMemReadU16a(const void* p) noexcept { return blMemReadU16<BL_BYTE_ORDER_NATIVE, 2>(p); }
648BL_INLINE uint32_t blMemReadU16u(const void* p) noexcept { return blMemReadU16<BL_BYTE_ORDER_NATIVE, 1>(p); }
649
650BL_INLINE int32_t blMemReadI16aLE(const void* p) noexcept { return blMemReadI16<BL_BYTE_ORDER_LE, 2>(p); }
651BL_INLINE int32_t blMemReadI16uLE(const void* p) noexcept { return blMemReadI16<BL_BYTE_ORDER_LE, 1>(p); }
652BL_INLINE uint32_t blMemReadU16aLE(const void* p) noexcept { return blMemReadU16<BL_BYTE_ORDER_LE, 2>(p); }
653BL_INLINE uint32_t blMemReadU16uLE(const void* p) noexcept { return blMemReadU16<BL_BYTE_ORDER_LE, 1>(p); }
654
655BL_INLINE int32_t blMemReadI16aBE(const void* p) noexcept { return blMemReadI16<BL_BYTE_ORDER_BE, 2>(p); }
656BL_INLINE int32_t blMemReadI16uBE(const void* p) noexcept { return blMemReadI16<BL_BYTE_ORDER_BE, 1>(p); }
657BL_INLINE uint32_t blMemReadU16aBE(const void* p) noexcept { return blMemReadU16<BL_BYTE_ORDER_BE, 2>(p); }
658BL_INLINE uint32_t blMemReadU16uBE(const void* p) noexcept { return blMemReadU16<BL_BYTE_ORDER_BE, 1>(p); }
659
660BL_INLINE uint32_t blMemReadU24uLE(const void* p) noexcept { return blMemReadU24u<BL_BYTE_ORDER_LE>(p); }
661BL_INLINE uint32_t blMemReadU24uBE(const void* p) noexcept { return blMemReadU24u<BL_BYTE_ORDER_BE>(p); }
662
663BL_INLINE int32_t blMemReadI32a(const void* p) noexcept { return blMemReadI32<BL_BYTE_ORDER_NATIVE, 4>(p); }
664BL_INLINE int32_t blMemReadI32u(const void* p) noexcept { return blMemReadI32<BL_BYTE_ORDER_NATIVE, 1>(p); }
665BL_INLINE uint32_t blMemReadU32a(const void* p) noexcept { return blMemReadU32<BL_BYTE_ORDER_NATIVE, 4>(p); }
666BL_INLINE uint32_t blMemReadU32u(const void* p) noexcept { return blMemReadU32<BL_BYTE_ORDER_NATIVE, 1>(p); }
667
668BL_INLINE int32_t blMemReadI32aLE(const void* p) noexcept { return blMemReadI32<BL_BYTE_ORDER_LE, 4>(p); }
669BL_INLINE int32_t blMemReadI32uLE(const void* p) noexcept { return blMemReadI32<BL_BYTE_ORDER_LE, 1>(p); }
670BL_INLINE uint32_t blMemReadU32aLE(const void* p) noexcept { return blMemReadU32<BL_BYTE_ORDER_LE, 4>(p); }
671BL_INLINE uint32_t blMemReadU32uLE(const void* p) noexcept { return blMemReadU32<BL_BYTE_ORDER_LE, 1>(p); }
672
673BL_INLINE int32_t blMemReadI32aBE(const void* p) noexcept { return blMemReadI32<BL_BYTE_ORDER_BE, 4>(p); }
674BL_INLINE int32_t blMemReadI32uBE(const void* p) noexcept { return blMemReadI32<BL_BYTE_ORDER_BE, 1>(p); }
675BL_INLINE uint32_t blMemReadU32aBE(const void* p) noexcept { return blMemReadU32<BL_BYTE_ORDER_BE, 4>(p); }
676BL_INLINE uint32_t blMemReadU32uBE(const void* p) noexcept { return blMemReadU32<BL_BYTE_ORDER_BE, 1>(p); }
677
678BL_INLINE int64_t blMemReadI64a(const void* p) noexcept { return blMemReadI64<BL_BYTE_ORDER_NATIVE, 8>(p); }
679BL_INLINE int64_t blMemReadI64u(const void* p) noexcept { return blMemReadI64<BL_BYTE_ORDER_NATIVE, 1>(p); }
680BL_INLINE uint64_t blMemReadU64a(const void* p) noexcept { return blMemReadU64<BL_BYTE_ORDER_NATIVE, 8>(p); }
681BL_INLINE uint64_t blMemReadU64u(const void* p) noexcept { return blMemReadU64<BL_BYTE_ORDER_NATIVE, 1>(p); }
682
683BL_INLINE int64_t blMemReadI64aLE(const void* p) noexcept { return blMemReadI64<BL_BYTE_ORDER_LE, 8>(p); }
684BL_INLINE int64_t blMemReadI64uLE(const void* p) noexcept { return blMemReadI64<BL_BYTE_ORDER_LE, 1>(p); }
685BL_INLINE uint64_t blMemReadU64aLE(const void* p) noexcept { return blMemReadU64<BL_BYTE_ORDER_LE, 8>(p); }
686BL_INLINE uint64_t blMemReadU64uLE(const void* p) noexcept { return blMemReadU64<BL_BYTE_ORDER_LE, 1>(p); }
687
688BL_INLINE int64_t blMemReadI64aBE(const void* p) noexcept { return blMemReadI64<BL_BYTE_ORDER_BE, 8>(p); }
689BL_INLINE int64_t blMemReadI64uBE(const void* p) noexcept { return blMemReadI64<BL_BYTE_ORDER_BE, 1>(p); }
690BL_INLINE uint64_t blMemReadU64aBE(const void* p) noexcept { return blMemReadU64<BL_BYTE_ORDER_BE, 8>(p); }
691BL_INLINE uint64_t blMemReadU64uBE(const void* p) noexcept { return blMemReadU64<BL_BYTE_ORDER_BE, 1>(p); }
692
693} // {anonymous}
694
695// ============================================================================
696// [blMemWrite]
697// ============================================================================
698
699namespace {
700
701BL_INLINE void blMemWriteU8(void* p, uint32_t x) noexcept { static_cast<uint8_t*>(p)[0] = uint8_t(x & 0xFFu); }
702BL_INLINE void blMemWriteI8(void* p, int32_t x) noexcept { static_cast<uint8_t*>(p)[0] = uint8_t(x & 0xFF); }
703
704template<uint32_t ByteOrder, size_t Alignment>
705BL_INLINE void blMemWriteU16(void* p, uint32_t x) noexcept {
706 if (ByteOrder == BL_BYTE_ORDER_NATIVE && (BL_UNALIGNED_IO_16 || Alignment >= 2)) {
707 typedef typename BLMisalignedUInt<uint16_t, Alignment>::T U16AlignedToN;
708 static_cast<U16AlignedToN*>(p)[0] = uint16_t(x & 0xFFFFu);
709 }
710 else {
711 static_cast<uint8_t*>(p)[0] = uint8_t((x >> (ByteOrder == BL_BYTE_ORDER_LE ? 0 : 8)) & 0xFFu);
712 static_cast<uint8_t*>(p)[1] = uint8_t((x >> (ByteOrder == BL_BYTE_ORDER_LE ? 8 : 0)) & 0xFFu);
713 }
714}
715
716template<uint32_t ByteOrder = BL_BYTE_ORDER_NATIVE>
717BL_INLINE void blMemWriteU24u(void* p, uint32_t v) noexcept {
718 static_cast<uint8_t*>(p)[0] = uint8_t((v >> (ByteOrder == BL_BYTE_ORDER_LE ? 0 : 16)) & 0xFFu);
719 static_cast<uint8_t*>(p)[1] = uint8_t((v >> (ByteOrder == BL_BYTE_ORDER_LE ? 8 : 8)) & 0xFFu);
720 static_cast<uint8_t*>(p)[2] = uint8_t((v >> (ByteOrder == BL_BYTE_ORDER_LE ? 16 : 0)) & 0xFFu);
721}
722
723template<uint32_t ByteOrder, size_t Alignment>
724BL_INLINE void blMemWriteU32(void* p, uint32_t x) noexcept {
725 if (BL_UNALIGNED_IO_32 || Alignment >= 4) {
726 typedef typename BLMisalignedUInt<uint32_t, Alignment>::T U32AlignedToN;
727 static_cast<U32AlignedToN*>(p)[0] = (ByteOrder == BL_BYTE_ORDER_NATIVE) ? x : blByteSwap32(x);
728 }
729 else {
730 blMemWriteU16<ByteOrder, Alignment >= 2 ? size_t(2) : Alignment>(static_cast<uint8_t*>(p) + 0, x >> (ByteOrder == BL_BYTE_ORDER_LE ? 0 : 16));
731 blMemWriteU16<ByteOrder, Alignment >= 2 ? size_t(2) : Alignment>(static_cast<uint8_t*>(p) + 2, x >> (ByteOrder == BL_BYTE_ORDER_LE ? 16 : 0));
732 }
733}
734
735template<uint32_t ByteOrder, size_t Alignment>
736BL_INLINE void blMemWriteU64(void* p, uint64_t x) noexcept {
737 if (ByteOrder == BL_BYTE_ORDER_NATIVE && (BL_UNALIGNED_IO_64 || Alignment >= 8)) {
738 typedef typename BLMisalignedUInt<uint64_t, Alignment>::T U64AlignedToN;
739 static_cast<U64AlignedToN*>(p)[0] = x;
740 }
741 else {
742 blMemWriteU32<ByteOrder, Alignment >= 4 ? size_t(4) : Alignment>(static_cast<uint8_t*>(p) + 0, uint32_t((x >> (ByteOrder == BL_BYTE_ORDER_LE ? 0 : 32)) & 0xFFFFFFFFu));
743 blMemWriteU32<ByteOrder, Alignment >= 4 ? size_t(4) : Alignment>(static_cast<uint8_t*>(p) + 4, uint32_t((x >> (ByteOrder == BL_BYTE_ORDER_LE ? 32 : 0)) & 0xFFFFFFFFu));
744 }
745}
746
747template<uint32_t ByteOrder, size_t Alignment> BL_INLINE void blMemWriteI16(void* p, int32_t x) noexcept { blMemWriteU16<ByteOrder, Alignment>(p, uint32_t(x)); }
748template<uint32_t ByteOrder, size_t Alignment> BL_INLINE void blMemWriteI32(void* p, int32_t x) noexcept { blMemWriteU32<ByteOrder, Alignment>(p, uint32_t(x)); }
749template<uint32_t ByteOrder, size_t Alignment> BL_INLINE void blMemWriteI64(void* p, int64_t x) noexcept { blMemWriteU64<ByteOrder, Alignment>(p, uint64_t(x)); }
750
751BL_INLINE void blMemWriteI16a(void* p, int32_t x) noexcept { blMemWriteI16<BL_BYTE_ORDER_NATIVE, 2>(p, x); }
752BL_INLINE void blMemWriteI16u(void* p, int32_t x) noexcept { blMemWriteI16<BL_BYTE_ORDER_NATIVE, 1>(p, x); }
753BL_INLINE void blMemWriteU16a(void* p, uint32_t x) noexcept { blMemWriteU16<BL_BYTE_ORDER_NATIVE, 2>(p, x); }
754BL_INLINE void blMemWriteU16u(void* p, uint32_t x) noexcept { blMemWriteU16<BL_BYTE_ORDER_NATIVE, 1>(p, x); }
755
756BL_INLINE void blMemWriteI16aLE(void* p, int32_t x) noexcept { blMemWriteI16<BL_BYTE_ORDER_LE, 2>(p, x); }
757BL_INLINE void blMemWriteI16uLE(void* p, int32_t x) noexcept { blMemWriteI16<BL_BYTE_ORDER_LE, 1>(p, x); }
758BL_INLINE void blMemWriteU16aLE(void* p, uint32_t x) noexcept { blMemWriteU16<BL_BYTE_ORDER_LE, 2>(p, x); }
759BL_INLINE void blMemWriteU16uLE(void* p, uint32_t x) noexcept { blMemWriteU16<BL_BYTE_ORDER_LE, 1>(p, x); }
760
761BL_INLINE void blMemWriteI16aBE(void* p, int32_t x) noexcept { blMemWriteI16<BL_BYTE_ORDER_BE, 2>(p, x); }
762BL_INLINE void blMemWriteI16uBE(void* p, int32_t x) noexcept { blMemWriteI16<BL_BYTE_ORDER_BE, 1>(p, x); }
763BL_INLINE void blMemWriteU16aBE(void* p, uint32_t x) noexcept { blMemWriteU16<BL_BYTE_ORDER_BE, 2>(p, x); }
764BL_INLINE void blMemWriteU16uBE(void* p, uint32_t x) noexcept { blMemWriteU16<BL_BYTE_ORDER_BE, 1>(p, x); }
765
766BL_INLINE void blMemWriteU24uLE(void* p, uint32_t v) noexcept { blMemWriteU24u<BL_BYTE_ORDER_LE>(p, v); }
767BL_INLINE void blMemWriteU24uBE(void* p, uint32_t v) noexcept { blMemWriteU24u<BL_BYTE_ORDER_BE>(p, v); }
768
769BL_INLINE void blMemWriteI32a(void* p, int32_t x) noexcept { blMemWriteI32<BL_BYTE_ORDER_NATIVE, 4>(p, x); }
770BL_INLINE void blMemWriteI32u(void* p, int32_t x) noexcept { blMemWriteI32<BL_BYTE_ORDER_NATIVE, 1>(p, x); }
771BL_INLINE void blMemWriteU32a(void* p, uint32_t x) noexcept { blMemWriteU32<BL_BYTE_ORDER_NATIVE, 4>(p, x); }
772BL_INLINE void blMemWriteU32u(void* p, uint32_t x) noexcept { blMemWriteU32<BL_BYTE_ORDER_NATIVE, 1>(p, x); }
773
774BL_INLINE void blMemWriteI32aLE(void* p, int32_t x) noexcept { blMemWriteI32<BL_BYTE_ORDER_LE, 4>(p, x); }
775BL_INLINE void blMemWriteI32uLE(void* p, int32_t x) noexcept { blMemWriteI32<BL_BYTE_ORDER_LE, 1>(p, x); }
776BL_INLINE void blMemWriteU32aLE(void* p, uint32_t x) noexcept { blMemWriteU32<BL_BYTE_ORDER_LE, 4>(p, x); }
777BL_INLINE void blMemWriteU32uLE(void* p, uint32_t x) noexcept { blMemWriteU32<BL_BYTE_ORDER_LE, 1>(p, x); }
778
779BL_INLINE void blMemWriteI32aBE(void* p, int32_t x) noexcept { blMemWriteI32<BL_BYTE_ORDER_BE, 4>(p, x); }
780BL_INLINE void blMemWriteI32uBE(void* p, int32_t x) noexcept { blMemWriteI32<BL_BYTE_ORDER_BE, 1>(p, x); }
781BL_INLINE void blMemWriteU32aBE(void* p, uint32_t x) noexcept { blMemWriteU32<BL_BYTE_ORDER_BE, 4>(p, x); }
782BL_INLINE void blMemWriteU32uBE(void* p, uint32_t x) noexcept { blMemWriteU32<BL_BYTE_ORDER_BE, 1>(p, x); }
783
784BL_INLINE void blMemWriteI64a(void* p, int64_t x) noexcept { blMemWriteI64<BL_BYTE_ORDER_NATIVE, 8>(p, x); }
785BL_INLINE void blMemWriteI64u(void* p, int64_t x) noexcept { blMemWriteI64<BL_BYTE_ORDER_NATIVE, 1>(p, x); }
786BL_INLINE void blMemWriteU64a(void* p, uint64_t x) noexcept { blMemWriteU64<BL_BYTE_ORDER_NATIVE, 8>(p, x); }
787BL_INLINE void blMemWriteU64u(void* p, uint64_t x) noexcept { blMemWriteU64<BL_BYTE_ORDER_NATIVE, 1>(p, x); }
788
789BL_INLINE void blMemWriteI64aLE(void* p, int64_t x) noexcept { blMemWriteI64<BL_BYTE_ORDER_LE, 8>(p, x); }
790BL_INLINE void blMemWriteI64uLE(void* p, int64_t x) noexcept { blMemWriteI64<BL_BYTE_ORDER_LE, 1>(p, x); }
791BL_INLINE void blMemWriteU64aLE(void* p, uint64_t x) noexcept { blMemWriteU64<BL_BYTE_ORDER_LE, 8>(p, x); }
792BL_INLINE void blMemWriteU64uLE(void* p, uint64_t x) noexcept { blMemWriteU64<BL_BYTE_ORDER_LE, 1>(p, x); }
793
794BL_INLINE void blMemWriteI64aBE(void* p, int64_t x) noexcept { blMemWriteI64<BL_BYTE_ORDER_BE, 8>(p, x); }
795BL_INLINE void blMemWriteI64uBE(void* p, int64_t x) noexcept { blMemWriteI64<BL_BYTE_ORDER_BE, 1>(p, x); }
796BL_INLINE void blMemWriteU64aBE(void* p, uint64_t x) noexcept { blMemWriteU64<BL_BYTE_ORDER_BE, 8>(p, x); }
797BL_INLINE void blMemWriteU64uBE(void* p, uint64_t x) noexcept { blMemWriteU64<BL_BYTE_ORDER_BE, 1>(p, x); }
798
799} // {anonymous}
800
801// ============================================================================
// [blMemCopyInline]
803// ============================================================================
804
805namespace {
806
807template<typename T>
808static BL_INLINE void blMemCopyInlineT(T* dst, const T* src, size_t count) noexcept {
809 for (size_t i = 0; i < count; i++)
810 dst[i] = src[i];
811}
812
//! Copies `n` bytes from `src` to `dst` optimized for small buffers.
//!
//! The implementation is selected at compile time:
//!   - GNU-compatible compilers targeting x86 use a `rep movsb` instruction
//!     through inline assembly.
//!   - MSVC targeting x86 uses the `__movsb()` intrinsic.
//!   - Everything else uses a plain byte-by-byte loop (`blMemCopyInlineT`).
//!
//! No path calls `memcpy()`; the bytes are copied in ascending address order
//! in the loop-based fallback.
static BL_INLINE void blMemCopyInline(void* dst, const void* src, size_t n) noexcept {
#if defined(__GNUC__) && BL_TARGET_ARCH_X86
  // `rep movsb` takes the destination in (E|R)DI, the source in (E|R)SI and
  // the byte count in (E|R)CX - the `D`, `S` and `c` constraints pin the
  // operands to these registers. The instruction modifies all three, so they
  // are declared as outputs tied to the inputs ("0", "1", "2"); the final
  // counter value is written to `unused` and ignored.
  size_t unused;
  __asm__ __volatile__(
    "rep movsb" : "=&D"(dst), "=&S"(src), "=&c"(unused)
                : "0"(dst), "1"(src), "2"(n)
                // The "memory" clobber tells the compiler that memory not
                // named by the operands is read and written.
                : "memory"
  );
#elif defined(_MSC_VER) && BL_TARGET_ARCH_X86
  // MSVC intrinsic counterpart of the inline assembly above.
  __movsb(static_cast<unsigned char *>(dst), static_cast<const unsigned char *>(src), n);
#else
  // Portable fallback.
  blMemCopyInlineT<uint8_t>(static_cast<uint8_t*>(dst), static_cast<const uint8_t*>(src), n);
#endif
}
828
829} // {anonymous}
830
831// ============================================================================
832// [BLWrap<T>]
833// ============================================================================
834
835//! Wrapper to control construction & destruction of `T`.
836template<typename T>
837struct alignas(alignof(T)) BLWrap {
838 //! Storage required to instantiate `T`.
839 char _data[sizeof(T)];
840
841 //! \name Init / Destroy
842 //! \{
843
844 //! Placement new constructor.
845 BL_INLINE T* init() noexcept {
846 #if defined(_MSC_VER) && !defined(__clang__)
847 BL_ASSUME(_data != nullptr);
848 #endif
849 return new(static_cast<void*>(_data)) T;
850 }
851
852 //! Placement new constructor with arguments.
853 template<typename... Args>
854 BL_INLINE T* init(Args&&... args) noexcept {
855 #if defined(_MSC_VER) && !defined(__clang__)
856 BL_ASSUME(_data != nullptr);
857 #endif
858 return new(static_cast<void*>(_data)) T(std::forward<Args>(args)...);
859 }
860
861 //! Placement delete destructor.
862 BL_INLINE void destroy() noexcept {
863 #if defined(_MSC_VER) && !defined(__clang__)
864 BL_ASSUME(_data != nullptr);
865 #endif
866 static_cast<T*>(static_cast<void*>(_data))->~T();
867 }
868
869 //! \}
870
871 //! \name Accessors
872 //! \{
873
874 BL_INLINE T* p() noexcept { return static_cast<T*>(static_cast<void*>(_data)); }
875 BL_INLINE const T* p() const noexcept { return static_cast<const T*>(static_cast<const void*>(_data)); }
876
877 BL_INLINE operator T&() noexcept { return *p(); }
878 BL_INLINE operator const T&() const noexcept { return *p(); }
879
880 BL_INLINE T& operator()() noexcept { return *p(); }
881 BL_INLINE const T& operator()() const noexcept { return *p(); }
882
883 BL_INLINE T* operator&() noexcept { return p(); }
884 BL_INLINE const T* operator&() const noexcept { return p(); }
885
886 BL_INLINE T* operator->() noexcept { return p(); }
887 BL_INLINE T const* operator->() const noexcept { return p(); }
888
889 //! \}
890};
891
892// ============================================================================
893// [BLScopedAllocator]
894// ============================================================================
895
896//! A simple allocator that can be used to remember allocated memory so it can
897//! be then freed in one go. Typically used in areas where some heap allocation
898//! is required and at the end of the work it will all be freed.
899class BLScopedAllocator {
900public:
901 BL_NONCOPYABLE(BLScopedAllocator)
902 struct Link { Link* next; };
903
904 Link* links;
905 uint8_t* poolPtr;
906 uint8_t* poolMem;
907 uint8_t* poolEnd;
908
909 BL_INLINE BLScopedAllocator() noexcept
910 : links(nullptr),
911 poolPtr(nullptr),
912 poolMem(nullptr),
913 poolEnd(nullptr) {}
914
915 BL_INLINE BLScopedAllocator(void* poolMem, size_t poolSize) noexcept
916 : links(nullptr),
917 poolPtr(static_cast<uint8_t*>(poolMem)),
918 poolMem(static_cast<uint8_t*>(poolMem)),
919 poolEnd(static_cast<uint8_t*>(poolMem) + poolSize) {}
920
921 BL_INLINE ~BLScopedAllocator() noexcept { reset(); }
922
923 void* alloc(size_t size, size_t alignment = 1) noexcept;
924 void reset() noexcept;
925};
926
927// ============================================================================
928// [BLMemBuffer]
929// ============================================================================
930
931//! Memory buffer.
932//!
933//! Memory buffer is a helper class which holds pointer to an allocated memory
934//! block, which will be released automatically by `BLMemBuffer` destructor or by
935//! `reset()` call.
936class BLMemBuffer {
937public:
938 BL_NONCOPYABLE(BLMemBuffer)
939
940 void* _mem;
941 void* _buf;
942 size_t _capacity;
943
944 BL_INLINE BLMemBuffer() noexcept
945 : _mem(nullptr),
946 _buf(nullptr),
947 _capacity(0) {}
948
949 BL_INLINE ~BLMemBuffer() noexcept {
950 _reset();
951 }
952
953protected:
954 BL_INLINE BLMemBuffer(void* mem, void* buf, size_t capacity) noexcept
955 : _mem(mem),
956 _buf(buf),
957 _capacity(capacity) {}
958
959public:
960 BL_INLINE void* get() const noexcept { return _mem; }
961 BL_INLINE size_t capacity() const noexcept { return _capacity; }
962
963 BL_INLINE void* alloc(size_t size) noexcept {
964 if (size <= _capacity)
965 return _mem;
966
967 if (_mem != _buf)
968 free(_mem);
969
970 _mem = malloc(size);
971 _capacity = size;
972
973 return _mem;
974 }
975
976 BL_INLINE void _reset() noexcept {
977 if (_mem != _buf)
978 free(_mem);
979 }
980
981 BL_INLINE void reset() noexcept {
982 _reset();
983
984 _mem = nullptr;
985 _capacity = 0;
986 }
987};
988
989// ============================================================================
990// [BLMemBufferTmp<>]
991// ============================================================================
992
993//! Memory buffer (temporary).
994//!
995//! This template is for fast routines that need to use memory allocated on
996//! the stack, but the memory requirement is not known at compile time. The
997//! number of bytes allocated on the stack is described by `N` parameter.
998template<size_t N>
999class BLMemBufferTmp : public BLMemBuffer {
1000public:
1001 BL_NONCOPYABLE(BLMemBufferTmp<N>)
1002
1003 uint8_t _storage[N];
1004
1005 BL_INLINE BLMemBufferTmp() noexcept
1006 : BLMemBuffer(_storage, _storage, N) {}
1007
1008 BL_INLINE ~BLMemBufferTmp() noexcept {}
1009
1010 using BLMemBuffer::alloc;
1011
1012 BL_INLINE void reset() noexcept {
1013 _reset();
1014 _mem = _buf;
1015 _capacity = N;
1016 }
1017};
1018
1019//! \}
1020//! \endcond
1021
1022#endif // BLEND2D_BLSUPPORT_P_H
1023