1 | // [Blend2D] |
2 | // 2D Vector Graphics Powered by a JIT Compiler. |
3 | // |
4 | // [License] |
5 | // Zlib - See LICENSE.md file in the package. |
6 | |
7 | #ifndef BLEND2D_BLSUPPORT_P_H |
8 | #define BLEND2D_BLSUPPORT_P_H |
9 | |
#include "./blapi-internal_p.h"

#include <cstring>
11 | |
12 | //! \cond INTERNAL |
13 | //! \addtogroup blend2d_internal |
14 | //! \{ |
15 | |
16 | // ============================================================================ |
17 | // [StdInt] |
18 | // ============================================================================ |
19 | |
//! Tells whether `T` is an unsigned type (thin `constexpr` wrapper over
//! `std::is_unsigned`), usable as a non-type template argument.
template<typename T>
static constexpr bool blIsUnsigned() noexcept {
  return std::is_unsigned<T>();
}
22 | |
23 | //! Cast an integer `x` to a fixed-width type as defined by <stdint.h> |
24 | //! |
25 | //! This can help when specializing some functions for a particular type. Since |
26 | //! some C/C++ types may overlap (like `long` vs `long long`) it's easier to |
27 | //! just cast to a type as defined by <stdint.h> and specialize for it. |
28 | template<typename T> |
29 | static constexpr typename BLInternal::StdInt<sizeof(T), blIsUnsigned<T>()>::Type blAsStdInt(T x) noexcept { |
30 | return (typename BLInternal::StdInt<sizeof(T), blIsUnsigned<T>()>::Type)x; |
31 | } |
32 | |
33 | //! Cast an integer `x` to a fixed-width unsigned type as defined by <stdint.h> |
34 | template<typename T> |
35 | static constexpr typename BLInternal::StdInt<sizeof(T), 1>::Type blAsStdUInt(T x) noexcept { |
36 | return (typename BLInternal::StdInt<sizeof(T), 1>::Type)x; |
37 | } |
38 | |
39 | //! Cast an integer `x` to either `int32_t`, uint32_t`, `int64_t`, or `uint64_t`. |
40 | //! |
41 | //! Used to keep a signedness of `T`, but to promote it to at least 32-bit type. |
42 | template<typename T> |
43 | static constexpr typename BLInternal::StdInt<blMax<size_t>(sizeof(T), 4), blIsUnsigned<T>()>::Type blAsIntLeast32(T x) noexcept { |
44 | typedef typename BLInternal::StdInt<blMax<size_t>(sizeof(T), 4), blIsUnsigned<T>()>::Type Result; |
45 | return Result(x); |
46 | } |
47 | |
48 | //! Cast an integer `x` to either `uint32_t` or `uint64_t`. |
49 | template<typename T> |
50 | static constexpr typename BLInternal::StdInt<blMax<size_t>(sizeof(T), 4), 1>::Type blAsUIntLeast32(T x) noexcept { |
51 | typedef typename BLInternal::StdInt<blMax<size_t>(sizeof(T), 4), 1>::Type Result; |
52 | typedef typename std::make_unsigned<T>::type U; |
53 | return Result(U(x)); |
54 | } |
55 | |
56 | // ============================================================================ |
57 | // [MisalignedInt] |
58 | // ============================================================================ |
59 | |
60 | static const constexpr bool BL_UNALIGNED_IO_16 = BL_TARGET_ARCH_X86 != 0; |
61 | static const constexpr bool BL_UNALIGNED_IO_32 = BL_TARGET_ARCH_X86 != 0; |
62 | static const constexpr bool BL_UNALIGNED_IO_64 = BL_TARGET_ARCH_X86 != 0; |
63 | |
// Declares `NAME` as a type whose alignment is set to `ALIGNMENT` bytes. This
// kind of type alignment is not allowed by the C++11 `alignas` keyword, so
// compiler-specific attributes are used; any other compiler gets the plain
// type.
#if defined(__GNUC__)
  #define BL_MISALIGN_TYPE(NAME, ALIGNMENT) __attribute__((__aligned__(ALIGNMENT))) NAME
#elif defined(_MSC_VER)
  #define BL_MISALIGN_TYPE(NAME, ALIGNMENT) __declspec(align(ALIGNMENT)) NAME
#else
  #define BL_MISALIGN_TYPE(NAME, ALIGNMENT) NAME
#endif

//! An integer type that has possibly less alignment than its size.
//!
//! The member type `T` is only provided by the specializations below; the
//! primary template is intentionally empty.
template<typename T, size_t Alignment>
struct BLMisalignedUInt {};

// 8-bit.
template<> struct BLMisalignedUInt<uint8_t, 1> { using T = uint8_t; };

// 16-bit.
template<> struct BLMisalignedUInt<uint16_t, 1> { typedef uint16_t BL_MISALIGN_TYPE(T, 1); };
template<> struct BLMisalignedUInt<uint16_t, 2> { using T = uint16_t; };

// 32-bit.
template<> struct BLMisalignedUInt<uint32_t, 1> { typedef uint32_t BL_MISALIGN_TYPE(T, 1); };
template<> struct BLMisalignedUInt<uint32_t, 2> { typedef uint32_t BL_MISALIGN_TYPE(T, 2); };
template<> struct BLMisalignedUInt<uint32_t, 4> { using T = uint32_t; };

// 64-bit.
template<> struct BLMisalignedUInt<uint64_t, 1> { typedef uint64_t BL_MISALIGN_TYPE(T, 1); };
template<> struct BLMisalignedUInt<uint64_t, 2> { typedef uint64_t BL_MISALIGN_TYPE(T, 2); };
template<> struct BLMisalignedUInt<uint64_t, 4> { typedef uint64_t BL_MISALIGN_TYPE(T, 4); };
template<> struct BLMisalignedUInt<uint64_t, 8> { using T = uint64_t; };

#undef BL_MISALIGN_TYPE
89 | |
90 | // ============================================================================ |
91 | // [Numeric Limits] |
92 | // ============================================================================ |
93 | |
//! Returns positive infinity of type `T` (`std::numeric_limits<T>::infinity()`).
template<typename T>
static constexpr T blInf() noexcept {
  using Limits = std::numeric_limits<T>;
  return Limits::infinity();
}

//! Returns a quiet NaN of type `T` (`std::numeric_limits<T>::quiet_NaN()`).
template<typename T>
static constexpr T blNaN() noexcept {
  using Limits = std::numeric_limits<T>;
  return Limits::quiet_NaN();
}

//! Returns the lowest finite value of type `T` (`std::numeric_limits<T>::lowest()`).
template<typename T>
static constexpr T blMinValue() noexcept {
  using Limits = std::numeric_limits<T>;
  return Limits::lowest();
}

//! Returns the greatest finite value of type `T` (`std::numeric_limits<T>::max()`).
template<typename T>
static constexpr T blMaxValue() noexcept {
  using Limits = std::numeric_limits<T>;
  return Limits::max();
}
98 | |
99 | // ============================================================================ |
100 | // [Bit Utilities] |
101 | // ============================================================================ |
102 | |
103 | namespace { |
104 | |
//! Returns a value of type `T` that has all bits set.
template<typename T>
constexpr T blBitOnes() noexcept {
  return static_cast<T>(~static_cast<T>(0));
}
107 | |
//! Returns `0 - x` without undefined behavior; works for both signed and
//! unsigned numbers.
//!
//! The negation is done on the unsigned counterpart of `T` (two's complement
//! `~x + 1`), so negating the most negative signed value wraps around instead
//! of overflowing.
template<typename T>
constexpr T blNegate(const T& x) noexcept {
  using U = typename std::make_unsigned<T>::type;
  return T(~U(x) + U(1));
}
114 | |
//! Returns the size of `T` in bits (`sizeof(T) * 8`).
template<typename T>
constexpr uint32_t blBitSizeOf() noexcept {
  return static_cast<uint32_t>(sizeof(T) << 3);
}
117 | |
118 | template<typename T> |
119 | constexpr size_t blBitWordCountFromBitCount(size_t nBits) noexcept { |
120 | return (nBits + blBitSizeOf<T>() - 1) / blBitSizeOf<T>(); |
121 | } |
122 | |
123 | //! Bit-cast `x` of `In` type to the given `Out` type. |
124 | //! |
125 | //! Useful to bit-cast between integers and floating points. The size of `Out` |
126 | //! and `In` must be the same otherwise the compilation would fail. Bit casting |
127 | //! is used by `blEquals` to implement bit equality for floating point types. |
128 | template<typename Out, typename In> |
129 | BL_INLINE Out blBitCast(const In& x) noexcept { |
130 | static_assert(sizeof(Out) == sizeof(In), "The size of In and Out must match" ); |
131 | union { In in; Out out; } u; |
132 | u.in = x; |
133 | return u.out; |
134 | } |
135 | |
//! Returns `x << y` (logical shift left).
//!
//! The shift is performed on the unsigned counterpart of `X` and the result is
//! converted back to `X`, so no signed value is ever shifted.
template<typename X, typename Y>
constexpr X blBitShl(const X& x, const Y& y) noexcept {
  using UnsignedX = typename std::make_unsigned<X>::type;
  return static_cast<X>(static_cast<UnsignedX>(x) << y);
}
142 | |
//! Returns `x >> y` (logical shift right).
//!
//! The shift is performed on the unsigned counterpart of `X`, so zeros are
//! shifted in from the top even when `X` is signed.
template<typename X, typename Y>
constexpr X blBitShr(const X& x, const Y& y) noexcept {
  using UnsignedX = typename std::make_unsigned<X>::type;
  return static_cast<X>(static_cast<UnsignedX>(x) >> y);
}
149 | |
//! Returns `x >> y` (arithmetic shift right).
//!
//! The shift is performed on the signed counterpart of `X` and the result is
//! converted back to `X`.
template<typename X, typename Y>
constexpr X blBitSar(const X& x, const Y& y) noexcept {
  using SignedX = typename std::make_signed<X>::type;
  return static_cast<X>(static_cast<SignedX>(x) >> y);
}
156 | |
157 | template<typename T> |
158 | BL_INLINE T blBitRolImpl(const T& x, unsigned n) noexcept { |
159 | return blBitShl(x, n % unsigned(sizeof(T) * 8u)) | blBitShr(x, (0u - n) % unsigned(sizeof(T) * 8u)) ; |
160 | } |
161 | |
162 | template<typename T> |
163 | BL_INLINE T blBitRorImpl(const T& x, unsigned n) noexcept { |
164 | return blBitShr(x, n % unsigned(sizeof(T) * 8u)) | blBitShl(x, (0u - n) % unsigned(sizeof(T) * 8u)) ; |
165 | } |
166 | |
// MSVC is unable to emit `rol|ror` instruction when `n` is not constant so we
// help it by calling the rotate intrinsics directly. This, however, prevents
// us from using `constexpr`.
#if defined(_MSC_VER)
template<>
BL_INLINE uint8_t blBitRolImpl(const uint8_t& x, unsigned n) noexcept {
  return static_cast<uint8_t>(_rotl8(x, static_cast<uint8_t>(n)));
}

template<>
BL_INLINE uint8_t blBitRorImpl(const uint8_t& x, unsigned n) noexcept {
  return static_cast<uint8_t>(_rotr8(x, static_cast<uint8_t>(n)));
}

template<>
BL_INLINE uint16_t blBitRolImpl(const uint16_t& x, unsigned n) noexcept {
  return static_cast<uint16_t>(_rotl16(x, static_cast<uint8_t>(n)));
}

template<>
BL_INLINE uint16_t blBitRorImpl(const uint16_t& x, unsigned n) noexcept {
  return static_cast<uint16_t>(_rotr16(x, static_cast<uint8_t>(n)));
}

template<>
BL_INLINE uint32_t blBitRolImpl(const uint32_t& x, unsigned n) noexcept {
  return static_cast<uint32_t>(_rotl(x, static_cast<int>(n)));
}

template<>
BL_INLINE uint32_t blBitRorImpl(const uint32_t& x, unsigned n) noexcept {
  return static_cast<uint32_t>(_rotr(x, static_cast<int>(n)));
}

template<>
BL_INLINE uint64_t blBitRolImpl(const uint64_t& x, unsigned n) noexcept {
  return static_cast<uint64_t>(_rotl64(x, static_cast<int>(n)));
}

template<>
BL_INLINE uint64_t blBitRorImpl(const uint64_t& x, unsigned n) noexcept {
  return static_cast<uint64_t>(_rotr64(x, static_cast<int>(n)));
}
#endif
179 | |
180 | template<typename X, typename Y> |
181 | BL_INLINE X blBitRol(const X& x, const Y& n) noexcept { return X(blBitRolImpl(blAsUIntLeast32(x), unsigned(n))); } |
182 | |
183 | template<typename X, typename Y> |
184 | BL_INLINE X blBitRor(const X& x, const Y& n) noexcept { return X(blBitRorImpl(blAsUIntLeast32(x), unsigned(n))); } |
185 | |
//! Returns `x | (x >> y)` - helper used by some bit manipulation helpers.
template<typename X, typename Y>
constexpr X blBitShrOr(const X& x, const Y& y) noexcept {
  return static_cast<X>(blBitShr(x, y) | x);
}

//! Applies `x = x | (x >> shift)` for each shift in `y, args...`, left to
//! right, and returns the final value.
template<typename X, typename Y, typename... Args>
constexpr X blBitShrOr(const X& x, const Y& y, Args... args) noexcept {
  return static_cast<X>(blBitShrOr(blBitShrOr(x, y), args...));
}
192 | |
193 | //! Fill all trailing bits right from the first most significant bit set. |
194 | template<typename T> |
195 | constexpr T blFillTrailingBits(const T& x) noexcept { |
196 | typedef typename BLInternal::StdInt<sizeof(T), 1>::Type U; |
197 | return T(blFillTrailingBits(U(x))); |
198 | } |
199 | |
200 | template<> constexpr uint8_t blFillTrailingBits(const uint8_t& x) noexcept { return blBitShrOr(x, 1, 2, 4); } |
201 | template<> constexpr uint16_t blFillTrailingBits(const uint16_t& x) noexcept { return blBitShrOr(x, 1, 2, 4, 8); } |
202 | template<> constexpr uint32_t blFillTrailingBits(const uint32_t& x) noexcept { return blBitShrOr(x, 1, 2, 4, 8, 16); } |
203 | template<> constexpr uint64_t blFillTrailingBits(const uint64_t& x) noexcept { return blBitShrOr(x, 1, 2, 4, 8, 16, 32); } |
204 | |
//! Returns a bit-mask of type `T` that has only bit `x` set.
template<typename T, typename Arg>
constexpr T blBitMask(Arg x) noexcept {
  return static_cast<T>(static_cast<T>(1u) << x);
}

//! Returns a bit-mask of type `T` that has every bit listed in `x, args...`
//! set.
template<typename T, typename Arg, typename... Args>
constexpr T blBitMask(Arg x, Args... args) noexcept {
  return static_cast<T>(blBitMask<T>(args...) | blBitMask<T>(x));
}
212 | |
//! Returns a bit-mask of type `T` with all bits set when `x` is 1, or zero
//! when `x` is 0.
//!
//! \note `x` must be either 0 or 1; any other value produces invalid output
//! because the mask is simply the negation of `T(x)`.
template<typename T, typename B>
constexpr T blBitMaskFromBool(const B& x) noexcept {
  return blNegate(static_cast<T>(x));
}
218 | |
//! Tests whether `x` has its `i`-th bit set (bit 0 is the least significant).
template<typename T, typename I>
constexpr bool blBitTest(const T& x, const I& i) noexcept {
  using U = typename std::make_unsigned<T>::type;
  return 0 != ((U(1) << i) & U(x));
}
225 | |
//! Tests whether every bit set in `y` is also set in `x`.
template<typename X, typename Y>
constexpr bool blBitMatch(const X& x, const Y& y) noexcept {
  return y == (y & x);
}
229 | |
//! Portable count-trailing-zeros for 32-bit values.
//!
//! `xAndNegX` is expected to be `x & -x`, i.e. a value with only the lowest
//! set bit of `x` kept. Each mask test narrows the position of that bit by
//! half of the remaining range. A zero input yields 31.
constexpr uint32_t blBitCtzFallback(uint32_t xAndNegX) noexcept {
  return 31u - ((xAndNegX & 0x0000FFFFu) != 0 ? 16u : 0u)
             - ((xAndNegX & 0x00FF00FFu) != 0 ?  8u : 0u)
             - ((xAndNegX & 0x0F0F0F0Fu) != 0 ?  4u : 0u)
             - ((xAndNegX & 0x33333333u) != 0 ?  2u : 0u)
             - ((xAndNegX & 0x55555555u) != 0 ?  1u : 0u);
}
237 | |
//! Portable count-trailing-zeros for 64-bit values.
//!
//! `xAndNegX` is expected to be `x & -x`, i.e. a value with only the lowest
//! set bit of `x` kept. Each mask test narrows the position of that bit by
//! half of the remaining range. A zero input yields 63.
constexpr uint32_t blBitCtzFallback(uint64_t xAndNegX) noexcept {
  return 63u - ((xAndNegX & 0x00000000FFFFFFFFu) != 0 ? 32u : 0u)
             - ((xAndNegX & 0x0000FFFF0000FFFFu) != 0 ? 16u : 0u)
             - ((xAndNegX & 0x00FF00FF00FF00FFu) != 0 ?  8u : 0u)
             - ((xAndNegX & 0x0F0F0F0F0F0F0F0Fu) != 0 ?  4u : 0u)
             - ((xAndNegX & 0x3333333333333333u) != 0 ?  2u : 0u)
             - ((xAndNegX & 0x5555555555555555u) != 0 ?  1u : 0u);
}
246 | |
//! `constexpr` count-trailing-zeros: isolates the lowest set bit of `x`
//! (`x & -x` on the value widened to at least 32 bits) and passes it to
//! `blBitCtzFallback()`.
template<typename T>
constexpr uint32_t blBitCtzStatic(const T& x) noexcept {
  return blBitCtzFallback(blNegate(blAsUIntLeast32(x)) & blAsUIntLeast32(x));
}
249 | |
250 | template<typename T> |
251 | BL_INLINE uint32_t blBitCtzImpl(const T& x) noexcept { return blBitCtzStatic(x); } |
252 | |
253 | #if !defined(BL_BUILD_NO_INTRINSICS) |
254 | # if defined(__GNUC__) |
255 | template<> BL_INLINE uint32_t blBitCtzImpl(const uint32_t& x) noexcept { return uint32_t(__builtin_ctz(x)); } |
256 | template<> BL_INLINE uint32_t blBitCtzImpl(const uint64_t& x) noexcept { return uint32_t(__builtin_ctzll(x)); } |
257 | # elif defined(_MSC_VER) |
258 | template<> BL_INLINE uint32_t blBitCtzImpl(const uint32_t& x) noexcept { unsigned long i; _BitScanForward(&i, x); return uint32_t(i); } |
259 | # if BL_TARGET_ARCH_X86 == 64 || BL_TARGET_ARCH_ARM == 64 |
260 | template<> BL_INLINE uint32_t blBitCtzImpl(const uint64_t& x) noexcept { unsigned long i; _BitScanForward64(&i, x); return uint32_t(i); } |
261 | # endif |
262 | # endif |
263 | #endif |
264 | |
265 | //! Count trailing zeros in `x` (returns a position of a first bit set in `x`). |
266 | //! |
267 | //! \note The input MUST NOT be zero, otherwise the result is undefined. |
268 | template<typename T> |
269 | static BL_INLINE uint32_t blBitCtz(T x) noexcept { return blBitCtzImpl(blAsUIntLeast32(x)); } |
270 | |
//! Generates a trailing bit-mask that has `n` least significant (trailing)
//! bits set.
//!
//! `n` may be anything from zero up to the bit-size of `T`; passing the full
//! bit-size yields a mask with all bits set.
template<typename T, typename N>
constexpr T blTrailingBitMask(const N& n) noexcept {
  typedef typename std::make_unsigned<T>::type U;
  return (sizeof(U) < sizeof(uintptr_t))
    // `T` is narrower than `uintptr_t`, so shifting by the bit-size of `T`
    // is well defined when done in `uintptr_t`.
    ? T(U((uintptr_t(1) << n) - uintptr_t(1)))
    // Shifting by the full width of the type is UNDEFINED BEHAVIOR (and is
    // rejected in constant expressions), so the all-ones mask is selected
    // before any shift is evaluated.
    : (n >= N(sizeof(T) * 8u)) ? T(~U(0))
                               : T((U(1) << n) - U(1u));
}
282 | |
//! Tests whether `x` is non-zero and all of its set bits form one contiguous
//! run (for example `0b0110` or `0b1000`, but not `0b0101`).
//!
//! Adding the lowest set bit to `x` carries through the lowest run of ones;
//! the XOR with `x` is not less than `x` only when no set bits remain above
//! that run.
template<typename T>
constexpr bool blIsBitMaskConsecutive(const T& x) noexcept {
  using U = typename std::make_unsigned<T>::type;
  return !(x == 0) && U(x) <= (U(x) ^ (U(x) + (U(x) & (~U(x) + 1u))));
}
288 | |
//! Returns the shift of a bit-mask `x`, which is the index of its lowest set
//! bit as computed by `blBitCtzStatic()`.
template<typename T>
constexpr uint32_t blBitShiftOf(const T& x) noexcept {
  return blBitCtzStatic(x);
}
291 | |
292 | } // {anonymous} |
293 | |
294 | // ============================================================================ |
295 | // [ByteSwap] |
296 | // ============================================================================ |
297 | |
298 | namespace { |
299 | |
300 | template<typename T> |
301 | static BL_INLINE T blByteSwap32(const T& x) noexcept { |
302 | #if defined(__GNUC__) && !defined(BL_BUILD_NO_INTRINSICS) |
303 | return T(uint32_t(__builtin_bswap32(uint32_t(x)))); |
304 | #elif defined(_MSC_VER) && !defined(BL_BUILD_NO_INTRINSICS) |
305 | return T(uint32_t(_byteswap_ulong(uint32_t(x)))); |
306 | #else |
307 | return T((uint32_t(x) << 24) | (uint32_t(x) >> 24) | ((uint32_t(x) << 8) & 0x00FF0000u) | ((uint32_t(x) >> 8) & 0x0000FF00)); |
308 | #endif |
309 | } |
310 | |
311 | template<typename T> |
312 | static BL_INLINE T blByteSwap24(const T& x) noexcept { |
313 | // This produces always much better code than trying to do a real 24-bit byteswap. |
314 | return T(blByteSwap32(uint32_t(x)) >> 8); |
315 | } |
316 | |
317 | template<typename T> |
318 | static BL_INLINE T blByteSwap16(const T& x) noexcept { |
319 | #if defined(_MSC_VER) && !defined(BL_BUILD_NO_INTRINSICS) |
320 | return T(uint16_t(_byteswap_ushort(uint16_t(x)))); |
321 | #else |
322 | return T((uint16_t(x) << 8) | (uint16_t(x) >> 8)); |
323 | #endif |
324 | } |
325 | |
326 | template<typename T> |
327 | static BL_INLINE T blByteSwap64(const T& x) noexcept { |
328 | #if defined(__GNUC__) && !defined(BL_BUILD_NO_INTRINSICS) |
329 | return T(uint64_t(__builtin_bswap64(uint64_t(x)))); |
330 | #elif defined(_MSC_VER) && !defined(BL_BUILD_NO_INTRINSICS) |
331 | return T(uint64_t(_byteswap_uint64(uint64_t(x)))); |
332 | #else |
333 | return T( (uint64_t(blByteSwap32(uint32_t(uint64_t(x) >> 32 ))) ) | |
334 | (uint64_t(blByteSwap32(uint32_t(uint64_t(x) & 0xFFFFFFFFu))) << 32) ); |
335 | #endif |
336 | } |
337 | |
338 | // TODO: [GLOBAL] REMOVE? |
339 | template<typename T> static BL_INLINE T blByteSwap16LE(T x) noexcept { return BL_BYTE_ORDER_NATIVE == BL_BYTE_ORDER_LE ? T(x) : T(blByteSwap16(int16_t(x))); } |
340 | template<typename T> static BL_INLINE T blByteSwap24LE(T x) noexcept { return BL_BYTE_ORDER_NATIVE == BL_BYTE_ORDER_LE ? T(x) : T(blByteSwap24(uint32_t(x))); } |
341 | template<typename T> static BL_INLINE T blByteSwap32LE(T x) noexcept { return BL_BYTE_ORDER_NATIVE == BL_BYTE_ORDER_LE ? T(x) : T(blByteSwap32(uint32_t(x))); } |
342 | template<typename T> static BL_INLINE T blByteSwap64LE(T x) noexcept { return BL_BYTE_ORDER_NATIVE == BL_BYTE_ORDER_LE ? T(x) : T(blByteSwap64(uint64_t(x))); } |
343 | |
344 | template<typename T> static BL_INLINE T blByteSwap16BE(T x) noexcept { return BL_BYTE_ORDER_NATIVE == BL_BYTE_ORDER_BE ? T(x) : T(blByteSwap16(uint16_t(x))); } |
345 | template<typename T> static BL_INLINE T blByteSwap24BE(T x) noexcept { return BL_BYTE_ORDER_NATIVE == BL_BYTE_ORDER_BE ? T(x) : T(blByteSwap24(uint32_t(x))); } |
346 | template<typename T> static BL_INLINE T blByteSwap32BE(T x) noexcept { return BL_BYTE_ORDER_NATIVE == BL_BYTE_ORDER_BE ? T(x) : T(blByteSwap32(uint32_t(x))); } |
347 | template<typename T> static BL_INLINE T blByteSwap64BE(T x) noexcept { return BL_BYTE_ORDER_NATIVE == BL_BYTE_ORDER_BE ? T(x) : T(blByteSwap64(uint64_t(x))); } |
348 | |
349 | } // {anonymous} |
350 | |
351 | // ============================================================================ |
352 | // [Alignment] |
353 | // ============================================================================ |
354 | |
355 | template<typename X, typename Y> |
356 | static constexpr bool blIsAligned(const X& base, const Y& alignment) noexcept { |
357 | typedef typename BLInternal::StdInt<sizeof(X), 1>::Type U; |
358 | return ((U)base % (U)alignment) == 0; |
359 | } |
360 | |
//! Tests whether `x` is a power of two (exactly one bit is set). Zero is not
//! considered a power of two.
template<typename T>
static constexpr bool blIsPowerOf2(const T& x) noexcept {
  using U = typename std::make_unsigned<T>::type;
  // Clearing the lowest set bit (`x & (x - 1)`) leaves zero only when a
  // single bit was set.
  return x != 0 && (U(x) & (U(x) - U(1))) == 0;
}
367 | |
368 | template<typename X, typename Y> |
369 | static constexpr X blAlignUp(const X& x, const Y& alignment) noexcept { |
370 | typedef typename BLInternal::StdInt<sizeof(X), 1>::Type U; |
371 | return (X)( ((U)x + ((U)(alignment) - 1u)) & ~((U)(alignment) - 1u) ); |
372 | } |
373 | |
374 | template<typename T> |
375 | static constexpr T blAlignUpPowerOf2(const T& x) noexcept { |
376 | typedef typename BLInternal::StdInt<sizeof(T), 1>::Type U; |
377 | return (T)(blFillTrailingBits(U(x) - 1u) + 1u); |
378 | } |
379 | |
380 | //! Returns zero or a positive difference between `base` and `base` aligned to `alignment`. |
381 | template<typename X, typename Y> |
382 | static constexpr X blAlignUpDiff(const X& base, const Y& alignment) noexcept { |
383 | typedef typename BLInternal::StdInt<sizeof(X), 1>::Type U; |
384 | return blAlignUp(U(base), alignment) - U(base); |
385 | } |
386 | |
387 | template<typename X, typename Y> |
388 | static constexpr X blAlignDown(const X& x, const Y& alignment) noexcept { |
389 | typedef typename BLInternal::StdInt<sizeof(X), 1>::Type U; |
390 | return (X)( (U)x & ~((U)(alignment) - 1u) ); |
391 | } |
392 | |
393 | // ============================================================================ |
394 | // [Pointer Utilities] |
395 | // ============================================================================ |
396 | |
//! Returns `ptr` advanced by `offset` BYTES (not elements). The offset goes
//! through `intptr_t`, so negative offsets move the pointer backwards.
template<typename T, typename Offset>
static constexpr T* blOffsetPtr(T* ptr, Offset offset) noexcept {
  return (T*)(uintptr_t(ptr) + uintptr_t(intptr_t(offset)));
}

//! Returns `ptr` advanced by `offset` BYTES and reinterpreted as `T*`. The
//! offset goes through `intptr_t`, so negative offsets move the pointer
//! backwards.
template<typename T, typename P, typename Offset>
static constexpr T* blOffsetPtr(P* ptr, Offset offset) noexcept {
  return (T*)(uintptr_t(ptr) + uintptr_t(intptr_t(offset)));
}
402 | |
403 | // ============================================================================ |
404 | // [ClampTo] |
405 | // ============================================================================ |
406 | |
//! Clamps `x` to the range `[0, y]` and returns it as `DstT`.
//!
//! `x` is first compared with `y` as an unsigned number, which accepts every
//! value in `[0, y]` with one comparison. Anything else is either greater
//! than `y` (result is `y`) or, for a signed `SrcT`, negative (result is 0).
//! The sign is tested directly, so the most negative value of `SrcT` clamps
//! to 0 like any other negative input.
template<typename SrcT, typename DstT>
static constexpr DstT blClampToImpl(const SrcT& x, const DstT& y) noexcept {
  typedef typename std::make_unsigned<SrcT>::type U;
  return U(x) <= U(y) ? DstT(x) :
         (std::is_unsigned<SrcT>::value || x > SrcT(0)) ? DstT(y) : DstT(0);
}
413 | |
414 | //! Clamp a value `x` to a byte (unsigned 8-bit type). |
415 | template<typename T> |
416 | static constexpr uint8_t blClampToByte(const T& x) noexcept { |
417 | return blClampToImpl<T, uint8_t>(x, uint8_t(0xFFu)); |
418 | } |
419 | |
420 | //! Clamp a value `x` to a word (unsigned 16-bit type). |
421 | template<typename T> |
422 | static constexpr uint16_t blClampToWord(const T& x) noexcept { |
423 | return blClampToImpl<T, uint16_t>(x, uint16_t(0xFFFFu)); |
424 | } |
425 | |
426 | // ============================================================================ |
427 | // [Arithmetic] |
428 | // ============================================================================ |
429 | |
//! Flag that the overflow-aware arithmetic helpers OR a non-zero value into
//! when an operation overflows; it is never cleared by them.
using BLOverflowFlag = unsigned char;
431 | |
432 | namespace BLInternal { |
433 | template<typename T> |
434 | BL_INLINE T addOverflowFallback(T x, T y, BLOverflowFlag* of) noexcept { |
435 | typedef typename std::make_unsigned<T>::type U; |
436 | |
437 | U result = U(x) + U(y); |
438 | *of |= BLOverflowFlag(blIsUnsigned<T>() ? result < U(x) : T((U(x) ^ ~U(y)) & (U(x) ^ result)) < 0); |
439 | return T(result); |
440 | } |
441 | |
442 | template<typename T> |
443 | BL_INLINE T subOverflowFallback(T x, T y, BLOverflowFlag* of) noexcept { |
444 | typedef typename std::make_unsigned<T>::type U; |
445 | |
446 | U result = U(x) - U(y); |
447 | *of |= BLOverflowFlag(blIsUnsigned<T>() ? result > U(x) : T((U(x) ^ U(y)) & (U(x) ^ result)) < 0); |
448 | return T(result); |
449 | } |
450 | |
451 | template<typename T> |
452 | BL_INLINE T mulOverflowFallback(T x, T y, BLOverflowFlag* of) noexcept { |
453 | typedef typename BLInternal::StdInt<sizeof(T) * 2, blIsUnsigned<T>()>::Type I; |
454 | typedef typename std::make_unsigned<I>::type U; |
455 | |
456 | U mask = U(blMaxValue<typename std::make_unsigned<T>::type>()); |
457 | if (std::is_signed<T>::value) { |
458 | U prod = U(I(x)) * U(I(y)); |
459 | *of |= BLOverflowFlag(I(prod) < I(blMinValue<T>()) || I(prod) > I(blMaxValue<T>())); |
460 | return T(I(prod & mask)); |
461 | } |
462 | else { |
463 | U prod = U(x) * U(y); |
464 | *of |= BLOverflowFlag((prod & ~mask) != 0); |
465 | return T(prod & mask); |
466 | } |
467 | } |
468 | |
469 | template<> |
470 | BL_INLINE int64_t mulOverflowFallback(int64_t x, int64_t y, BLOverflowFlag* of) noexcept { |
471 | int64_t result = int64_t(uint64_t(x) * uint64_t(y)); |
472 | *of |= BLOverflowFlag(x && (result / x != y)); |
473 | return result; |
474 | } |
475 | |
476 | template<> |
477 | BL_INLINE uint64_t mulOverflowFallback(uint64_t x, uint64_t y, BLOverflowFlag* of) noexcept { |
478 | uint64_t result = x * y; |
479 | *of |= BLOverflowFlag(y != 0 && blMaxValue<uint64_t>() / y < x); |
480 | return result; |
481 | } |
482 | |
483 | // These can be specialized. |
484 | template<typename T> BL_INLINE T addOverflowImpl(const T& x, const T& y, BLOverflowFlag* of) noexcept { return addOverflowFallback(x, y, of); } |
485 | template<typename T> BL_INLINE T subOverflowImpl(const T& x, const T& y, BLOverflowFlag* of) noexcept { return subOverflowFallback(x, y, of); } |
486 | template<typename T> BL_INLINE T mulOverflowImpl(const T& x, const T& y, BLOverflowFlag* of) noexcept { return mulOverflowFallback(x, y, of); } |
487 | |
488 | #if defined(__GNUC__) && !defined(BL_BUILD_NO_INTRINSICS) |
489 | #if defined(__clang__) || __GNUC__ >= 5 |
490 | #define BL_ARITH_OVERFLOW_SPECIALIZE(FUNC, T, RESULT_T, BUILTIN) \ |
491 | template<> \ |
492 | BL_INLINE T FUNC(const T& x, const T& y, BLOverflowFlag* of) noexcept { \ |
493 | RESULT_T result; \ |
494 | *of |= BLOverflowFlag(BUILTIN((RESULT_T)x, (RESULT_T)y, &result)); \ |
495 | return T(result); \ |
496 | } |
497 | BL_ARITH_OVERFLOW_SPECIALIZE(addOverflowImpl, int32_t , int , __builtin_sadd_overflow ) |
498 | BL_ARITH_OVERFLOW_SPECIALIZE(addOverflowImpl, uint32_t, unsigned int , __builtin_uadd_overflow ) |
499 | BL_ARITH_OVERFLOW_SPECIALIZE(addOverflowImpl, int64_t , long long , __builtin_saddll_overflow) |
500 | BL_ARITH_OVERFLOW_SPECIALIZE(addOverflowImpl, uint64_t, unsigned long long, __builtin_uaddll_overflow) |
501 | BL_ARITH_OVERFLOW_SPECIALIZE(subOverflowImpl, int32_t , int , __builtin_ssub_overflow ) |
502 | BL_ARITH_OVERFLOW_SPECIALIZE(subOverflowImpl, uint32_t, unsigned int , __builtin_usub_overflow ) |
503 | BL_ARITH_OVERFLOW_SPECIALIZE(subOverflowImpl, int64_t , long long , __builtin_ssubll_overflow) |
504 | BL_ARITH_OVERFLOW_SPECIALIZE(subOverflowImpl, uint64_t, unsigned long long, __builtin_usubll_overflow) |
505 | BL_ARITH_OVERFLOW_SPECIALIZE(mulOverflowImpl, int32_t , int , __builtin_smul_overflow ) |
506 | BL_ARITH_OVERFLOW_SPECIALIZE(mulOverflowImpl, uint32_t, unsigned int , __builtin_umul_overflow ) |
507 | BL_ARITH_OVERFLOW_SPECIALIZE(mulOverflowImpl, int64_t , long long , __builtin_smulll_overflow) |
508 | BL_ARITH_OVERFLOW_SPECIALIZE(mulOverflowImpl, uint64_t, unsigned long long, __builtin_umulll_overflow) |
509 | #undef BL_ARITH_OVERFLOW_SPECIALIZE |
510 | #endif |
511 | #endif |
512 | |
// There is a bug in MSVC that makes these specializations unusable, maybe in
// the future... The block stays disabled (`&& 0`).
//
// The specialized signature takes `const T&` so that it matches the primary
// `addOverflowImpl` / `subOverflowImpl` templates; an explicit specialization
// taking `T` by value does not match any of them and cannot compile.
#if defined(_MSC_VER) && 0
#define BL_ARITH_OVERFLOW_SPECIALIZE(FUNC, T, ALT_T, BUILTIN) \
  template<> \
  BL_INLINE T FUNC(const T& x, const T& y, BLOverflowFlag* of) noexcept { \
    ALT_T result; \
    *of |= BLOverflowFlag(BUILTIN(0, (ALT_T)x, (ALT_T)y, &result)); \
    return T(result); \
  }
BL_ARITH_OVERFLOW_SPECIALIZE(addOverflowImpl, uint32_t, unsigned int , _addcarry_u32 )
BL_ARITH_OVERFLOW_SPECIALIZE(subOverflowImpl, uint32_t, unsigned int , _subborrow_u32)
#if ARCH_BITS >= 64
BL_ARITH_OVERFLOW_SPECIALIZE(addOverflowImpl, uint64_t, unsigned __int64 , _addcarry_u64 )
BL_ARITH_OVERFLOW_SPECIALIZE(subOverflowImpl, uint64_t, unsigned __int64 , _subborrow_u64)
#endif
#undef BL_ARITH_OVERFLOW_SPECIALIZE
#endif
530 | } // {BLInternal} |
531 | |
532 | template<typename T> |
533 | static BL_INLINE T blAddOverflow(const T& x, const T& y, BLOverflowFlag* of) noexcept { return T(BLInternal::addOverflowImpl(blAsStdInt(x), blAsStdInt(y), of)); } |
534 | |
535 | template<typename T> |
536 | static BL_INLINE T blSubOverflow(const T& x, const T& y, BLOverflowFlag* of) noexcept { return T(BLInternal::subOverflowImpl(blAsStdInt(x), blAsStdInt(y), of)); } |
537 | |
538 | template<typename T> |
539 | static BL_INLINE T blMulOverflow(const T& x, const T& y, BLOverflowFlag* of) noexcept { return T(BLInternal::mulOverflowImpl(blAsStdInt(x), blAsStdInt(y), of)); } |
540 | |
541 | template<typename T> |
542 | static BL_INLINE T blUAddSaturate(const T& x, const T& y) noexcept { |
543 | BLOverflowFlag of = 0; |
544 | T result = blAddOverflow(x, y, &of); |
545 | return T(result | blBitMaskFromBool<T>(of)); |
546 | } |
547 | |
548 | template<typename T> |
549 | static BL_INLINE T blUSubSaturate(const T& x, const T& y) noexcept { |
550 | BLOverflowFlag of = 0; |
551 | T result = blSubOverflow(x, y, &of); |
552 | return T(result & blBitMaskFromBool<T>(!of)); |
553 | } |
554 | |
555 | template<typename T> |
556 | static BL_INLINE T blUMulSaturate(const T& x, const T& y) noexcept { |
557 | BLOverflowFlag of = 0; |
558 | T result = blMulOverflow(x, y, &of); |
559 | return T(result | blBitMaskFromBool<T>(of)); |
560 | } |
561 | |
562 | // ============================================================================ |
563 | // [Udiv255] |
564 | // ============================================================================ |
565 | |
//! Integer division by 255, computed as `((x + 128) * 257) >> 16`. Per the
//! original note this is compatible with the `(x + (x >> 8)) >> 8` form used
//! by SIMD code.
static constexpr uint32_t blUdiv255(uint32_t x) noexcept {
  return ((x + 128u) * 257u) >> 16u;
}
568 | |
569 | // ============================================================================ |
570 | // [blMemRead] |
571 | // ============================================================================ |
572 | |
573 | namespace { |
574 | |
575 | BL_INLINE uint32_t blMemReadU8(const void* p) noexcept { return uint32_t(static_cast<const uint8_t*>(p)[0]); } |
576 | BL_INLINE int32_t blMemReadI8(const void* p) noexcept { return int32_t(static_cast<const int8_t*>(p)[0]); } |
577 | |
578 | template<uint32_t ByteOrder, size_t Alignment> |
579 | BL_INLINE uint32_t blMemReadU16(const void* p) noexcept { |
580 | if (ByteOrder == BL_BYTE_ORDER_NATIVE && (BL_UNALIGNED_IO_16 || Alignment >= 2)) { |
581 | typedef typename BLMisalignedUInt<uint16_t, Alignment>::T U16AlignedToN; |
582 | return uint32_t(static_cast<const U16AlignedToN*>(p)[0]); |
583 | } |
584 | else { |
585 | uint32_t hi = blMemReadU8(static_cast<const uint8_t*>(p) + (ByteOrder == BL_BYTE_ORDER_LE ? 1 : 0)); |
586 | uint32_t lo = blMemReadU8(static_cast<const uint8_t*>(p) + (ByteOrder == BL_BYTE_ORDER_LE ? 0 : 1)); |
587 | return blBitShl(hi, 8) | lo; |
588 | } |
589 | } |
590 | |
591 | template<uint32_t ByteOrder, size_t Alignment> |
592 | BL_INLINE int32_t blMemReadI16(const void* p) noexcept { |
593 | if (ByteOrder == BL_BYTE_ORDER_NATIVE && (BL_UNALIGNED_IO_16 || Alignment >= 2)) { |
594 | typedef typename BLMisalignedUInt<uint16_t, Alignment>::T U16AlignedToN; |
595 | return int32_t(int16_t(static_cast<const U16AlignedToN*>(p)[0])); |
596 | } |
597 | else { |
598 | int32_t hi = int32_t(blMemReadI8(static_cast<const uint8_t*>(p) + (ByteOrder == BL_BYTE_ORDER_LE ? 1 : 0))); |
599 | int32_t lo = int32_t(blMemReadU8(static_cast<const uint8_t*>(p) + (ByteOrder == BL_BYTE_ORDER_LE ? 0 : 1))); |
600 | return blBitShl(hi, 8) | lo; |
601 | } |
602 | } |
603 | |
604 | template<uint32_t ByteOrder = BL_BYTE_ORDER_NATIVE> |
605 | BL_INLINE uint32_t blMemReadU24u(const void* p) noexcept { |
606 | uint32_t b0 = blMemReadU8(static_cast<const uint8_t*>(p) + (ByteOrder == BL_BYTE_ORDER_LE ? 2 : 0)); |
607 | uint32_t b1 = blMemReadU8(static_cast<const uint8_t*>(p) + (ByteOrder == BL_BYTE_ORDER_LE ? 1 : 1)); |
608 | uint32_t b2 = blMemReadU8(static_cast<const uint8_t*>(p) + (ByteOrder == BL_BYTE_ORDER_LE ? 0 : 2)); |
609 | return blBitShl(b0, 16) | blBitShl(b1, 8) | b2; |
610 | } |
611 | |
612 | template<uint32_t ByteOrder, size_t Alignment> |
613 | BL_INLINE uint32_t blMemReadU32(const void* p) noexcept { |
614 | if (BL_UNALIGNED_IO_32 || Alignment >= 4) { |
615 | typedef typename BLMisalignedUInt<uint32_t, Alignment>::T U32AlignedToN; |
616 | uint32_t x = static_cast<const U32AlignedToN*>(p)[0]; |
617 | return ByteOrder == BL_BYTE_ORDER_NATIVE ? x : blByteSwap32(x); |
618 | } |
619 | else { |
620 | uint32_t hi = blMemReadU16<ByteOrder, Alignment >= 2 ? size_t(2) : Alignment>(static_cast<const uint8_t*>(p) + (ByteOrder == BL_BYTE_ORDER_LE ? 2 : 0)); |
621 | uint32_t lo = blMemReadU16<ByteOrder, Alignment >= 2 ? size_t(2) : Alignment>(static_cast<const uint8_t*>(p) + (ByteOrder == BL_BYTE_ORDER_LE ? 0 : 2)); |
622 | return blBitShl(hi, 16) | lo; |
623 | } |
624 | } |
625 | |
626 | template<uint32_t ByteOrder, size_t Alignment> |
627 | BL_INLINE uint64_t blMemReadU64(const void* p) noexcept { |
628 | if (ByteOrder == BL_BYTE_ORDER_NATIVE && (BL_UNALIGNED_IO_64 || Alignment >= 8)) { |
629 | typedef typename BLMisalignedUInt<uint64_t, Alignment>::T U64AlignedToN; |
630 | return static_cast<const U64AlignedToN*>(p)[0]; |
631 | } |
632 | else { |
633 | uint32_t hi = blMemReadU32<ByteOrder, Alignment >= 4 ? size_t(4) : Alignment>(static_cast<const uint8_t*>(p) + (ByteOrder == BL_BYTE_ORDER_LE ? 4 : 0)); |
634 | uint32_t lo = blMemReadU32<ByteOrder, Alignment >= 4 ? size_t(4) : Alignment>(static_cast<const uint8_t*>(p) + (ByteOrder == BL_BYTE_ORDER_LE ? 0 : 4)); |
635 | return blBitShl(uint64_t(hi), 32) | lo; |
636 | } |
637 | } |
638 | |
639 | template<uint32_t ByteOrder, size_t Alignment> |
640 | BL_INLINE int32_t blMemReadI32(const void* p) noexcept { return int32_t(blMemReadU32<ByteOrder, Alignment>(p)); } |
641 | |
642 | template<uint32_t ByteOrder, size_t Alignment> |
643 | BL_INLINE int64_t blMemReadI64(const void* p) noexcept { return int64_t(blMemReadU64<ByteOrder, Alignment>(p)); } |
644 | |
645 | BL_INLINE int32_t blMemReadI16a(const void* p) noexcept { return blMemReadI16<BL_BYTE_ORDER_NATIVE, 2>(p); } |
646 | BL_INLINE int32_t blMemReadI16u(const void* p) noexcept { return blMemReadI16<BL_BYTE_ORDER_NATIVE, 1>(p); } |
647 | BL_INLINE uint32_t blMemReadU16a(const void* p) noexcept { return blMemReadU16<BL_BYTE_ORDER_NATIVE, 2>(p); } |
648 | BL_INLINE uint32_t blMemReadU16u(const void* p) noexcept { return blMemReadU16<BL_BYTE_ORDER_NATIVE, 1>(p); } |
649 | |
650 | BL_INLINE int32_t blMemReadI16aLE(const void* p) noexcept { return blMemReadI16<BL_BYTE_ORDER_LE, 2>(p); } |
651 | BL_INLINE int32_t blMemReadI16uLE(const void* p) noexcept { return blMemReadI16<BL_BYTE_ORDER_LE, 1>(p); } |
652 | BL_INLINE uint32_t blMemReadU16aLE(const void* p) noexcept { return blMemReadU16<BL_BYTE_ORDER_LE, 2>(p); } |
653 | BL_INLINE uint32_t blMemReadU16uLE(const void* p) noexcept { return blMemReadU16<BL_BYTE_ORDER_LE, 1>(p); } |
654 | |
655 | BL_INLINE int32_t blMemReadI16aBE(const void* p) noexcept { return blMemReadI16<BL_BYTE_ORDER_BE, 2>(p); } |
656 | BL_INLINE int32_t blMemReadI16uBE(const void* p) noexcept { return blMemReadI16<BL_BYTE_ORDER_BE, 1>(p); } |
657 | BL_INLINE uint32_t blMemReadU16aBE(const void* p) noexcept { return blMemReadU16<BL_BYTE_ORDER_BE, 2>(p); } |
658 | BL_INLINE uint32_t blMemReadU16uBE(const void* p) noexcept { return blMemReadU16<BL_BYTE_ORDER_BE, 1>(p); } |
659 | |
660 | BL_INLINE uint32_t blMemReadU24uLE(const void* p) noexcept { return blMemReadU24u<BL_BYTE_ORDER_LE>(p); } |
661 | BL_INLINE uint32_t blMemReadU24uBE(const void* p) noexcept { return blMemReadU24u<BL_BYTE_ORDER_BE>(p); } |
662 | |
663 | BL_INLINE int32_t blMemReadI32a(const void* p) noexcept { return blMemReadI32<BL_BYTE_ORDER_NATIVE, 4>(p); } |
664 | BL_INLINE int32_t blMemReadI32u(const void* p) noexcept { return blMemReadI32<BL_BYTE_ORDER_NATIVE, 1>(p); } |
665 | BL_INLINE uint32_t blMemReadU32a(const void* p) noexcept { return blMemReadU32<BL_BYTE_ORDER_NATIVE, 4>(p); } |
666 | BL_INLINE uint32_t blMemReadU32u(const void* p) noexcept { return blMemReadU32<BL_BYTE_ORDER_NATIVE, 1>(p); } |
667 | |
668 | BL_INLINE int32_t blMemReadI32aLE(const void* p) noexcept { return blMemReadI32<BL_BYTE_ORDER_LE, 4>(p); } |
669 | BL_INLINE int32_t blMemReadI32uLE(const void* p) noexcept { return blMemReadI32<BL_BYTE_ORDER_LE, 1>(p); } |
670 | BL_INLINE uint32_t blMemReadU32aLE(const void* p) noexcept { return blMemReadU32<BL_BYTE_ORDER_LE, 4>(p); } |
671 | BL_INLINE uint32_t blMemReadU32uLE(const void* p) noexcept { return blMemReadU32<BL_BYTE_ORDER_LE, 1>(p); } |
672 | |
673 | BL_INLINE int32_t blMemReadI32aBE(const void* p) noexcept { return blMemReadI32<BL_BYTE_ORDER_BE, 4>(p); } |
674 | BL_INLINE int32_t blMemReadI32uBE(const void* p) noexcept { return blMemReadI32<BL_BYTE_ORDER_BE, 1>(p); } |
675 | BL_INLINE uint32_t blMemReadU32aBE(const void* p) noexcept { return blMemReadU32<BL_BYTE_ORDER_BE, 4>(p); } |
676 | BL_INLINE uint32_t blMemReadU32uBE(const void* p) noexcept { return blMemReadU32<BL_BYTE_ORDER_BE, 1>(p); } |
677 | |
678 | BL_INLINE int64_t blMemReadI64a(const void* p) noexcept { return blMemReadI64<BL_BYTE_ORDER_NATIVE, 8>(p); } |
679 | BL_INLINE int64_t blMemReadI64u(const void* p) noexcept { return blMemReadI64<BL_BYTE_ORDER_NATIVE, 1>(p); } |
680 | BL_INLINE uint64_t blMemReadU64a(const void* p) noexcept { return blMemReadU64<BL_BYTE_ORDER_NATIVE, 8>(p); } |
681 | BL_INLINE uint64_t blMemReadU64u(const void* p) noexcept { return blMemReadU64<BL_BYTE_ORDER_NATIVE, 1>(p); } |
682 | |
683 | BL_INLINE int64_t blMemReadI64aLE(const void* p) noexcept { return blMemReadI64<BL_BYTE_ORDER_LE, 8>(p); } |
684 | BL_INLINE int64_t blMemReadI64uLE(const void* p) noexcept { return blMemReadI64<BL_BYTE_ORDER_LE, 1>(p); } |
685 | BL_INLINE uint64_t blMemReadU64aLE(const void* p) noexcept { return blMemReadU64<BL_BYTE_ORDER_LE, 8>(p); } |
686 | BL_INLINE uint64_t blMemReadU64uLE(const void* p) noexcept { return blMemReadU64<BL_BYTE_ORDER_LE, 1>(p); } |
687 | |
688 | BL_INLINE int64_t blMemReadI64aBE(const void* p) noexcept { return blMemReadI64<BL_BYTE_ORDER_BE, 8>(p); } |
689 | BL_INLINE int64_t blMemReadI64uBE(const void* p) noexcept { return blMemReadI64<BL_BYTE_ORDER_BE, 1>(p); } |
690 | BL_INLINE uint64_t blMemReadU64aBE(const void* p) noexcept { return blMemReadU64<BL_BYTE_ORDER_BE, 8>(p); } |
691 | BL_INLINE uint64_t blMemReadU64uBE(const void* p) noexcept { return blMemReadU64<BL_BYTE_ORDER_BE, 1>(p); } |
692 | |
693 | } // {anonymous} |
694 | |
695 | // ============================================================================ |
696 | // [blMemWrite] |
697 | // ============================================================================ |
698 | |
699 | namespace { |
700 | |
701 | BL_INLINE void blMemWriteU8(void* p, uint32_t x) noexcept { static_cast<uint8_t*>(p)[0] = uint8_t(x & 0xFFu); } |
702 | BL_INLINE void blMemWriteI8(void* p, int32_t x) noexcept { static_cast<uint8_t*>(p)[0] = uint8_t(x & 0xFF); } |
703 | |
704 | template<uint32_t ByteOrder, size_t Alignment> |
705 | BL_INLINE void blMemWriteU16(void* p, uint32_t x) noexcept { |
706 | if (ByteOrder == BL_BYTE_ORDER_NATIVE && (BL_UNALIGNED_IO_16 || Alignment >= 2)) { |
707 | typedef typename BLMisalignedUInt<uint16_t, Alignment>::T U16AlignedToN; |
708 | static_cast<U16AlignedToN*>(p)[0] = uint16_t(x & 0xFFFFu); |
709 | } |
710 | else { |
711 | static_cast<uint8_t*>(p)[0] = uint8_t((x >> (ByteOrder == BL_BYTE_ORDER_LE ? 0 : 8)) & 0xFFu); |
712 | static_cast<uint8_t*>(p)[1] = uint8_t((x >> (ByteOrder == BL_BYTE_ORDER_LE ? 8 : 0)) & 0xFFu); |
713 | } |
714 | } |
715 | |
716 | template<uint32_t ByteOrder = BL_BYTE_ORDER_NATIVE> |
717 | BL_INLINE void blMemWriteU24u(void* p, uint32_t v) noexcept { |
718 | static_cast<uint8_t*>(p)[0] = uint8_t((v >> (ByteOrder == BL_BYTE_ORDER_LE ? 0 : 16)) & 0xFFu); |
719 | static_cast<uint8_t*>(p)[1] = uint8_t((v >> (ByteOrder == BL_BYTE_ORDER_LE ? 8 : 8)) & 0xFFu); |
720 | static_cast<uint8_t*>(p)[2] = uint8_t((v >> (ByteOrder == BL_BYTE_ORDER_LE ? 16 : 0)) & 0xFFu); |
721 | } |
722 | |
723 | template<uint32_t ByteOrder, size_t Alignment> |
724 | BL_INLINE void blMemWriteU32(void* p, uint32_t x) noexcept { |
725 | if (BL_UNALIGNED_IO_32 || Alignment >= 4) { |
726 | typedef typename BLMisalignedUInt<uint32_t, Alignment>::T U32AlignedToN; |
727 | static_cast<U32AlignedToN*>(p)[0] = (ByteOrder == BL_BYTE_ORDER_NATIVE) ? x : blByteSwap32(x); |
728 | } |
729 | else { |
730 | blMemWriteU16<ByteOrder, Alignment >= 2 ? size_t(2) : Alignment>(static_cast<uint8_t*>(p) + 0, x >> (ByteOrder == BL_BYTE_ORDER_LE ? 0 : 16)); |
731 | blMemWriteU16<ByteOrder, Alignment >= 2 ? size_t(2) : Alignment>(static_cast<uint8_t*>(p) + 2, x >> (ByteOrder == BL_BYTE_ORDER_LE ? 16 : 0)); |
732 | } |
733 | } |
734 | |
735 | template<uint32_t ByteOrder, size_t Alignment> |
736 | BL_INLINE void blMemWriteU64(void* p, uint64_t x) noexcept { |
737 | if (ByteOrder == BL_BYTE_ORDER_NATIVE && (BL_UNALIGNED_IO_64 || Alignment >= 8)) { |
738 | typedef typename BLMisalignedUInt<uint64_t, Alignment>::T U64AlignedToN; |
739 | static_cast<U64AlignedToN*>(p)[0] = x; |
740 | } |
741 | else { |
742 | blMemWriteU32<ByteOrder, Alignment >= 4 ? size_t(4) : Alignment>(static_cast<uint8_t*>(p) + 0, uint32_t((x >> (ByteOrder == BL_BYTE_ORDER_LE ? 0 : 32)) & 0xFFFFFFFFu)); |
743 | blMemWriteU32<ByteOrder, Alignment >= 4 ? size_t(4) : Alignment>(static_cast<uint8_t*>(p) + 4, uint32_t((x >> (ByteOrder == BL_BYTE_ORDER_LE ? 32 : 0)) & 0xFFFFFFFFu)); |
744 | } |
745 | } |
746 | |
747 | template<uint32_t ByteOrder, size_t Alignment> BL_INLINE void blMemWriteI16(void* p, int32_t x) noexcept { blMemWriteU16<ByteOrder, Alignment>(p, uint32_t(x)); } |
748 | template<uint32_t ByteOrder, size_t Alignment> BL_INLINE void blMemWriteI32(void* p, int32_t x) noexcept { blMemWriteU32<ByteOrder, Alignment>(p, uint32_t(x)); } |
749 | template<uint32_t ByteOrder, size_t Alignment> BL_INLINE void blMemWriteI64(void* p, int64_t x) noexcept { blMemWriteU64<ByteOrder, Alignment>(p, uint64_t(x)); } |
750 | |
751 | BL_INLINE void blMemWriteI16a(void* p, int32_t x) noexcept { blMemWriteI16<BL_BYTE_ORDER_NATIVE, 2>(p, x); } |
752 | BL_INLINE void blMemWriteI16u(void* p, int32_t x) noexcept { blMemWriteI16<BL_BYTE_ORDER_NATIVE, 1>(p, x); } |
753 | BL_INLINE void blMemWriteU16a(void* p, uint32_t x) noexcept { blMemWriteU16<BL_BYTE_ORDER_NATIVE, 2>(p, x); } |
754 | BL_INLINE void blMemWriteU16u(void* p, uint32_t x) noexcept { blMemWriteU16<BL_BYTE_ORDER_NATIVE, 1>(p, x); } |
755 | |
756 | BL_INLINE void blMemWriteI16aLE(void* p, int32_t x) noexcept { blMemWriteI16<BL_BYTE_ORDER_LE, 2>(p, x); } |
757 | BL_INLINE void blMemWriteI16uLE(void* p, int32_t x) noexcept { blMemWriteI16<BL_BYTE_ORDER_LE, 1>(p, x); } |
758 | BL_INLINE void blMemWriteU16aLE(void* p, uint32_t x) noexcept { blMemWriteU16<BL_BYTE_ORDER_LE, 2>(p, x); } |
759 | BL_INLINE void blMemWriteU16uLE(void* p, uint32_t x) noexcept { blMemWriteU16<BL_BYTE_ORDER_LE, 1>(p, x); } |
760 | |
761 | BL_INLINE void blMemWriteI16aBE(void* p, int32_t x) noexcept { blMemWriteI16<BL_BYTE_ORDER_BE, 2>(p, x); } |
762 | BL_INLINE void blMemWriteI16uBE(void* p, int32_t x) noexcept { blMemWriteI16<BL_BYTE_ORDER_BE, 1>(p, x); } |
763 | BL_INLINE void blMemWriteU16aBE(void* p, uint32_t x) noexcept { blMemWriteU16<BL_BYTE_ORDER_BE, 2>(p, x); } |
764 | BL_INLINE void blMemWriteU16uBE(void* p, uint32_t x) noexcept { blMemWriteU16<BL_BYTE_ORDER_BE, 1>(p, x); } |
765 | |
766 | BL_INLINE void blMemWriteU24uLE(void* p, uint32_t v) noexcept { blMemWriteU24u<BL_BYTE_ORDER_LE>(p, v); } |
767 | BL_INLINE void blMemWriteU24uBE(void* p, uint32_t v) noexcept { blMemWriteU24u<BL_BYTE_ORDER_BE>(p, v); } |
768 | |
769 | BL_INLINE void blMemWriteI32a(void* p, int32_t x) noexcept { blMemWriteI32<BL_BYTE_ORDER_NATIVE, 4>(p, x); } |
770 | BL_INLINE void blMemWriteI32u(void* p, int32_t x) noexcept { blMemWriteI32<BL_BYTE_ORDER_NATIVE, 1>(p, x); } |
771 | BL_INLINE void blMemWriteU32a(void* p, uint32_t x) noexcept { blMemWriteU32<BL_BYTE_ORDER_NATIVE, 4>(p, x); } |
772 | BL_INLINE void blMemWriteU32u(void* p, uint32_t x) noexcept { blMemWriteU32<BL_BYTE_ORDER_NATIVE, 1>(p, x); } |
773 | |
774 | BL_INLINE void blMemWriteI32aLE(void* p, int32_t x) noexcept { blMemWriteI32<BL_BYTE_ORDER_LE, 4>(p, x); } |
775 | BL_INLINE void blMemWriteI32uLE(void* p, int32_t x) noexcept { blMemWriteI32<BL_BYTE_ORDER_LE, 1>(p, x); } |
776 | BL_INLINE void blMemWriteU32aLE(void* p, uint32_t x) noexcept { blMemWriteU32<BL_BYTE_ORDER_LE, 4>(p, x); } |
777 | BL_INLINE void blMemWriteU32uLE(void* p, uint32_t x) noexcept { blMemWriteU32<BL_BYTE_ORDER_LE, 1>(p, x); } |
778 | |
779 | BL_INLINE void blMemWriteI32aBE(void* p, int32_t x) noexcept { blMemWriteI32<BL_BYTE_ORDER_BE, 4>(p, x); } |
780 | BL_INLINE void blMemWriteI32uBE(void* p, int32_t x) noexcept { blMemWriteI32<BL_BYTE_ORDER_BE, 1>(p, x); } |
781 | BL_INLINE void blMemWriteU32aBE(void* p, uint32_t x) noexcept { blMemWriteU32<BL_BYTE_ORDER_BE, 4>(p, x); } |
782 | BL_INLINE void blMemWriteU32uBE(void* p, uint32_t x) noexcept { blMemWriteU32<BL_BYTE_ORDER_BE, 1>(p, x); } |
783 | |
784 | BL_INLINE void blMemWriteI64a(void* p, int64_t x) noexcept { blMemWriteI64<BL_BYTE_ORDER_NATIVE, 8>(p, x); } |
785 | BL_INLINE void blMemWriteI64u(void* p, int64_t x) noexcept { blMemWriteI64<BL_BYTE_ORDER_NATIVE, 1>(p, x); } |
786 | BL_INLINE void blMemWriteU64a(void* p, uint64_t x) noexcept { blMemWriteU64<BL_BYTE_ORDER_NATIVE, 8>(p, x); } |
787 | BL_INLINE void blMemWriteU64u(void* p, uint64_t x) noexcept { blMemWriteU64<BL_BYTE_ORDER_NATIVE, 1>(p, x); } |
788 | |
789 | BL_INLINE void blMemWriteI64aLE(void* p, int64_t x) noexcept { blMemWriteI64<BL_BYTE_ORDER_LE, 8>(p, x); } |
790 | BL_INLINE void blMemWriteI64uLE(void* p, int64_t x) noexcept { blMemWriteI64<BL_BYTE_ORDER_LE, 1>(p, x); } |
791 | BL_INLINE void blMemWriteU64aLE(void* p, uint64_t x) noexcept { blMemWriteU64<BL_BYTE_ORDER_LE, 8>(p, x); } |
792 | BL_INLINE void blMemWriteU64uLE(void* p, uint64_t x) noexcept { blMemWriteU64<BL_BYTE_ORDER_LE, 1>(p, x); } |
793 | |
794 | BL_INLINE void blMemWriteI64aBE(void* p, int64_t x) noexcept { blMemWriteI64<BL_BYTE_ORDER_BE, 8>(p, x); } |
795 | BL_INLINE void blMemWriteI64uBE(void* p, int64_t x) noexcept { blMemWriteI64<BL_BYTE_ORDER_BE, 1>(p, x); } |
796 | BL_INLINE void blMemWriteU64aBE(void* p, uint64_t x) noexcept { blMemWriteU64<BL_BYTE_ORDER_BE, 8>(p, x); } |
797 | BL_INLINE void blMemWriteU64uBE(void* p, uint64_t x) noexcept { blMemWriteU64<BL_BYTE_ORDER_BE, 1>(p, x); } |
798 | |
799 | } // {anonymous} |
800 | |
801 | // ============================================================================ |
// [blMemCopyInline]
803 | // ============================================================================ |
804 | |
805 | namespace { |
806 | |
807 | template<typename T> |
808 | static BL_INLINE void blMemCopyInlineT(T* dst, const T* src, size_t count) noexcept { |
809 | for (size_t i = 0; i < count; i++) |
810 | dst[i] = src[i]; |
811 | } |
812 | |
813 | //! Copies `n` bytes from `src` to `dst` optimized for small buffers. |
814 | static BL_INLINE void blMemCopyInline(void* dst, const void* src, size_t n) noexcept { |
815 | #if defined(__GNUC__) && BL_TARGET_ARCH_X86 |
816 | size_t unused; |
817 | __asm__ __volatile__( |
818 | "rep movsb" : "=&D" (dst), "=&S" (src), "=&c" (unused) |
819 | : "0" (dst), "1" (src), "2" (n) |
820 | : "memory" |
821 | ); |
822 | #elif defined(_MSC_VER) && BL_TARGET_ARCH_X86 |
823 | __movsb(static_cast<unsigned char *>(dst), static_cast<const unsigned char *>(src), n); |
824 | #else |
825 | blMemCopyInlineT<uint8_t>(static_cast<uint8_t*>(dst), static_cast<const uint8_t*>(src), n); |
826 | #endif |
827 | } |
828 | |
829 | } // {anonymous} |
830 | |
831 | // ============================================================================ |
832 | // [BLWrap<T>] |
833 | // ============================================================================ |
834 | |
835 | //! Wrapper to control construction & destruction of `T`. |
836 | template<typename T> |
837 | struct alignas(alignof(T)) BLWrap { |
838 | //! Storage required to instantiate `T`. |
839 | char _data[sizeof(T)]; |
840 | |
841 | //! \name Init / Destroy |
842 | //! \{ |
843 | |
844 | //! Placement new constructor. |
845 | BL_INLINE T* init() noexcept { |
846 | #if defined(_MSC_VER) && !defined(__clang__) |
847 | BL_ASSUME(_data != nullptr); |
848 | #endif |
849 | return new(static_cast<void*>(_data)) T; |
850 | } |
851 | |
852 | //! Placement new constructor with arguments. |
853 | template<typename... Args> |
854 | BL_INLINE T* init(Args&&... args) noexcept { |
855 | #if defined(_MSC_VER) && !defined(__clang__) |
856 | BL_ASSUME(_data != nullptr); |
857 | #endif |
858 | return new(static_cast<void*>(_data)) T(std::forward<Args>(args)...); |
859 | } |
860 | |
861 | //! Placement delete destructor. |
862 | BL_INLINE void destroy() noexcept { |
863 | #if defined(_MSC_VER) && !defined(__clang__) |
864 | BL_ASSUME(_data != nullptr); |
865 | #endif |
866 | static_cast<T*>(static_cast<void*>(_data))->~T(); |
867 | } |
868 | |
869 | //! \} |
870 | |
871 | //! \name Accessors |
872 | //! \{ |
873 | |
874 | BL_INLINE T* p() noexcept { return static_cast<T*>(static_cast<void*>(_data)); } |
875 | BL_INLINE const T* p() const noexcept { return static_cast<const T*>(static_cast<const void*>(_data)); } |
876 | |
877 | BL_INLINE operator T&() noexcept { return *p(); } |
878 | BL_INLINE operator const T&() const noexcept { return *p(); } |
879 | |
880 | BL_INLINE T& operator()() noexcept { return *p(); } |
881 | BL_INLINE const T& operator()() const noexcept { return *p(); } |
882 | |
883 | BL_INLINE T* operator&() noexcept { return p(); } |
884 | BL_INLINE const T* operator&() const noexcept { return p(); } |
885 | |
886 | BL_INLINE T* operator->() noexcept { return p(); } |
887 | BL_INLINE T const* operator->() const noexcept { return p(); } |
888 | |
889 | //! \} |
890 | }; |
891 | |
892 | // ============================================================================ |
893 | // [BLScopedAllocator] |
894 | // ============================================================================ |
895 | |
896 | //! A simple allocator that can be used to remember allocated memory so it can |
897 | //! be then freed in one go. Typically used in areas where some heap allocation |
898 | //! is required and at the end of the work it will all be freed. |
899 | class BLScopedAllocator { |
900 | public: |
901 | BL_NONCOPYABLE(BLScopedAllocator) |
902 | struct Link { Link* next; }; |
903 | |
904 | Link* links; |
905 | uint8_t* poolPtr; |
906 | uint8_t* poolMem; |
907 | uint8_t* poolEnd; |
908 | |
909 | BL_INLINE BLScopedAllocator() noexcept |
910 | : links(nullptr), |
911 | poolPtr(nullptr), |
912 | poolMem(nullptr), |
913 | poolEnd(nullptr) {} |
914 | |
915 | BL_INLINE BLScopedAllocator(void* poolMem, size_t poolSize) noexcept |
916 | : links(nullptr), |
917 | poolPtr(static_cast<uint8_t*>(poolMem)), |
918 | poolMem(static_cast<uint8_t*>(poolMem)), |
919 | poolEnd(static_cast<uint8_t*>(poolMem) + poolSize) {} |
920 | |
921 | BL_INLINE ~BLScopedAllocator() noexcept { reset(); } |
922 | |
923 | void* alloc(size_t size, size_t alignment = 1) noexcept; |
924 | void reset() noexcept; |
925 | }; |
926 | |
927 | // ============================================================================ |
928 | // [BLMemBuffer] |
929 | // ============================================================================ |
930 | |
931 | //! Memory buffer. |
932 | //! |
933 | //! Memory buffer is a helper class which holds pointer to an allocated memory |
934 | //! block, which will be released automatically by `BLMemBuffer` destructor or by |
935 | //! `reset()` call. |
936 | class BLMemBuffer { |
937 | public: |
938 | BL_NONCOPYABLE(BLMemBuffer) |
939 | |
940 | void* _mem; |
941 | void* _buf; |
942 | size_t _capacity; |
943 | |
944 | BL_INLINE BLMemBuffer() noexcept |
945 | : _mem(nullptr), |
946 | _buf(nullptr), |
947 | _capacity(0) {} |
948 | |
949 | BL_INLINE ~BLMemBuffer() noexcept { |
950 | _reset(); |
951 | } |
952 | |
953 | protected: |
954 | BL_INLINE BLMemBuffer(void* mem, void* buf, size_t capacity) noexcept |
955 | : _mem(mem), |
956 | _buf(buf), |
957 | _capacity(capacity) {} |
958 | |
959 | public: |
960 | BL_INLINE void* get() const noexcept { return _mem; } |
961 | BL_INLINE size_t capacity() const noexcept { return _capacity; } |
962 | |
963 | BL_INLINE void* alloc(size_t size) noexcept { |
964 | if (size <= _capacity) |
965 | return _mem; |
966 | |
967 | if (_mem != _buf) |
968 | free(_mem); |
969 | |
970 | _mem = malloc(size); |
971 | _capacity = size; |
972 | |
973 | return _mem; |
974 | } |
975 | |
976 | BL_INLINE void _reset() noexcept { |
977 | if (_mem != _buf) |
978 | free(_mem); |
979 | } |
980 | |
981 | BL_INLINE void reset() noexcept { |
982 | _reset(); |
983 | |
984 | _mem = nullptr; |
985 | _capacity = 0; |
986 | } |
987 | }; |
988 | |
989 | // ============================================================================ |
990 | // [BLMemBufferTmp<>] |
991 | // ============================================================================ |
992 | |
993 | //! Memory buffer (temporary). |
994 | //! |
995 | //! This template is for fast routines that need to use memory allocated on |
996 | //! the stack, but the memory requirement is not known at compile time. The |
997 | //! number of bytes allocated on the stack is described by `N` parameter. |
998 | template<size_t N> |
999 | class BLMemBufferTmp : public BLMemBuffer { |
1000 | public: |
1001 | BL_NONCOPYABLE(BLMemBufferTmp<N>) |
1002 | |
1003 | uint8_t _storage[N]; |
1004 | |
1005 | BL_INLINE BLMemBufferTmp() noexcept |
1006 | : BLMemBuffer(_storage, _storage, N) {} |
1007 | |
1008 | BL_INLINE ~BLMemBufferTmp() noexcept {} |
1009 | |
1010 | using BLMemBuffer::alloc; |
1011 | |
1012 | BL_INLINE void reset() noexcept { |
1013 | _reset(); |
1014 | _mem = _buf; |
1015 | _capacity = N; |
1016 | } |
1017 | }; |
1018 | |
1019 | //! \} |
1020 | //! \endcond |
1021 | |
1022 | #endif // BLEND2D_BLSUPPORT_P_H |
1023 | |