1// [Blend2D]
2// 2D Vector Graphics Powered by a JIT Compiler.
3//
4// [License]
5// Zlib - See LICENSE.md file in the package.
6
7#ifndef BLEND2D_BLTABLES_P_H
8#define BLEND2D_BLTABLES_P_H
9
#include "./blsupport_p.h"

#include <utility>
11
12//! \cond INTERNAL
13//! \addtogroup blend2d_internal
14//! \{
15
16// ============================================================================
17// [BLLookupTable]
18// ============================================================================
19
//! Fixed-size table of `N` values of type `T` - the result type of the lookup
//! table generators.
//!
//! The struct has no constructors, so it is initialized by listing its entries
//! in braces.
template<typename T, size_t N>
struct BLLookupTable {
  //! Table entries.
  T data[N];

  //! Returns the number of entries, which is always `N`.
  constexpr size_t size() const noexcept {
    return N;
  }

  //! Returns a read-only reference to the entry at `index` (not range-checked).
  constexpr const T& operator[](size_t index) const noexcept {
    return data[index];
  }
};
28
29// NOTE: We only need `index_sequence` and `make_index_sequence` to generate
30// our lookup tables. These were added in C++14 so we have to provide our own
31// implementation for C++11 mode.
32#if __cplusplus >= 201402L
33namespace BLInternal {
34 // C++14 implementation uses `std::index_sequence` and `std::make_index_sequence`.
35 using std::index_sequence;
36 using std::make_index_sequence;
37}
38#else
39namespace BLInternal {
40 // C++11 implementation based on <https://stackoverflow.com/a/17426611/410767>.
41 template <size_t... Ints>
42 struct index_sequence {
43 using type = index_sequence;
44 using value_type = size_t;
45 static constexpr std::size_t size() noexcept { return sizeof...(Ints); }
46 };
47
48 template <class S1, class S2>
49 struct _merge_and_renumber;
50
51 template <size_t... I1, size_t... I2>
52 struct _merge_and_renumber<index_sequence<I1...>, index_sequence<I2...>>
53 : index_sequence<I1..., (sizeof...(I1)+I2)...> {};
54
55 template <size_t N>
56 struct make_index_sequence
57 : _merge_and_renumber<typename make_index_sequence<N/2>::type,
58 typename make_index_sequence<N - N/2>::type> {};
59
60 template<> struct make_index_sequence<0> : index_sequence<> { };
61 template<> struct make_index_sequence<1> : index_sequence<0> { };
62}
63#endif
64
65template<typename T, size_t N, class Gen, size_t... Indexes>
66static constexpr BLLookupTable<T, N> blLookupTableImpl(BLInternal::index_sequence<Indexes...>) noexcept {
67 return BLLookupTable<T, N> {{ T(Gen::value(Indexes))... }};
68}
69
70//! Creates a lookup table of `BLLookupTable<T[N]>` by using the generator `Gen`.
71template<typename T, size_t N, class Gen>
72static constexpr BLLookupTable<T, N> blLookupTable() noexcept {
73 return blLookupTableImpl<T, N, Gen>(BLInternal::make_index_sequence<N>{});
74}
75
76// ============================================================================
77// [BLBitCountOfByteTable]
78// ============================================================================
79
//! Table of 256 `uint8_t` entries; only declared here, the definition lives
//! outside of this header.
//!
//! NOTE(review): judging by the name, entry `i` presumably holds the number of
//! bits set in the byte value `i` - confirm against the definition.
BL_HIDDEN extern const BLLookupTable<uint8_t, 256> blBitCountOfByteTable;
81
82// ============================================================================
83// [BLModuloTable]
84// ============================================================================
85
//! Table that contains precomputed `{1..16} % N`.
//!
//! One instance holds 16 bytes and is aligned to 16 bytes.
//! NOTE(review): presumably so that a whole row can be fetched by a single
//! aligned 128-bit load - confirm against the users of the table.
struct alignas(16) BLModuloTable {
  uint8_t x1_16[16];
};

//! Array of 18 modulo rows; only declared here, the definition lives outside
//! of this header.
//! NOTE(review): presumably indexed by the divisor `N` - confirm against the
//! definition.
BL_HIDDEN extern const BLModuloTable blModuloTable[18];
92
93// ============================================================================
94// [BLCommonTable]
95// ============================================================================
96
//! Common table that contains constants used across Blend2D library, but most
//! importantly in pipelines (either static or dynamic). The advantage of this
//! table is that it contains all constants that SIMD code (or also a generic
//! code) requires so only one register (pointer) is required to address all
//! of them in either static or generated pipelines.
//!
//! The struct is aligned to 32 bytes. Members are addressed relative to the
//! start of the table, so their order and sizes must not be changed without
//! updating the definition of `blCommonTable` accordingly.
struct alignas(32) BLCommonTable {
  // --------------------------------------------------------------------------
  // [I128 Constants]
  // --------------------------------------------------------------------------

  // The hexadecimal suffix of each `i128_` member below spells the 64-bit
  // pattern described by its comment; each member occupies 128 bits.

  uint64_t i128_0000000000000000[2];     // 8xU16(0x0000).

  uint64_t i128_007F007F007F007F[2];     // 8xU16(0x007F).
  uint64_t i128_0080008000800080[2];     // 8xU16(0x0080).
  uint64_t i128_00FF00FF00FF00FF[2];     // 8xU16(0x00FF).
  uint64_t i128_0100010001000100[2];     // 8xU16(0x0100).
  uint64_t i128_0101010101010101[2];     // 8xU16(0x0101).
  uint64_t i128_01FF01FF01FF01FF[2];     // 8xU16(0x01FF).
  uint64_t i128_0200020002000200[2];     // 8xU16(0x0200).
  uint64_t i128_8000800080008000[2];     // 8xU16(0x8000).
  uint64_t i128_FFFFFFFFFFFFFFFF[2];     // 8xU16(0xFFFF).

  uint64_t i128_000000FF000000FF[2];     // 4xU32(0x000000FF).
  uint64_t i128_0000010000000100[2];     // 4xU32(0x00000100).
  uint64_t i128_000001FF000001FF[2];     // 4xU32(0x000001FF).
  uint64_t i128_0000020000000200[2];     // 4xU32(0x00000200).
  uint64_t i128_0000FFFF0000FFFF[2];     // 4xU32(0x0000FFFF).
  uint64_t i128_0002000000020000[2];     // 4xU32(256u << 9)
  uint64_t i128_00FFFFFF00FFFFFF[2];     // 4xU32(0x00FFFFFF).
  uint64_t i128_FF000000FF000000[2];     // 4xU32(0xFF000000).
  uint64_t i128_FFFF0000FFFF0000[2];     // 4xU32(0xFFFF0000).

  uint64_t i128_000000FF00FF00FF[2];     // 2xU64(0x000000FF00FF00FF).
  uint64_t i128_0000010001000100[2];     // 2xU64(0x0000010001000100).
  uint64_t i128_0000080000000800[2];     // 2xU64(0x0000080000000800).
  uint64_t i128_0000FFFFFFFFFFFF[2];     // 2xU64(0x0000FFFFFFFFFFFF).
  uint64_t i128_00FF000000000000[2];     // 2xU64(0x00FF000000000000).
  uint64_t i128_0100000000000000[2];     // 2xU64(0x0100000000000000).
  uint64_t i128_0101010100000000[2];     // 2xU64(0x0101010100000000).
  uint64_t i128_FFFF000000000000[2];     // 2xU64(0xFFFF000000000000).
  uint64_t i128_FFFFFFFF00000000[2];     // 2xU64(0xFFFFFFFF00000000).

  // TODO: These constants need some renaming...

  //! uint32_t(0), uint32_t(0xFFFFFFFF), uint32_t(0xFFFFFFFF), uint32_t(0xFFFFFFFF).
  uint32_t i128_FFFFFFFF_FFFFFFFF_FFFFFFFF_0[4];

  uint32_t xmm_u32_0_1_2_3[4];           // uint32_t(0), uint32_t(1), uint32_t(2), uint32_t(3).
  uint32_t xmm_u32_4[4];                 // uint32_t(4).

  // --------------------------------------------------------------------------
  // [F128 Constants]
  // --------------------------------------------------------------------------

  uint64_t f128_sgn[2];                  // Mask of all `float` bits containing a sign.
  uint64_t f128_abs[2];                  // Mask of all `float` bits without a sign.
  uint64_t f128_abs_lo[2];               // Mask of all LO `float` bits without a sign.
  uint64_t f128_abs_hi[2];               // Mask of all HI `float` bits without a sign.
  float f128_round_max[4];               // Maximum float value to round (8388608).
  float f128_round_magic[4];             // Magic float used by round (12582912).

  float f128_1[4];                       // Vector of `1.0f`.
  float f128_4[4];                       // Vector of `4.0f`.
  float f128_255[4];                     // Vector of `255.0f`.
  float f128_1e_m3[4];                   // Vector of `1e-3`.
  float f128_1e_m20[4];                  // Vector of `1e-20`.
  float f128_1div255[4];                 // Vector of `1.0f / 255.0f`.
  float f128_3_2_1_0[4];                 // Vector of `[3f, 2f, 1f, 0f]`.

  // --------------------------------------------------------------------------
  // [D128 Constants]
  // --------------------------------------------------------------------------

  uint64_t d128_sgn[2];                  // Mask of all `double` bits containing a sign.
  uint64_t d128_abs[2];                  // Mask of all `double` bits without a sign.
  uint64_t d128_abs_lo[2];               // Mask of LO `double` bits without a sign.
  uint64_t d128_abs_hi[2];               // Mask of HI `double` bits without a sign.

  // NOTE(review): The two members below hold 4 doubles (32 bytes) each, unlike
  // every other `d128_` member, which holds 16 bytes - confirm that this size
  // is intentional and matches the initializer of `blCommonTable`.
  double d128_round_max[4];              // Maximum double value to round (4503599627370496).
  double d128_round_magic[4];            // Magic double used by round (6755399441055744).

  double d128_1[2];                      // Vector of `1.0`.
  double d128_1e_m20[2];                 // Vector of `1e-20`.
  double d128_4[2];                      // Vector of `4.0`.
  double d128_m1[2];                     // Vector of `-1.0`.

  // --------------------------------------------------------------------------
  // [PSHUFB Constants]
  // --------------------------------------------------------------------------

  // PSHUFB predicates for performing shuffles.
  uint8_t i128_pshufb_u32_to_u8_lo[16];
  uint8_t i128_pshufb_u32_to_u16_lo[16];

  // PSHUFB predicates for unpacking ARGB32 into A8 components.
  uint8_t i128_pshufb_packed_argb32_2x_lo_to_unpacked_a8[16];
  uint8_t i128_pshufb_packed_argb32_2x_hi_to_unpacked_a8[16];

  // NOTE(review): 32 bytes with no documented purpose; presumably padding or
  // reserved space placed before the 256-bit constants - confirm before
  // resizing or removing, as it shifts the offset of every member that follows.
  uint8_t dummy[32];

  // --------------------------------------------------------------------------
  // [I256 Constants]
  // --------------------------------------------------------------------------

  // Same naming scheme as the `i128_` members, each member occupies 256 bits.

  uint64_t i256_007F007F007F007F[4];     // 16xU16(0x007F).
  uint64_t i256_0080008000800080[4];     // 16xU16(0x0080).
  uint64_t i256_00FF00FF00FF00FF[4];     // 16xU16(0x00FF).
  uint64_t i256_0100010001000100[4];     // 16xU16(0x0100).
  uint64_t i256_0101010101010101[4];     // 16xU16(0x0101).
  uint64_t i256_01FF01FF01FF01FF[4];     // 16xU16(0x01FF).
  uint64_t i256_0200020002000200[4];     // 16xU16(0x0200).
  uint64_t i256_8000800080008000[4];     // 16xU16(0x8000).
  uint64_t i256_FFFFFFFFFFFFFFFF[4];     // 16xU16(0xFFFF).

  // --------------------------------------------------------------------------
  // [Conical]
  // --------------------------------------------------------------------------

  //! Identifies one entry of `xmm_f_con`; `kTableCount` is the number of
  //! entries.
  //!
  //! NOTE(review): the numeric suffix presumably is the size `N` of the table
  //! the constants were computed for - confirm against the definition.
  enum TableId {
    kTable256 = 0,
    kTable512 = 1,
    kTable1024 = 2,
    kTable2048 = 3,
    kTable4096 = 4,
    kTableCount = 5
  };

  //! Constants for one table size, each replicated into a vector of 4 floats.
  //!
  //! NOTE(review): `n_div_1`, `n_div_2`, and `n_div_4` presumably hold `N`,
  //! `N / 2`, and `N / 4`; the meaning of `n_extra` is not visible in this
  //! header - confirm against the definition.
  struct Conical {
    float n_div_1[4];
    float n_div_2[4];
    float n_div_4[4];
    float n_extra[4];

    // Polynomial to approximate `atan(x) * N / 2PI`:
    //   `x * (Q0 + x*x * (Q1 + x*x * (Q2 + x*x * Q3)))`
    // Where:
    //   `x >= 0 && x <= 1`
    float q0[4];
    float q1[4];
    float q2[4];
    float q3[4];
  } xmm_f_con[kTableCount];

  // --------------------------------------------------------------------------
  // [Div24Bit]
  // --------------------------------------------------------------------------

  //! Table, which can be used to turn integer division into multiplication and
  //! shift. It supports division by 0 (multiplies by zero) up to 255 using 24
  //! bits of precision. The multiplied product has to be shifted to the right
  //! by 16 bits to receive the final result.
  //!
  //! Usage:
  //!   `if (b) ? (a * 255) / b : 0` can be rewritten to `(a * div24bit[b]) >> 16`.
  BLLookupTable<uint32_t, 256> div24bit;
};

//! The single shared instance of `BLCommonTable`; only declared here, the
//! definition lives outside of this header.
BL_HIDDEN extern const BLCommonTable blCommonTable;
254
255//! \}
256//! \endcond
257
258#endif // BLEND2D_BLTABLES_P_H
259