1 | // [Blend2D] |
2 | // 2D Vector Graphics Powered by a JIT Compiler. |
3 | // |
4 | // [License] |
5 | // Zlib - See LICENSE.md file in the package. |
6 | |
7 | #ifndef BLEND2D_BLTABLES_P_H |
8 | #define BLEND2D_BLTABLES_P_H |
9 | |
10 | #include "./blsupport_p.h" |
11 | |
12 | //! \cond INTERNAL |
13 | //! \addtogroup blend2d_internal |
14 | //! \{ |
15 | |
16 | // ============================================================================ |
17 | // [BLLookupTable] |
18 | // ============================================================================ |
19 | |
//! Fixed-size table of `N` items of type `T` - the type returned by the lookup
//! table generators (`blLookupTable()`).
//!
//! The struct is intentionally a plain aggregate (public array, no constructors)
//! so it can be brace-initialized from a pack expansion in a constant expression.
template<typename T, size_t N>
struct BLLookupTable {
  //! Table items.
  T data[N];

  //! Returns a read-only reference to the item at `index` (not bounds checked).
  constexpr const T& operator[](size_t index) const noexcept {
    return data[index];
  }

  //! Returns the number of items stored in the table, which is always `N`.
  constexpr size_t size() const noexcept {
    return N;
  }
};
28 | |
// NOTE: Lookup table generation only requires `index_sequence` and
// `make_index_sequence`. Both were introduced by C++14, so a local replacement
// is provided when the code is compiled in C++11 mode.
namespace BLInternal {

#if __cplusplus >= 201402L

// C++14 and later - reuse `std::index_sequence` and `std::make_index_sequence`.
using std::index_sequence;
using std::make_index_sequence;

#else

// C++11 fallback based on <https://stackoverflow.com/a/17426611/410767>.

// Compile-time sequence of `size_t` values.
template <size_t... Values>
struct index_sequence {
  using type = index_sequence;
  using value_type = size_t;

  static constexpr std::size_t size() noexcept {
    return sizeof...(Values);
  }
};

// Concatenates two sequences; every value of the second sequence is offset by
// the length of the first one.
template <class Lo, class Hi>
struct _merge_and_renumber;

template <size_t... LoValues, size_t... HiValues>
struct _merge_and_renumber<index_sequence<LoValues...>, index_sequence<HiValues...>>
  : index_sequence<LoValues..., (sizeof...(LoValues) + HiValues)...> {};

// Builds `index_sequence<0, 1, ..., N-1>` by splitting `N` into two halves and
// merging the sequences generated for each half.
template <size_t N>
struct make_index_sequence
  : _merge_and_renumber<typename make_index_sequence<N / 2>::type,
                        typename make_index_sequence<N - N / 2>::type> {};

// Terminators of the recursion above.
template<> struct make_index_sequence<0> : index_sequence<> {};
template<> struct make_index_sequence<1> : index_sequence<0> {};

#endif

} // {BLInternal}
64 | |
65 | template<typename T, size_t N, class Gen, size_t... Indexes> |
66 | static constexpr BLLookupTable<T, N> blLookupTableImpl(BLInternal::index_sequence<Indexes...>) noexcept { |
67 | return BLLookupTable<T, N> {{ T(Gen::value(Indexes))... }}; |
68 | } |
69 | |
70 | //! Creates a lookup table of `BLLookupTable<T[N]>` by using the generator `Gen`. |
71 | template<typename T, size_t N, class Gen> |
72 | static constexpr BLLookupTable<T, N> blLookupTable() noexcept { |
73 | return blLookupTableImpl<T, N, Gen>(BLInternal::make_index_sequence<N>{}); |
74 | } |
75 | |
76 | // ============================================================================ |
77 | // [BLBitCountOfByteTable] |
78 | // ============================================================================ |
79 | |
//! Table of 256 `uint8_t` items. Only declared here - the definition (and thus
//! the actual content) is not part of this header.
//!
//! NOTE(review): judging by the name, item `i` presumably holds the number of
//! bits set in the byte value `i` - confirm against the definition.
BL_HIDDEN extern const BLLookupTable<uint8_t, 256> blBitCountOfByteTable;
81 | |
82 | // ============================================================================ |
83 | // [BLModuloTable] |
84 | // ============================================================================ |
85 | |
//! Table that contains precomputed `{1..16} % N`.
//!
//! A single record consists of 16 one-byte items and is aligned to 16 bytes.
struct alignas(16) BLModuloTable {
  // NOTE(review): presumably `x1_16[i]` holds `(i + 1) % N` for the `N` this
  // record was generated for - confirm against the table definition.
  uint8_t x1_16[16];
};
90 | |
//! Array of 18 `BLModuloTable` records; only declared here, defined elsewhere.
//!
//! NOTE(review): presumably indexed by the divisor `N` (0..17) - confirm
//! against the definition and its users.
BL_HIDDEN extern const BLModuloTable blModuloTable[18];
92 | |
93 | // ============================================================================ |
94 | // [BLCommonTable] |
95 | // ============================================================================ |
96 | |
97 | //! Common table that contains constants used across Blend2D library, but most |
98 | //! importantly in pipelines (either static or dynamic. The advantage of this |
99 | //! table is that it contains all constants that SIMD code (or also a generic |
100 | //! code) requires so only one register (pointer) is required to address all |
101 | //! of them in either static or generated pipelines. |
102 | struct alignas(32) BLCommonTable { |
103 | // -------------------------------------------------------------------------- |
104 | // [I128 Constants] |
105 | // -------------------------------------------------------------------------- |
106 | |
107 | uint64_t i128_0000000000000000[2]; // 8xU16(0x0000). |
108 | |
109 | uint64_t i128_007F007F007F007F[2]; // 8xU16(0x007F). |
110 | uint64_t i128_0080008000800080[2]; // 8xU16(0x0080). |
111 | uint64_t i128_00FF00FF00FF00FF[2]; // 8xU16(0x00FF). |
112 | uint64_t i128_0100010001000100[2]; // 8xU16(0x0100). |
113 | uint64_t i128_0101010101010101[2]; // 8xU16(0x0101). |
114 | uint64_t i128_01FF01FF01FF01FF[2]; // 8xU16(0x01FF). |
115 | uint64_t i128_0200020002000200[2]; // 8xU16(0x0200). |
116 | uint64_t i128_8000800080008000[2]; // 8xU16(0x8000). |
117 | uint64_t i128_FFFFFFFFFFFFFFFF[2]; // 8xU16(0xFFFF). |
118 | |
119 | uint64_t i128_000000FF000000FF[2]; // 4xU32(0x000000FF). |
120 | uint64_t i128_0000010000000100[2]; // 4xU32(0x00000100). |
121 | uint64_t i128_000001FF000001FF[2]; // 4xU32(0x000001FF). |
122 | uint64_t i128_0000020000000200[2]; // 4xU32(0x00000200). |
123 | uint64_t i128_0000FFFF0000FFFF[2]; // 4xU32(0x0000FFFF). |
124 | uint64_t i128_0002000000020000[2]; // 4xU32(256u << 9) |
125 | uint64_t i128_00FFFFFF00FFFFFF[2]; // 4xU32(0x00FFFFFF). |
126 | uint64_t i128_FF000000FF000000[2]; // 4xU32(0xFF000000). |
127 | uint64_t i128_FFFF0000FFFF0000[2]; // 4xU32(0xFFFF0000). |
128 | |
129 | uint64_t i128_000000FF00FF00FF[2]; // 2xU64(0x000000FF00FF00FF). |
130 | uint64_t i128_0000010001000100[2]; // 2xU64(0x0000010001000100). |
131 | uint64_t i128_0000080000000800[2]; // 2xU64(0x0000080000000800). |
132 | uint64_t i128_0000FFFFFFFFFFFF[2]; // 2xU64(0x0000FFFFFFFFFFFF). |
133 | uint64_t i128_00FF000000000000[2]; // 2xU64(0x00FF000000000000). |
134 | uint64_t i128_0100000000000000[2]; // 2xU64(0x0100000000000000). |
135 | uint64_t i128_0101010100000000[2]; // 2xU64(0x0101010100000000). |
136 | uint64_t i128_FFFF000000000000[2]; // 2xU64(0xFFFF000000000000). |
137 | uint64_t i128_FFFFFFFF00000000[2]; // 2xU64(0xFFFFFFFF00000000). |
138 | |
139 | // TODO: These constants need some renaming... |
140 | |
141 | //! uint32_t(0), uint32_t(0xFFFFFFFF), uint32_t(0xFFFFFFFF), uint32_t(0xFFFFFFFF). |
142 | uint32_t i128_FFFFFFFF_FFFFFFFF_FFFFFFFF_0[4]; |
143 | |
144 | uint32_t xmm_u32_0_1_2_3[4]; // uint32_t(0), uint32_t(1), uint32_t(2), uint32_t(3). |
145 | uint32_t xmm_u32_4[4]; // uint32_t(4). |
146 | |
147 | // -------------------------------------------------------------------------- |
148 | // [F128 Constants] |
149 | // -------------------------------------------------------------------------- |
150 | |
151 | uint64_t f128_sgn[2]; // Mask of all `float` bits containing a sign. |
152 | uint64_t f128_abs[2]; // Mask of all `float` bits without a sign. |
153 | uint64_t f128_abs_lo[2]; // Mask of all LO `float` bits without a sign. |
154 | uint64_t f128_abs_hi[2]; // Mask of all HI `float` bits without a sign. |
155 | float f128_round_max[4]; // Maximum float value to round (8388608). |
156 | float f128_round_magic[4]; // Magic float used by round (12582912). |
157 | |
158 | float f128_1[4]; // Vector of `1.0f`. |
159 | float f128_4[4]; // Vector of `4.0f`. |
160 | float f128_255[4]; // Vector of `255.0f`. |
161 | float f128_1e_m3[4]; // Vector of `1e-3`. |
162 | float f128_1e_m20[4]; // Vector of `1e-20`. |
163 | float f128_1div255[4]; // Vector of `1.0f / 255.0f`. |
164 | float f128_3_2_1_0[4]; // Vector of `[3f, 2f, 1f, 0f]`. |
165 | |
166 | // -------------------------------------------------------------------------- |
167 | // [D128 Constants] |
168 | // -------------------------------------------------------------------------- |
169 | |
170 | uint64_t d128_sgn[2]; // Mask of all `double` bits containing a sign. |
171 | uint64_t d128_abs[2]; // Mask of all `double` bits without a sign. |
172 | uint64_t d128_abs_lo[2]; // Mask of LO `double` bits without a sign. |
173 | uint64_t d128_abs_hi[2]; // Mask of HI `double` bits without a sign. |
174 | double d128_round_max[4]; // Maximum double value to round (4503599627370496). |
175 | double d128_round_magic[4]; // Magic double used by round (6755399441055744). |
176 | |
177 | double d128_1[2]; // Vector of `1.0`. |
178 | double d128_1e_m20[2]; // Vector of `1e-20`. |
179 | double d128_4[2]; // Vector of `4.0`. |
180 | double d128_m1[2]; // Vector of `-1.0`. |
181 | |
182 | // -------------------------------------------------------------------------- |
183 | // [PSHUFB Constants] |
184 | // -------------------------------------------------------------------------- |
185 | |
186 | // PSHUFB predicates for performing shuffles. |
187 | uint8_t i128_pshufb_u32_to_u8_lo[16]; |
188 | uint8_t i128_pshufb_u32_to_u16_lo[16]; |
189 | |
190 | // PSHUFB predicates for unpacking ARGB32 into A8 components. |
191 | uint8_t i128_pshufb_packed_argb32_2x_lo_to_unpacked_a8[16]; |
192 | uint8_t i128_pshufb_packed_argb32_2x_hi_to_unpacked_a8[16]; |
193 | |
194 | uint8_t dummy[32]; |
195 | |
196 | // -------------------------------------------------------------------------- |
197 | // [I256 Constants] |
198 | // -------------------------------------------------------------------------- |
199 | |
200 | uint64_t i256_007F007F007F007F[4]; // 16xU16(0x007F). |
201 | uint64_t i256_0080008000800080[4]; // 16xU16(0x0080). |
202 | uint64_t i256_00FF00FF00FF00FF[4]; // 16xU16(0x00FF). |
203 | uint64_t i256_0100010001000100[4]; // 16xU16(0x0100). |
204 | uint64_t i256_0101010101010101[4]; // 16xU16(0x0101). |
205 | uint64_t i256_01FF01FF01FF01FF[4]; // 16xU16(0x01FF). |
206 | uint64_t i256_0200020002000200[4]; // 16xU16(0x0200). |
207 | uint64_t i256_8000800080008000[4]; // 16xU16(0x8000). |
208 | uint64_t i256_FFFFFFFFFFFFFFFF[4]; // 16xU16(0xFFFF). |
209 | |
210 | // -------------------------------------------------------------------------- |
211 | // [Conical] |
212 | // -------------------------------------------------------------------------- |
213 | |
214 | enum TableId { |
215 | kTable256 = 0, |
216 | kTable512 = 1, |
217 | kTable1024 = 2, |
218 | kTable2048 = 3, |
219 | kTable4096 = 4, |
220 | kTableCount = 5 |
221 | }; |
222 | |
223 | struct Conical { |
224 | float n_div_1[4]; |
225 | float n_div_2[4]; |
226 | float n_div_4[4]; |
227 | float [4]; |
228 | |
229 | // Polynomial to approximate `atan(x) * N / 2PI`: |
230 | // `x * (Q0 + x*x * (Q1 + x*x * (Q2 + x*x * Q3)))` |
231 | // Where: |
232 | // `x >= 0 && x <= 1` |
233 | float q0[4]; |
234 | float q1[4]; |
235 | float q2[4]; |
236 | float q3[4]; |
237 | } xmm_f_con[kTableCount]; |
238 | |
239 | // -------------------------------------------------------------------------- |
240 | // [Div24Bit] |
241 | // -------------------------------------------------------------------------- |
242 | |
243 | //! Table, which can be used to turn integer division into multiplication and |
244 | //! shift. It supports division by 0 (multiplies by zero) up to 255 using 24 |
245 | //! bits of precision. The multiplied product has to be shifted to the right |
246 | //! by 16 bits to receive the final result. |
247 | //! |
248 | //! Usage: |
249 | //! `if (b) ? (a * 255) / b : 0` can be rewritten to `(a * div24bit[b]) >> 16`. |
250 | BLLookupTable<uint32_t, 256> div24bit; |
251 | }; |
252 | |
//! The single shared, read-only instance of `BLCommonTable`. Only declared
//! here - the definition that provides the constant values lives elsewhere.
BL_HIDDEN extern const BLCommonTable blCommonTable;
254 | |
255 | //! \} |
256 | //! \endcond |
257 | |
258 | #endif // BLEND2D_BLTABLES_P_H |
259 | |