1// [Blend2D]
2// 2D Vector Graphics Powered by a JIT Compiler.
3//
4// [License]
5// Zlib - See LICENSE.md file in the package.
6
7#include "./blapi-build_p.h"
8#include "./bltables_p.h"
9
10// ============================================================================
11// [BLBitCountOfByteTable]
12// ============================================================================
13
//! Generator that yields, for each table index, the number of bits set in a byte.
struct BLBitCountOfByteTableGen {
  //! Adds up the `n` least significant bits of `x`.
  //!
  //! Expressed as one recursive return statement so it remains a valid C++11
  //! constexpr function (no local variables, no loops).
  static constexpr unsigned sumLowBits(size_t x, unsigned n) noexcept {
    return n == 0u ? 0u : unsigned(x & 1u) + sumLowBits(x >> 1, n - 1u);
  }

  //! Returns the count of set bits among the low 8 bits of `i`; any higher
  //! bits of `i` do not contribute to the result.
  static constexpr uint8_t value(size_t i) noexcept {
    return uint8_t(sumLowBits(i, 8u));
  }
};
20
// File-local, constant-evaluated master copy of the bit-count table: 256 entries
// of `uint8_t`, each produced by `BLBitCountOfByteTableGen::value()`.
21static constexpr const BLLookupTable<uint8_t, 256> blBitCountOfByteTable_
22 = blLookupTable<uint8_t, 256, BLBitCountOfByteTableGen>();
23
// The table under its final name, copy-initialized from the constexpr master
// copy. This is the same two-step pattern used for `blCommonTable` further down
// in this file (see the NOTE there regarding MSVC).
// NOTE(review): presumably declared `extern` in "bltables_p.h" - confirm.
24const BLLookupTable<uint8_t, 256> blBitCountOfByteTable = blBitCountOfByteTable_;
25
26// ============================================================================
27// [BLModuloTable]
28// ============================================================================
29
30#define INV() {{ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 }}
31#define DEF(N) {{ 1%N, 2%N, 3%N, 4%N, 5%N, 6%N, 7%N, 8%N, 9%N, 10%N, 11%N, 12%N, 13%N, 14%N, 15%N, 16%N }}
32
33const BLModuloTable blModuloTable[18] = {
34 INV( ), DEF( 1), DEF( 2), DEF( 3),
35 DEF( 4), DEF( 5), DEF( 6), DEF( 7),
36 DEF( 8), DEF( 9), DEF(10), DEF(11),
37 DEF(12), DEF(13), DEF(14), DEF(15),
38 DEF(16), DEF(17)
39};
40
41#undef DEF
42#undef INV
43
44// ============================================================================
45// [BLCommonTable]
46// ============================================================================
47
//! Generator of the entries of the Div24Bit reciprocal lookup table.
struct BLDiv24BitReciprocalGen {
  //! Rounds `d` up to the nearest integer (the truncated value plus one when
  //! `d` has a fractional part).
  //!
  //! Kept as a single return statement because C++11 constexpr functions
  //! cannot declare local variables.
  static constexpr uint32_t calc(double d) noexcept {
    return (double(uint32_t(d)) < d) ? uint32_t(d) + 1u : uint32_t(d);
  }

  //! Returns `0xFF0000 / i` rounded up, or zero when `i` is zero.
  static constexpr uint32_t value(size_t i) noexcept {
    return i == 0 ? uint32_t(0) : calc(double(0xFF0000) / double(i));
  }
};
59
// Initializer-list helpers used by the constant tables that follow:
//
//   REPEAT_1X(...)       - the arguments exactly once.
//   REPEAT_2X(...)       - the arguments two times in a row.
//   REPEAT_4X(...)       - the arguments four times in a row.
//   FLOAT_4X(A, B, C, D) - four values, each explicitly converted to `float`.
#define REPEAT_1X(...) __VA_ARGS__
#define REPEAT_2X(...) REPEAT_1X(__VA_ARGS__), REPEAT_1X(__VA_ARGS__)
#define REPEAT_4X(...) REPEAT_2X(__VA_ARGS__), REPEAT_2X(__VA_ARGS__)
#define FLOAT_4X(A, B, C, D) \
  static_cast<float>(A),     \
  static_cast<float>(B),     \
  static_cast<float>(C),     \
  static_cast<float>(D)
64
// File-local, constant-evaluated master copy of the shared constants table.
// The initializers below are positional, so their order has to stay in sync
// with the member order of `BLCommonTable` (declared outside of this file).
65static constexpr const BLCommonTable blCommonTable_ = {
66 // --------------------------------------------------------------------------
67 // [I128 Constants]
68 // --------------------------------------------------------------------------
  // Each REPEAT_2X row duplicates one 64-bit pattern to fill a 128-bit constant.
69
70 { REPEAT_2X(0x0000000000000000u) },
71 { REPEAT_2X(0x007F007F007F007Fu) },
72 { REPEAT_2X(0x0080008000800080u) },
73 { REPEAT_2X(0x00FF00FF00FF00FFu) },
74 { REPEAT_2X(0x0100010001000100u) },
75 { REPEAT_2X(0x0101010101010101u) },
76 { REPEAT_2X(0x01FF01FF01FF01FFu) },
77 { REPEAT_2X(0x0200020002000200u) },
78 { REPEAT_2X(0x8000800080008000u) },
79 { REPEAT_2X(0xFFFFFFFFFFFFFFFFu) },
80
81 { REPEAT_2X(0x000000FF000000FFu) },
82 { REPEAT_2X(0x0000010000000100u) },
83 { REPEAT_2X(0x000001FF000001FFu) },
84 { REPEAT_2X(0x0000020000000200u) },
85 { REPEAT_2X(0x0000FFFF0000FFFFu) },
86 { REPEAT_2X(0x0002000000020000u) },
87 { REPEAT_2X(0x00FFFFFF00FFFFFFu) },
88 { REPEAT_2X(0xFF000000FF000000u) },
89 { REPEAT_2X(0xFFFF0000FFFF0000u) },
90
91 { REPEAT_2X(0x000000FF00FF00FFu) },
92 { REPEAT_2X(0x0000010001000100u) },
93 { REPEAT_2X(0x0000080000000800u) },
94 { REPEAT_2X(0x0000FFFFFFFFFFFFu) },
95 { REPEAT_2X(0x00FF000000000000u) },
96 { REPEAT_2X(0x0100000000000000u) },
97 { REPEAT_2X(0x0101010100000000u) },
98 { REPEAT_2X(0xFFFF000000000000u) },
99 { REPEAT_2X(0xFFFFFFFF00000000u) },
100
101 { 0xFFFFFFFFu, 0xFFFFFFFFu, 0xFFFFFFFFu, 0 }, // i128_FFFFFFFF_FFFFFFFF_FFFFFFFF_0.
102
103 { 0, 1, 2, 3 }, // xmm_u32_0_1_2_3.
104 { REPEAT_4X(4) }, // xmm_u32_4.
105
106 // --------------------------------------------------------------------------
107 // [F128 Constants]
108 // --------------------------------------------------------------------------
  // The first four rows are bit masks made of 32-bit lanes: 0x80000000 is the
  // IEEE-754 single-precision sign bit and 0x7FFFFFFF is every bit but the sign.
  // 8388608.0f is 2^23 and 12582912.0f is 1.5 * 2^23.
109
110 { REPEAT_2X(0x8000000080000000) },
111 { REPEAT_2X(0x7FFFFFFF7FFFFFFF) },
112 { REPEAT_2X(0xFFFFFFFF7FFFFFFF) },
113 { REPEAT_2X(0x7FFFFFFFFFFFFFFF) },
114 { REPEAT_4X(8388608.0f) },
115 { REPEAT_4X(12582912.0f) },
116
117 { REPEAT_4X(1.0f) }, // f128_1.
118 { REPEAT_4X(4.0f) }, // f128_4.
119 { REPEAT_4X(255.0f) }, // f128_255.
120 { REPEAT_4X(1e-3f) }, // f128_1e_m3.
121 { REPEAT_4X(1e-20f) }, // f128_1e_m20.
122 { REPEAT_4X(1.0f / 255.0f) }, // f128_1div255.
123 { 0.0f , 1.0f , 2.0f , 3.0f }, // f128_3_2_1_0 (stored in memory as 0, 1, 2, 3).
124
125 // --------------------------------------------------------------------------
126 // [D128 Constants]
127 // --------------------------------------------------------------------------
  // Same idea with 64-bit lanes: 0x8000000000000000 is the IEEE-754
  // double-precision sign bit and 0x7FFFFFFFFFFFFFFF is every bit but the sign.
  // 4503599627370496.0 is 2^52 and 6755399441055744.0 is 1.5 * 2^52.
128
129 { REPEAT_1X(0x8000000000000000u, 0x8000000000000000u) },
130 { REPEAT_1X(0x7FFFFFFFFFFFFFFFu, 0x7FFFFFFFFFFFFFFFu) },
131 { REPEAT_1X(0x7FFFFFFFFFFFFFFFu, 0xFFFFFFFFFFFFFFFFu) },
132 { REPEAT_1X(0xFFFFFFFFFFFFFFFFu, 0x7FFFFFFFFFFFFFFFu) },
133 { REPEAT_2X(4503599627370496.0) },
134 { REPEAT_2X(6755399441055744.0) },
135
136 { REPEAT_2X(1.0) }, // d128_1.
137 { REPEAT_2X(1e-20) }, // d128_1e_m20.
138 { REPEAT_2X(4.0) }, // d128_4.
139 { REPEAT_2X(-1.0) }, // d128_m1.
140
141 // --------------------------------------------------------------------------
142 // [PSHUFB Constants]
143 // --------------------------------------------------------------------------
  // Each row is a 16-byte PSHUFB selector: a plain value picks the source byte
  // with that index, `Z` (bit 7 set) makes the destination byte zero.
144
145 #define Z 0x80 // PSHUFB zeroing.
146 { 0 , 4 , 8 , 12, 0 , 4 , 8 , 12, 0 , 4 , 8 , 12, 0 , 4 , 8 , 12 }, // i128_pshufb_u32_to_u8_lo
147 { 0 , 1 , 4 , 5 , 8 , 9 , 12, 13, 0 , 1 , 4 , 5 , 8 , 9 , 12, 13 }, // i128_pshufb_u32_to_u16_lo
148 { 3 , Z , 3 , Z , 3 , Z , 3 , Z , 7 , Z , 7 , Z , 7 , Z , 7 , Z }, // i128_pshufb_packed_argb32_2x_lo_to_unpacked_a8
  // NOTE(review): the next row was labeled `_lo_` as well; it selects bytes 11
  // and 15 (the upper 64-bit half), so `_hi_` is presumably the intended member
  // name - confirm against the `BLCommonTable` declaration.
149 { 11, Z , 11, Z , 11, Z , 11, Z , 15, Z , 15, Z , 15, Z , 15, Z }, // i128_pshufb_packed_argb32_2x_hi_to_unpacked_a8
150 #undef Z
151
152 // dummy to align the constants that follow.
153 { 0 },
154
155 // --------------------------------------------------------------------------
156 // [I256 Constants]
157 // --------------------------------------------------------------------------
  // Each REPEAT_4X row repeats one 64-bit pattern four times (256 bits).
158
159 { REPEAT_4X(0x007F007F007F007Fu) },
160 { REPEAT_4X(0x0080008000800080u) },
161 { REPEAT_4X(0x00FF00FF00FF00FFu) },
162 { REPEAT_4X(0x0100010001000100u) },
163 { REPEAT_4X(0x0101010101010101u) },
164 { REPEAT_4X(0x01FF01FF01FF01FFu) },
165 { REPEAT_4X(0x0200020002000200u) },
166 { REPEAT_4X(0x8000800080008000u) },
167 { REPEAT_4X(0xFFFFFFFFFFFFFFFFu) },
168
169 // --------------------------------------------------------------------------
170 // [XMM Gradients]
171 // --------------------------------------------------------------------------
172
173 // CONICAL GRADIENT:
174 //
175 // Polynomial to approximate `atan(x) * N / 2PI`:
176 // `x * (Q0 + x^2 * (Q1 + x^2 * (Q2 + x^2 * Q3)))`
177 //
178 // The following numbers were obtained by `lolremez` - a minimax approximation tool:
179 //
180 // Atan is an odd function, so we take advantage of it (see lolremez docs):
181 // 1. E=|atan(x) * N / 2PI - P(x) | <- subs. `P(x)` by `x*Q(x^2)`
182 // 2. E=|atan(x) * N / 2PI - x*Q(x^2) | <- subs. `x^2` by `y`
183 // 3. E=|atan(sqrt(y)) * N / 2PI - sqrt(y) * Q(y) | <- eliminate `y` from Q side - div by `y`
184 // 4. E=|atan(sqrt(y)) * N / (2PI * sqrt(y)) - Q(y)|
185 //
186 // LolRemez C++ code:
187 // real f(real const& x) {
188 // real y = sqrt(x);
189 // return atan(y) * real(N) / (real(2) * real::R_PI * y);
190 // }
191 // real g(real const& x) {
192 // return re(sqrt(x));
193 // }
194 // int main(int argc, char **argv) {
195 // RemezSolver<3, real> solver;
196 // solver.Run("1e-1000", 1, f, g, 40);
197 // return 0;
198 // }
199 {
  // REC(N, Q0, Q1, Q2, Q3) emits one record of eight rows of four floats each:
  // N, N/2, N/4, the alternating pair {N/2, N}, and then the four polynomial
  // coefficients Q0..Q3. One record follows for each N of 256, 512, 1024, 2048
  // and 4096.
200 #define REC(N, Q0, Q1, Q2, Q3) { \
201 { FLOAT_4X(N , N , N , N ) }, \
202 { FLOAT_4X(N/2, N/2, N/2, N/2) }, \
203 { FLOAT_4X(N/4, N/4, N/4, N/4) }, \
204 { FLOAT_4X(N/2, N , N/2, N ) }, \
205 { FLOAT_4X(Q0 , Q0 , Q0 , Q0 ) }, \
206 { FLOAT_4X(Q1 , Q1 , Q1 , Q1 ) }, \
207 { FLOAT_4X(Q2 , Q2 , Q2 , Q2 ) }, \
208 { FLOAT_4X(Q3 , Q3 , Q3 , Q3 ) } \
209 }
210 REC(256 , 4.071421038552e+1, -1.311160794048e+1, 6.017670215625 , -1.623253505085 ),
211 REC(512 , 8.142842077104e+1, -2.622321588095e+1, 1.203534043125e+1, -3.246507010170 ),
212 REC(1024, 1.628568415421e+2, -5.244643176191e+1, 2.407068086250e+1, -6.493014020340 ),
213 REC(2048, 3.257136830841e+2, -1.048928635238e+2, 4.814136172500e+1, -1.298602804068e+1),
214 REC(4096, 6.514273661683e+2, -2.097857270476e+2, 9.628272344999e+1, -2.597205608136e+1)
215 #undef REC
216 },
217
218 // --------------------------------------------------------------------------
219 // [Div24Bit]
220 // --------------------------------------------------------------------------
  // 256 `uint32_t` entries, each produced by `BLDiv24BitReciprocalGen::value()`.
221
222 blLookupTable<uint32_t, 256, BLDiv24BitReciprocalGen>()
223};
224
// The common constants table under its final name, copy-initialized from the
// file-local constexpr `blCommonTable_`.
//
225// NOTE: We must go through `blCommonTable_` as it's the only way to convince
226// MSVC to constant-initialize this table. If this step is missing, MSVC emits
227// runtime initialization code for this const data, which is exactly what we
228// don't want. Also, we cannot just add `constexpr` to the real `blCommonTable`,
229// as MSVC would then complain about a different storage type.
230const BLCommonTable blCommonTable = blCommonTable_;
231