1 | // [Blend2D] |
2 | // 2D Vector Graphics Powered by a JIT Compiler. |
3 | // |
4 | // [License] |
5 | // Zlib - See LICENSE.md file in the package. |
6 | |
7 | #include "./blapi-build_p.h" |
8 | #include "./bltables_p.h" |
9 | |
10 | // ============================================================================ |
11 | // [BLBitCountOfByteTable] |
12 | // ============================================================================ |
13 | |
//! Generator used to populate the per-byte bit-count lookup table.
struct BLBitCountOfByteTableGen {
  //! Returns the number of bits set in the low 8 bits of `i` (higher bits are
  //! ignored).
  //!
  //! Kept as a single return expression so the function remains a valid C++11
  //! `constexpr` function.
  static constexpr uint8_t value(size_t i) noexcept {
    return uint8_t(
      int((i & 0x01u) != 0) + int((i & 0x02u) != 0) +
      int((i & 0x04u) != 0) + int((i & 0x08u) != 0) +
      int((i & 0x10u) != 0) + int((i & 0x20u) != 0) +
      int((i & 0x40u) != 0) + int((i & 0x80u) != 0));
  }
};
20 | |
21 | static constexpr const BLLookupTable<uint8_t, 256> blBitCountOfByteTable_ |
22 | = blLookupTable<uint8_t, 256, BLBitCountOfByteTableGen>(); |
23 | |
24 | const BLLookupTable<uint8_t, 256> blBitCountOfByteTable = blBitCountOfByteTable_; |
25 | |
26 | // ============================================================================ |
27 | // [BLModuloTable] |
28 | // ============================================================================ |
29 | |
30 | #define INV() {{ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 }} |
31 | #define DEF(N) {{ 1%N, 2%N, 3%N, 4%N, 5%N, 6%N, 7%N, 8%N, 9%N, 10%N, 11%N, 12%N, 13%N, 14%N, 15%N, 16%N }} |
32 | |
33 | const BLModuloTable blModuloTable[18] = { |
34 | INV( ), DEF( 1), DEF( 2), DEF( 3), |
35 | DEF( 4), DEF( 5), DEF( 6), DEF( 7), |
36 | DEF( 8), DEF( 9), DEF(10), DEF(11), |
37 | DEF(12), DEF(13), DEF(14), DEF(15), |
38 | DEF(16), DEF(17) |
39 | }; |
40 | |
41 | #undef DEF |
42 | #undef INV |
43 | |
44 | // ============================================================================ |
45 | // [BLCommonTable] |
46 | // ============================================================================ |
47 | |
//! Generator of the 24-bit division reciprocal lookup table (the `[Div24Bit]`
//! part of `blCommonTable_`).
struct BLDiv24BitReciprocalGen {
  //! Rounds a non-negative `d` up to the nearest integer (i.e. `ceil(d)`).
  //!
  //! Implemented as a separate single-expression helper because C++11
  //! `constexpr` functions cannot declare local variables.
  static constexpr uint32_t calc(double d) noexcept {
    return double(uint32_t(d)) < d ? uint32_t(d) + 1u
                                   : uint32_t(d);
  }

  //! Returns `0xFF0000 / i` rounded up, or zero when `i` is zero.
  static constexpr uint32_t value(size_t i) noexcept {
    return i == 0 ? calc(0.0)
                  : calc(double(0xFF0000) / double(i));
  }
};
59 | |
// Helpers used by the initializer of the common table:
//   - REPEAT_nX(...) expands its argument list `n` times, comma separated.
//   - FLOAT_4X(A, B, C, D) converts each of the four arguments to `float`.
#define REPEAT_1X(...) __VA_ARGS__
#define REPEAT_2X(...) REPEAT_1X(__VA_ARGS__), REPEAT_1X(__VA_ARGS__)
#define REPEAT_4X(...) REPEAT_2X(__VA_ARGS__), REPEAT_2X(__VA_ARGS__)
#define FLOAT_4X(A, B, C, D) \
  static_cast<float>(A), static_cast<float>(B), static_cast<float>(C), static_cast<float>(D)
64 | |
65 | static constexpr const BLCommonTable blCommonTable_ = { |
66 | // -------------------------------------------------------------------------- |
67 | // [I128 Constants] |
68 | // -------------------------------------------------------------------------- |
69 | |
70 | { REPEAT_2X(0x0000000000000000u) }, |
71 | { REPEAT_2X(0x007F007F007F007Fu) }, |
72 | { REPEAT_2X(0x0080008000800080u) }, |
73 | { REPEAT_2X(0x00FF00FF00FF00FFu) }, |
74 | { REPEAT_2X(0x0100010001000100u) }, |
75 | { REPEAT_2X(0x0101010101010101u) }, |
76 | { REPEAT_2X(0x01FF01FF01FF01FFu) }, |
77 | { REPEAT_2X(0x0200020002000200u) }, |
78 | { REPEAT_2X(0x8000800080008000u) }, |
79 | { REPEAT_2X(0xFFFFFFFFFFFFFFFFu) }, |
80 | |
81 | { REPEAT_2X(0x000000FF000000FFu) }, |
82 | { REPEAT_2X(0x0000010000000100u) }, |
83 | { REPEAT_2X(0x000001FF000001FFu) }, |
84 | { REPEAT_2X(0x0000020000000200u) }, |
85 | { REPEAT_2X(0x0000FFFF0000FFFFu) }, |
86 | { REPEAT_2X(0x0002000000020000u) }, |
87 | { REPEAT_2X(0x00FFFFFF00FFFFFFu) }, |
88 | { REPEAT_2X(0xFF000000FF000000u) }, |
89 | { REPEAT_2X(0xFFFF0000FFFF0000u) }, |
90 | |
91 | { REPEAT_2X(0x000000FF00FF00FFu) }, |
92 | { REPEAT_2X(0x0000010001000100u) }, |
93 | { REPEAT_2X(0x0000080000000800u) }, |
94 | { REPEAT_2X(0x0000FFFFFFFFFFFFu) }, |
95 | { REPEAT_2X(0x00FF000000000000u) }, |
96 | { REPEAT_2X(0x0100000000000000u) }, |
97 | { REPEAT_2X(0x0101010100000000u) }, |
98 | { REPEAT_2X(0xFFFF000000000000u) }, |
99 | { REPEAT_2X(0xFFFFFFFF00000000u) }, |
100 | |
101 | { 0xFFFFFFFFu, 0xFFFFFFFFu, 0xFFFFFFFFu, 0 }, // i128_FFFFFFFF_FFFFFFFF_FFFFFFFF_0. |
102 | |
103 | { 0, 1, 2, 3 }, // xmm_u32_0_1_2_3. |
104 | { REPEAT_4X(4) }, // xmm_u32_4. |
105 | |
106 | // -------------------------------------------------------------------------- |
107 | // [F128 Constants] |
108 | // -------------------------------------------------------------------------- |
109 | |
110 | { REPEAT_2X(0x8000000080000000) }, |
111 | { REPEAT_2X(0x7FFFFFFF7FFFFFFF) }, |
112 | { REPEAT_2X(0xFFFFFFFF7FFFFFFF) }, |
113 | { REPEAT_2X(0x7FFFFFFFFFFFFFFF) }, |
114 | { REPEAT_4X(8388608.0f) }, |
115 | { REPEAT_4X(12582912.0f) }, |
116 | |
117 | { REPEAT_4X(1.0f) }, // f128_1. |
118 | { REPEAT_4X(4.0f) }, // f128_4. |
119 | { REPEAT_4X(255.0f) }, // f128_255. |
120 | { REPEAT_4X(1e-3f) }, // f128_1e_m3. |
121 | { REPEAT_4X(1e-20f) }, // f128_1e_m20. |
122 | { REPEAT_4X(1.0f / 255.0f) }, // f128_1div255. |
123 | { 0.0f , 1.0f , 2.0f , 3.0f }, // f128_3_2_1_0. |
124 | |
125 | // -------------------------------------------------------------------------- |
126 | // [D128 Constants] |
127 | // -------------------------------------------------------------------------- |
128 | |
129 | { REPEAT_1X(0x8000000000000000u, 0x8000000000000000u) }, |
130 | { REPEAT_1X(0x7FFFFFFFFFFFFFFFu, 0x7FFFFFFFFFFFFFFFu) }, |
131 | { REPEAT_1X(0x7FFFFFFFFFFFFFFFu, 0xFFFFFFFFFFFFFFFFu) }, |
132 | { REPEAT_1X(0xFFFFFFFFFFFFFFFFu, 0x7FFFFFFFFFFFFFFFu) }, |
133 | { REPEAT_2X(4503599627370496.0) }, |
134 | { REPEAT_2X(6755399441055744.0) }, |
135 | |
136 | { REPEAT_2X(1.0) }, // d128_1. |
137 | { REPEAT_2X(1e-20) }, // d128_1e_m20. |
138 | { REPEAT_2X(4.0) }, // d128_4. |
139 | { REPEAT_2X(-1.0) }, // d128_m1. |
140 | |
141 | // -------------------------------------------------------------------------- |
142 | // [PSHUFB Constants] |
143 | // -------------------------------------------------------------------------- |
144 | |
145 | #define Z 0x80 // PSHUFB zeroing. |
146 | { 0 , 4 , 8 , 12, 0 , 4 , 8 , 12, 0 , 4 , 8 , 12, 0 , 4 , 8 , 12 }, // i128_pshufb_u32_to_u8_lo |
147 | { 0 , 1 , 4 , 5 , 8 , 9 , 12, 13, 0 , 1 , 4 , 5 , 8 , 9 , 12, 13 }, // i128_pshufb_u32_to_u16_lo |
148 | { 3 , Z , 3 , Z , 3 , Z , 3 , Z , 7 , Z , 7 , Z , 7 , Z , 7 , Z }, // i128_pshufb_packed_argb32_2x_lo_to_unpacked_a8 |
149 | { 11, Z , 11, Z , 11, Z , 11, Z , 15, Z , 15, Z , 15, Z , 15, Z }, // i128_pshufb_packed_argb32_2x_lo_to_unpacked_a8 |
150 | #undef Z |
151 | |
152 | // dummy to align the constants that follow. |
153 | { 0 }, |
154 | |
155 | // -------------------------------------------------------------------------- |
156 | // [I256 Constants] |
157 | // -------------------------------------------------------------------------- |
158 | |
159 | { REPEAT_4X(0x007F007F007F007Fu) }, |
160 | { REPEAT_4X(0x0080008000800080u) }, |
161 | { REPEAT_4X(0x00FF00FF00FF00FFu) }, |
162 | { REPEAT_4X(0x0100010001000100u) }, |
163 | { REPEAT_4X(0x0101010101010101u) }, |
164 | { REPEAT_4X(0x01FF01FF01FF01FFu) }, |
165 | { REPEAT_4X(0x0200020002000200u) }, |
166 | { REPEAT_4X(0x8000800080008000u) }, |
167 | { REPEAT_4X(0xFFFFFFFFFFFFFFFFu) }, |
168 | |
169 | // -------------------------------------------------------------------------- |
170 | // [XMM Gradients] |
171 | // -------------------------------------------------------------------------- |
172 | |
173 | // CONICAL GRADIENT: |
174 | // |
175 | // Polynomial to approximate `atan(x) * N / 2PI`: |
176 | // `x * (Q0 + x^2 * (Q1 + x^2 * (Q2 + x^2 * Q3)))` |
177 | // |
178 | // The following numbers were obtained by `lolremez` - minmax tool for approx.: |
179 | // |
180 | // Atan is an odd function, so we take advantage of it (see lolremez docs): |
181 | // 1. E=|atan(x) * N / 2PI - P(x) | <- subs. `P(x)` by `x*Q(x^2))` |
182 | // 2. E=|atan(x) * N / 2PI - x*Q(x^2) | <- subs. `x^2` by `y` |
183 | // 3. E=|atan(sqrt(y)) * N / 2PI - sqrt(y) * Q(y) | <- eliminate `y` from Q side - div by `y` |
184 | // 4. E=|atan(sqrt(y)) * N / (2PI * sqrt(y)) - Q(y)| |
185 | // |
186 | // LolRemez C++ code: |
187 | // real f(real const& x) { |
188 | // real y = sqrt(x); |
189 | // return atan(y) * real(N) / (real(2) * real::R_PI * y); |
190 | // } |
191 | // real g(real const& x) { |
192 | // return re(sqrt(x)); |
193 | // } |
194 | // int main(int argc, char **argv) { |
195 | // RemezSolver<3, real> solver; |
196 | // solver.Run("1e-1000", 1, f, g, 40); |
197 | // return 0; |
198 | // } |
199 | { |
200 | #define REC(N, Q0, Q1, Q2, Q3) { \ |
201 | { FLOAT_4X(N , N , N , N ) }, \ |
202 | { FLOAT_4X(N/2, N/2, N/2, N/2) }, \ |
203 | { FLOAT_4X(N/4, N/4, N/4, N/4) }, \ |
204 | { FLOAT_4X(N/2, N , N/2, N ) }, \ |
205 | { FLOAT_4X(Q0 , Q0 , Q0 , Q0 ) }, \ |
206 | { FLOAT_4X(Q1 , Q1 , Q1 , Q1 ) }, \ |
207 | { FLOAT_4X(Q2 , Q2 , Q2 , Q2 ) }, \ |
208 | { FLOAT_4X(Q3 , Q3 , Q3 , Q3 ) } \ |
209 | } |
210 | REC(256 , 4.071421038552e+1, -1.311160794048e+1, 6.017670215625 , -1.623253505085 ), |
211 | REC(512 , 8.142842077104e+1, -2.622321588095e+1, 1.203534043125e+1, -3.246507010170 ), |
212 | REC(1024, 1.628568415421e+2, -5.244643176191e+1, 2.407068086250e+1, -6.493014020340 ), |
213 | REC(2048, 3.257136830841e+2, -1.048928635238e+2, 4.814136172500e+1, -1.298602804068e+1), |
214 | REC(4096, 6.514273661683e+2, -2.097857270476e+2, 9.628272344999e+1, -2.597205608136e+1) |
215 | #undef REC |
216 | }, |
217 | |
218 | // -------------------------------------------------------------------------- |
219 | // [Div24Bit] |
220 | // -------------------------------------------------------------------------- |
221 | |
222 | blLookupTable<uint32_t, 256, BLDiv24BitReciprocalGen>() |
223 | }; |
224 | |
225 | // NOTE: We must go through `blCommonTable_` as it's the only way to convince |
226 | // MSVC to emit constexpr. If this step is missing it will emit initialization |
227 | // code for this const data, which is exactly what we don't want. Also, we cannot |
228 | // just add `constexpr` to the real `blCommonTable` as MSVC would complain about |
229 | // different storage type. |
230 | const BLCommonTable blCommonTable = blCommonTable_; |
231 | |