bltables.cpp source code [Blend2d/src/blend2d/bltables.cpp]

1	// [Blend2D]
2	// 2D Vector Graphics Powered by a JIT Compiler.
3	//
4	// [License]
5	// Zlib - See LICENSE.md file in the package.
6
7	#include "./blapi-build_p.h"
8	#include "./bltables_p.h"
9
10	// ============================================================================
11	// [BLBitCountOfByteTable]
12	// ============================================================================
13
//! Generator for the bit-count-of-byte lookup table.
//!
//! `value(i)` returns the number of set bits among the low 8 bits of `i`;
//! any bits of `i` above bit 7 are ignored.
struct BLBitCountOfByteTableGen {
  static constexpr uint8_t value(size_t i) noexcept {
    // Written as a single expression so it stays a valid C++11 constexpr.
    return uint8_t( ((i >> 0) & 1) + ((i >> 1) & 1) + ((i >> 2) & 1) + ((i >> 3) & 1) +
                    ((i >> 4) & 1) + ((i >> 5) & 1) + ((i >> 6) & 1) + ((i >> 7) & 1) );
  }
};
20
21	static constexpr const BLLookupTable<uint8_t, `256`> blBitCountOfByteTable_
22	= blLookupTable<uint8_t, `256`, BLBitCountOfByteTableGen>();
23
24	const BLLookupTable<uint8_t, `256`> blBitCountOfByteTable = blBitCountOfByteTable_;
25
26	// ============================================================================
27	// [BLModuloTable]
28	// ============================================================================
29
30	#define INV() {{ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 }}
31	#define DEF(N) {{ 1%N, 2%N, 3%N, 4%N, 5%N, 6%N, 7%N, 8%N, 9%N, 10%N, 11%N, 12%N, 13%N, 14%N, 15%N, 16%N }}
32
33	const BLModuloTable blModuloTable[`18`] = {
34	INV( ), DEF( `1`), DEF( `2`), DEF( `3`),
35	DEF( `4`), DEF( `5`), DEF( `6`), DEF( `7`),
36	DEF( `8`), DEF( `9`), DEF(`10`), DEF(`11`),
37	DEF(`12`), DEF(`13`), DEF(`14`), DEF(`15`),
38	DEF(`16`), DEF(`17`)
39	};
40
41	#undef DEF
42	#undef INV
43
44	// ============================================================================
45	// [BLCommonTable]
46	// ============================================================================
47
//! Generator for the Div24Bit reciprocal table stored in `BLCommonTable`.
struct BLDiv24BitReciprocalGen {
  //! Rounds a non-negative `d` up to the nearest integer (ceiling).
  //!
  //! Kept as a separate single-expression helper because C++11 constexpr
  //! functions cannot declare local variables.
  static constexpr uint32_t calc(double d) noexcept {
    // Truncate, then add 1 if any fractional part was dropped.
    return uint32_t(d) + uint32_t(d - double(uint32_t(d)) > 0.0);
  }

  //! Calculates the reciprocal entry for index `i`: `ceil(0xFF0000 / i)`,
  //! or 0 when `i` is 0 (avoids a division by zero).
  static constexpr uint32_t value(size_t i) noexcept {
    return calc(i ? double(0xFF0000) / double(i) : 0.0);
  }
};
59
// Helpers for writing the table initializers below.
// REPEAT_nX(...) expands its arguments n times, comma-separated.
#define REPEAT_1X(...) __VA_ARGS__
#define REPEAT_2X(...) __VA_ARGS__, __VA_ARGS__
#define REPEAT_4X(...) __VA_ARGS__, __VA_ARGS__, __VA_ARGS__, __VA_ARGS__
// FLOAT_4X(A, B, C, D) converts each of its four arguments to `float`.
#define FLOAT_4X(A, B, C, D) float(A), float(B), float(C), float(D)
64
65	static constexpr const BLCommonTable blCommonTable_ = {
66	// --------------------------------------------------------------------------
67	// [I128 Constants]
68	// --------------------------------------------------------------------------
69
70	{ REPEAT_2X(`0x0000000000000000u`) },
71	{ REPEAT_2X(`0x007F007F007F007Fu`) },
72	{ REPEAT_2X(`0x0080008000800080u`) },
73	{ REPEAT_2X(`0x00FF00FF00FF00FFu`) },
74	{ REPEAT_2X(`0x0100010001000100u`) },
75	{ REPEAT_2X(`0x0101010101010101u`) },
76	{ REPEAT_2X(`0x01FF01FF01FF01FFu`) },
77	{ REPEAT_2X(`0x0200020002000200u`) },
78	{ REPEAT_2X(`0x8000800080008000u`) },
79	{ REPEAT_2X(`0xFFFFFFFFFFFFFFFFu`) },
80
81	{ REPEAT_2X(`0x000000FF000000FFu`) },
82	{ REPEAT_2X(`0x0000010000000100u`) },
83	{ REPEAT_2X(`0x000001FF000001FFu`) },
84	{ REPEAT_2X(`0x0000020000000200u`) },
85	{ REPEAT_2X(`0x0000FFFF0000FFFFu`) },
86	{ REPEAT_2X(`0x0002000000020000u`) },
87	{ REPEAT_2X(`0x00FFFFFF00FFFFFFu`) },
88	{ REPEAT_2X(`0xFF000000FF000000u`) },
89	{ REPEAT_2X(`0xFFFF0000FFFF0000u`) },
90
91	{ REPEAT_2X(`0x000000FF00FF00FFu`) },
92	{ REPEAT_2X(`0x0000010001000100u`) },
93	{ REPEAT_2X(`0x0000080000000800u`) },
94	{ REPEAT_2X(`0x0000FFFFFFFFFFFFu`) },
95	{ REPEAT_2X(`0x00FF000000000000u`) },
96	{ REPEAT_2X(`0x0100000000000000u`) },
97	{ REPEAT_2X(`0x0101010100000000u`) },
98	{ REPEAT_2X(`0xFFFF000000000000u`) },
99	{ REPEAT_2X(`0xFFFFFFFF00000000u`) },
100
101	{ `0xFFFFFFFFu`, `0xFFFFFFFFu`, `0xFFFFFFFFu`, `0` }, // i128_FFFFFFFF_FFFFFFFF_FFFFFFFF_0.
102
103	{ `0`, `1`, `2`, `3` }, // xmm_u32_0_1_2_3.
104	{ REPEAT_4X(`4`) }, // xmm_u32_4.
105
106	// --------------------------------------------------------------------------
107	// [F128 Constants]
108	// --------------------------------------------------------------------------
109
110	{ REPEAT_2X(`0x8000000080000000`) },
111	{ REPEAT_2X(`0x7FFFFFFF7FFFFFFF`) },
112	{ REPEAT_2X(`0xFFFFFFFF7FFFFFFF`) },
113	{ REPEAT_2X(`0x7FFFFFFFFFFFFFFF`) },
114	{ REPEAT_4X(`8388608.0f`) },
115	{ REPEAT_4X(`12582912.0f`) },
116
117	{ REPEAT_4X(`1.0f`) }, // f128_1.
118	{ REPEAT_4X(`4.0f`) }, // f128_4.
119	{ REPEAT_4X(`255.0f`) }, // f128_255.
120	{ REPEAT_4X(`1e-3f`) }, // f128_1e_m3.
121	{ REPEAT_4X(`1e-20f`) }, // f128_1e_m20.
122	{ REPEAT_4X(`1.0f` / `255.0f`) }, // f128_1div255.
123	{ `0.0f` , `1.0f` , `2.0f` , `3.0f` }, // f128_3_2_1_0.
124
125	// --------------------------------------------------------------------------
126	// [D128 Constants]
127	// --------------------------------------------------------------------------
128
129	{ REPEAT_1X(`0x8000000000000000u`, `0x8000000000000000u`) },
130	{ REPEAT_1X(`0x7FFFFFFFFFFFFFFFu`, `0x7FFFFFFFFFFFFFFFu`) },
131	{ REPEAT_1X(`0x7FFFFFFFFFFFFFFFu`, `0xFFFFFFFFFFFFFFFFu`) },
132	{ REPEAT_1X(`0xFFFFFFFFFFFFFFFFu`, `0x7FFFFFFFFFFFFFFFu`) },
133	{ REPEAT_2X(`4503599627370496.0`) },
134	{ REPEAT_2X(`6755399441055744.0`) },
135
136	{ REPEAT_2X(`1.0`) }, // d128_1.
137	{ REPEAT_2X(`1e-20`) }, // d128_1e_m20.
138	{ REPEAT_2X(`4.0`) }, // d128_4.
139	{ REPEAT_2X(-`1.0`) }, // d128_m1.
140
141	// --------------------------------------------------------------------------
142	// [PSHUFB Constants]
143	// --------------------------------------------------------------------------
144
145	#define Z 0x80 // PSHUFB zeroing.
146	{ `0` , `4` , `8` , `12`, `0` , `4` , `8` , `12`, `0` , `4` , `8` , `12`, `0` , `4` , `8` , `12` }, // i128_pshufb_u32_to_u8_lo
147	{ `0` , `1` , `4` , `5` , `8` , `9` , `12`, `13`, `0` , `1` , `4` , `5` , `8` , `9` , `12`, `13` }, // i128_pshufb_u32_to_u16_lo
148	{ `3` , Z , `3` , Z , `3` , Z , `3` , Z , `7` , Z , `7` , Z , `7` , Z , `7` , Z }, // i128_pshufb_packed_argb32_2x_lo_to_unpacked_a8
149	{ `11`, Z , `11`, Z , `11`, Z , `11`, Z , `15`, Z , `15`, Z , `15`, Z , `15`, Z }, // i128_pshufb_packed_argb32_2x_lo_to_unpacked_a8
150	#undef Z
151
152	// dummy to align the constants that follow.
153	{ `0` },
154
155	// --------------------------------------------------------------------------
156	// [I256 Constants]
157	// --------------------------------------------------------------------------
158
159	{ REPEAT_4X(`0x007F007F007F007Fu`) },
160	{ REPEAT_4X(`0x0080008000800080u`) },
161	{ REPEAT_4X(`0x00FF00FF00FF00FFu`) },
162	{ REPEAT_4X(`0x0100010001000100u`) },
163	{ REPEAT_4X(`0x0101010101010101u`) },
164	{ REPEAT_4X(`0x01FF01FF01FF01FFu`) },
165	{ REPEAT_4X(`0x0200020002000200u`) },
166	{ REPEAT_4X(`0x8000800080008000u`) },
167	{ REPEAT_4X(`0xFFFFFFFFFFFFFFFFu`) },
168
169	// --------------------------------------------------------------------------
170	// [XMM Gradients]
171	// --------------------------------------------------------------------------
172
173	// CONICAL GRADIENT:
174	//
175	// Polynomial to approximate `atan(x) N / 2PI`:*
176	// `x (Q0 + x^2 * (Q1 + x^2 * (Q2 + x^2 * Q3)))`*
177	//
178	// The following numbers were obtained by `lolremez` - minmax tool for approx.:
179	//
180	// Atan is an odd function, so we take advantage of it (see lolremez docs):
181	// 1. E=\|atan(x) N / 2PI - P(x) \| <- subs. `P(x)` by `xQ(x^2))`
182	// 2. E=\|atan(x) N / 2PI - xQ(x^2) \| <- subs. `x^2` by `y`
183	// 3. E=\|atan(sqrt(y)) N / 2PI - sqrt(y) * Q(y) \| <- eliminate `y` from Q side - div by `y`*
184	// 4. E=\|atan(sqrt(y)) N / (2PI * sqrt(y)) - Q(y)\|*
185	//
186	// LolRemez C++ code:
187	// real f(real const& x) {
188	// real y = sqrt(x);
189	// return atan(y) real(N) / (real(2) * real::R_PI * y);*
190	// }
191	// real g(real const& x) {
192	// return re(sqrt(x));
193	// }
194	// int main(int argc, char argv) {
195	// RemezSolver<3, real> solver;
196	// solver.Run("1e-1000", 1, f, g, 40);
197	// return 0;
198	// }
199	{
200	#define REC(N, Q0, Q1, Q2, Q3) { \
201	{ FLOAT_4X(N , N , N , N ) }, \
202	{ FLOAT_4X(N/2, N/2, N/2, N/2) }, \
203	{ FLOAT_4X(N/4, N/4, N/4, N/4) }, \
204	{ FLOAT_4X(N/2, N , N/2, N ) }, \
205	{ FLOAT_4X(Q0 , Q0 , Q0 , Q0 ) }, \
206	{ FLOAT_4X(Q1 , Q1 , Q1 , Q1 ) }, \
207	{ FLOAT_4X(Q2 , Q2 , Q2 , Q2 ) }, \
208	{ FLOAT_4X(Q3 , Q3 , Q3 , Q3 ) } \
209	}
210	REC(`256` , `4.071421038552e+1`, -`1.311160794048e+1`, `6.017670215625` , -`1.623253505085` ),
211	REC(`512` , `8.142842077104e+1`, -`2.622321588095e+1`, `1.203534043125e+1`, -`3.246507010170` ),
212	REC(`1024`, `1.628568415421e+2`, -`5.244643176191e+1`, `2.407068086250e+1`, -`6.493014020340` ),
213	REC(`2048`, `3.257136830841e+2`, -`1.048928635238e+2`, `4.814136172500e+1`, -`1.298602804068e+1`),
214	REC(`4096`, `6.514273661683e+2`, -`2.097857270476e+2`, `9.628272344999e+1`, -`2.597205608136e+1`)
215	#undef REC
216	},
217
218	// --------------------------------------------------------------------------
219	// [Div24Bit]
220	// --------------------------------------------------------------------------
221
222	blLookupTable<uint32_t, `256`, BLDiv24BitReciprocalGen>()
223	};
224
225	// NOTE: We must go through `blCommonTable_` as it's the only way to convince
226	// MSVC to emit constexpr. If this step is missing it will emit initialization
227	// code for this const data, which is exactly what we don't want. Also, we cannot
228	// just add `constexpr` to the real `blCommonTable` as MSVC would complain about
229	// different storage type.
230	const BLCommonTable blCommonTable = blCommonTable_;
231

Browse the source code of Blend2d/src/blend2d/bltables.cpp