xsimd_generic_math.hpp source code [Velox/build/_deps/xsimd-src/include/xsimd/arch/generic/xsimd_generic_math.hpp]

1	/***************************************************************************
2	* Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and *
3	* Martin Renou *
4	* Copyright (c) QuantStack *
5	* Copyright (c) Serge Guelton *
6	* *
7	* Distributed under the terms of the BSD 3-Clause License. *
8	* *
9	* The full license is in the file LICENSE, distributed with this software. *
10	****************************************************************************/
11
12	#ifndef XSIMD_GENERIC_MATH_HPP
13	#define XSIMD_GENERIC_MATH_HPP
14
15	#include "../xsimd_scalar.hpp"
16	#include "./xsimd_generic_details.hpp"
17	#include "./xsimd_generic_trigo.hpp"
18
19	#include <type_traits>
20
21	namespace xsimd
22	{
23
24	namespace kernel
25	{
26
27	using namespace types;
28	// abs
29	template <class A, class T, class /=typename std::enable_if<std::is_integral<T>::value, void>::type/>
30	inline batch<T, A> abs(batch<T, A> const& self, requires_arch<generic>) noexcept
31	{
32	if (std::is_unsigned<T>::value)
33	return self;
34	else
35	{
36	auto sign = bitofsign(self);
37	auto inv = self ^ sign;
38	return inv - sign;
39	}
40	}
41
42	template <class A, class T>
43	inline batch<T, A> abs(batch<std::complex<T>, A> const& z, requires_arch<generic>) noexcept
44	{
45	return hypot(z.real(), z.imag());
46	}
47
48	// batch_cast
49	template <class A, class T>
50	inline batch<T, A> batch_cast(batch<T, A> const& self, batch<T, A> const&, requires_arch<generic>) noexcept
51	{
52	return self;
53	}
54
55	namespace detail
56	{
57	template <class A, class T_out, class T_in>
58	inline batch<T_out, A> batch_cast(batch<T_in, A> const& self, batch<T_out, A> const& out, requires_arch<generic>, with_fast_conversion) noexcept
59	{
60	return fast_cast(self, out, A {});
61	}
62	template <class A, class T_out, class T_in>
63	inline batch<T_out, A> batch_cast(batch<T_in, A> const& self, batch<T_out, A> const&, requires_arch<generic>, with_slow_conversion) noexcept
64	{
65	static_assert(!std::is_same<T_in, T_out>::value, "there should be no conversion for this type combination");
66	using batch_type_in = batch<T_in, A>;
67	using batch_type_out = batch<T_out, A>;
68	static_assert(batch_type_in::size == batch_type_out::size, "compatible sizes");
69	alignas(A::alignment()) T_in buffer_in[batch_type_in::size];
70	alignas(A::alignment()) T_out buffer_out[batch_type_out::size];
71	self.store_aligned(&buffer_in[`0`]);
72	std::copy(std::begin(buffer_in), std::end(buffer_in), std::begin(buffer_out));
73	return batch_type_out::load_aligned(buffer_out);
74	}
75
76	}
77
78	template <class A, class T_out, class T_in>
79	inline batch<T_out, A> batch_cast(batch<T_in, A> const& self, batch<T_out, A> const& out, requires_arch<generic>) noexcept
80	{
81	return detail::batch_cast(self, out, A {}, detail::conversion_type<A, T_in, T_out> {});
82	}
83
84	// bitofsign
85	template <class A, class T>
86	inline batch<T, A> bitofsign(batch<T, A> const& self, requires_arch<generic>) noexcept
87	{
88	static_assert(std::is_integral<T>::value, "int type implementation");
89	if (std::is_unsigned<T>::value)
90	return batch<T, A>(`0`);
91	else
92	return self >> (T)(`8` * sizeof(T) - `1`);
93	}
94
95	template <class A>
96	inline batch<float, A> bitofsign(batch<float, A> const& self, requires_arch<generic>) noexcept
97	{
98	return self & constants::minuszero<batch<float, A>>();
99	}
100	template <class A>
101	inline batch<double, A> bitofsign(batch<double, A> const& self, requires_arch<generic>) noexcept
102	{
103	return self & constants::minuszero<batch<double, A>>();
104	}
105
106	// bitwise_cast
107	template <class A, class T>
108	inline batch<T, A> bitwise_cast(batch<T, A> const& self, batch<T, A> const&, requires_arch<generic>) noexcept
109	{
110	return self;
111	}
112
113	// cbrt
/* origin: boost/simd/arch/common/simd/function/cbrt.hpp */
115	/*
116	* ====================================================
117	* copyright 2016 NumScale SAS
118	*
119	* Distributed under the Boost Software License, Version 1.0.
120	* (See copy at http://boost.org/LICENSE_1_0.txt)
121	* ====================================================
122	*/
123	template <class A>
124	inline batch<float, A> cbrt(batch<float, A> const& self, requires_arch<generic>) noexcept
125	{
126	using batch_type = batch<float, A>;
127	batch_type z = abs(self);
128	#ifndef XSIMD_NO_DENORMALS
129	auto denormal = z < constants::smallestposval<batch_type>();
130	z = select(denormal, z * constants::twotonmb<batch_type>(), z);
131	batch_type f = select(denormal, constants::twotonmbo3<batch_type>(), batch_type(`1.`));
132	#endif
133	const batch_type CBRT2(bit_cast<float>(val: `0x3fa14518`));
134	const batch_type CBRT4(bit_cast<float>(val: `0x3fcb2ff5`));
135	const batch_type CBRT2I(bit_cast<float>(val: `0x3f4b2ff5`));
136	const batch_type CBRT4I(bit_cast<float>(val: `0x3f214518`));
137	using i_type = as_integer_t<batch_type>;
138	i_type e;
139	batch_type x = frexp(z, e);
140	x = detail::horner<batch_type,
141	`0x3ece0609`,
142	`0x3f91eb77`,
143	`0xbf745265`,
144	`0x3f0bf0fe`,
145	`0xbe09e49a`>(x);
146	auto flag = e >= i_type(`0`);
147	i_type e1 = abs(e);
148	i_type rem = e1;
149	e1 /= i_type(`3`);
150	rem -= e1 * i_type(`3`);
151	e = e1 * sign(e);
152	const batch_type cbrt2 = select(batch_bool_cast<float>(flag), CBRT2, CBRT2I);
153	const batch_type cbrt4 = select(batch_bool_cast<float>(flag), CBRT4, CBRT4I);
154	batch_type fact = select(batch_bool_cast<float>(rem == i_type(`1`)), cbrt2, batch_type(`1.`));
155	fact = select(batch_bool_cast<float>(rem == i_type(`2`)), cbrt4, fact);
156	x = ldexp(x * fact, e);
157	x -= (x - z / (x * x)) * batch_type(`1.f` / `3.f`);
158	#ifndef XSIMD_NO_DENORMALS
159	x = (x \| bitofsign(self)) * f;
160	#else
161	x = x \| bitofsign(self);
162	#endif
163	#ifndef XSIMD_NO_INFINITIES
164	return select(self == batch_type(`0.`) \|\| isinf(self), self, x);
165	#else
166	return select(self == batch_type(`0.`), self, x);
167	#endif
168	}
169
170	template <class A>
171	inline batch<double, A> cbrt(batch<double, A> const& self, requires_arch<generic>) noexcept
172	{
173	using batch_type = batch<double, A>;
174	batch_type z = abs(self);
175	#ifndef XSIMD_NO_DENORMALS
176	auto denormal = z < constants::smallestposval<batch_type>();
177	z = select(denormal, z * constants::twotonmb<batch_type>(), z);
178	batch_type f = select(denormal, constants::twotonmbo3<batch_type>(), batch_type(`1.`));
179	#endif
180	const batch_type CBRT2(bit_cast<double>(val: int64_t(`0x3ff428a2f98d728b`)));
181	const batch_type CBRT4(bit_cast<double>(val: int64_t(`0x3ff965fea53d6e3d`)));
182	const batch_type CBRT2I(bit_cast<double>(val: int64_t(`0x3fe965fea53d6e3d`)));
183	const batch_type CBRT4I(bit_cast<double>(val: int64_t(`0x3fe428a2f98d728b`)));
184	using i_type = as_integer_t<batch_type>;
185	i_type e;
186	batch_type x = frexp(z, e);
187	x = detail::horner<batch_type,
188	`0x3fd9c0c12122a4feull`,
189	`0x3ff23d6ee505873aull`,
190	`0xbfee8a4ca3ba37b8ull`,
191	`0x3fe17e1fc7e59d58ull`,
192	`0xbfc13c93386fdff6ull`>(x);
193	auto flag = e >= typename i_type::value_type(`0`);
194	i_type e1 = abs(e);
195	i_type rem = e1;
196	e1 /= i_type(`3`);
197	rem -= e1 * i_type(`3`);
198	e = e1 * sign(e);
199	const batch_type cbrt2 = select(batch_bool_cast<double>(flag), CBRT2, CBRT2I);
200	const batch_type cbrt4 = select(batch_bool_cast<double>(flag), CBRT4, CBRT4I);
201	batch_type fact = select(batch_bool_cast<double>(rem == i_type(`1`)), cbrt2, batch_type(`1.`));
202	fact = select(batch_bool_cast<double>(rem == i_type(`2`)), cbrt4, fact);
203	x = ldexp(x * fact, e);
204	x -= (x - z / (x * x)) * batch_type(`1.` / `3.`);
205	x -= (x - z / (x * x)) * batch_type(`1.` / `3.`);
206	#ifndef XSIMD_NO_DENORMALS
207	x = (x \| bitofsign(self)) * f;
208	#else
209	x = x \| bitofsign(self);
210	#endif
211	#ifndef XSIMD_NO_INFINITIES
212	return select(self == batch_type(`0.`) \|\| isinf(self), self, x);
213	#else
214	return select(self == batch_type(`0.`), self, x);
215	#endif
216	}
217
218	// clip
219	template <class A, class T>
220	inline batch<T, A> clip(batch<T, A> const& self, batch<T, A> const& lo, batch<T, A> const& hi, requires_arch<generic>) noexcept
221	{
222	return min(hi, max(self, lo));
223	}
224
225	// copysign
226	template <class A, class T, class _ = typename std::enable_if<std::is_floating_point<T>::value, void>::type>
227	inline batch<T, A> copysign(batch<T, A> const& self, batch<T, A> const& other, requires_arch<generic>) noexcept
228	{
229	return abs(self) \| bitofsign(other);
230	}
231
232	// erf
233
234	namespace detail
235	{
/* origin: boost/simd/arch/common/detail/generic/erf_kernel.hpp */
237	/*
238	* ====================================================
239	* copyright 2016 NumScale SAS
240	*
241	* Distributed under the Boost Software License, Version 1.0.
242	* (See copy at http://boost.org/LICENSE_1_0.txt)
243	* ====================================================
244	*/
245	template <class B>
246	struct erf_kernel;
247
248	template <class A>
249	struct erf_kernel<batch<float, A>>
250	{
251	using batch_type = batch<float, A>;
252	// computes erf(a0)/a0
253	// x is sqr(a0) and 0 <= abs(a0) <= 2/3
254	static inline batch_type erf1(const batch_type& x) noexcept
255	{
256	return detail::horner<batch_type,
257	`0x3f906eba`, // 1.128379154774254e+00
258	`0xbec0937e`, // -3.761252839094832e-01
259	`0x3de70f22`, // 1.128218315189123e-01
260	`0xbcdb61f4`, // -2.678010670585737e-02
261	`0x3ba4468d`, // 5.013293006147870e-03
262	`0xba1fc83b` // -6.095205117313012e-04
263	>(x);
264	}
265
266	// computes erfc(x)exp(sqr(x))*
267	// x >= 2/3
268	static inline batch_type erfc2(const batch_type& x) noexcept
269	{
270	return detail::horner<batch_type,
271	`0x3f0a0e8b`, // 5.392844046572836e-01
272	`0xbf918a62`, // -1.137035586823118e+00
273	`0x3e243828`, // 1.603704761054187e-01
274	`0x3ec4ca6e`, // 3.843569094305250e-01
275	`0x3e1175c7`, // 1.420508523645926e-01
276	`0x3e2006f0`, // 1.562764709849380e-01
277	`0xbfaea865`, // -1.364514006347145e+00
278	`0x4050b063`, // 3.260765682222576e+00
279	`0xc0cd1a85`, // -6.409487379234005e+00
280	`0x40d67e3b`, // 6.702908785399893e+00
281	`0xc0283611` // -2.628299919293280e+00
282	>(x);
283	}
284
285	static inline batch_type erfc3(const batch_type& x) noexcept
286	{
287	return (batch_type(`1.`) - x) * detail::horner<batch_type,
288	`0x3f7ffffe`, // 9.9999988e-01
289	`0xbe036d7e`, // -1.2834737e-01
290	`0xbfa11698`, // -1.2585020e+00
291	`0xbffc9284`, // -1.9732213e+00
292	`0xc016c985`, // -2.3560498e+00
293	`0x3f2cff3b`, // 6.7576951e-01
294	`0xc010d956`, // -2.2632651e+00
295	`0x401b5680`, // 2.4271545e+00
296	`0x41aa8e55` // 2.1319498e+01
297	>(x);
298	}
299	};
300
301	template <class A>
302	struct erf_kernel<batch<double, A>>
303	{
304	using batch_type = batch<double, A>;
305	// computes erf(a0)/a0
306	// x is sqr(a0) and 0 <= abs(a0) <= 0.65
307	static inline batch_type erf1(const batch_type& x) noexcept
308	{
309	return detail::horner<batch_type,
310	`0x3ff20dd750429b61ull`, // 1.12837916709551
311	`0x3fc16500f106c0a5ull`, // 0.135894887627278
312	`0x3fa4a59a4f02579cull`, // 4.03259488531795E-02
313	`0x3f53b7664358865aull`, // 1.20339380863079E-03
314	`0x3f110512d5b20332ull` // 6.49254556481904E-05
315	>(x)
316	/ detail::horner<batch_type,
317	`0x3ff0000000000000ull`, // 1
318	`0x3fdd0a84eb1ca867ull`, // 0.453767041780003
319	`0x3fb64536ca92ea2full`, // 8.69936222615386E-02
320	`0x3f8166f75999dbd1ull`, // 8.49717371168693E-03
321	`0x3f37ea4332348252ull` // 3.64915280629351E-04
322	>(x);
323	}
324
325	// computes erfc(x)exp(xx)
326	// 0.65 <= abs(x) <= 2.2
327	static inline batch_type erfc2(const batch_type& x) noexcept
328	{
329	return detail::horner<batch_type,
330	`0x3feffffffbbb552bull`, // 0.999999992049799
331	`0x3ff54dfe9b258a60ull`, // 1.33154163936765
332	`0x3fec1986509e687bull`, // 0.878115804155882
333	`0x3fd53dd7a67c7e9full`, // 0.331899559578213
334	`0x3fb2488a6b5cb5e5ull`, // 7.14193832506776E-02
335	`0x3f7cf4cfe0aacbb4ull`, // 7.06940843763253E-03
336	`0x0ull` // 0
337	>(x)
338	/ detail::horner<batch_type,
339	`0x3ff0000000000000ull`, // 1
340	`0x4003adeae79b9708ull`, // 2.45992070144246
341	`0x40053b1052dca8bdull`, // 2.65383972869776
342	`0x3ff9e677c2777c3cull`, // 1.61876655543871
343	`0x3fe307622fcff772ull`, // 0.594651311286482
344	`0x3fc033c113a7deeeull`, // 0.126579413030178
345	`0x3f89a996639b0d00ull` // 1.25304936549413E-02
346	>(x);
347	}
348
349	// computes erfc(x)exp(xx)
350	// 2.2 <= abs(x) <= 6
351	static inline batch_type erfc3(const batch_type& x) noexcept
352	{
353	return detail::horner<batch_type,
354	`0x3fefff5a9e697ae2ull`, // 0.99992114009714
355	`0x3ff9fa202deb88e5ull`, // 1.62356584489367
356	`0x3ff44744306832aeull`, // 1.26739901455873
357	`0x3fe29be1cff90d94ull`, // 0.581528574177741
358	`0x3fc42210f88b9d43ull`, // 0.157289620742839
359	`0x3f971d0907ea7a92ull`, // 2.25716982919218E-02
360	`0x0ll` // 0
361	>(x)
362	/ detail::horner<batch_type,
363	`0x3ff0000000000000ull`, // 1
364	`0x400602f24bf3fdb6ull`, // 2.75143870676376
365	`0x400afd487397568full`, // 3.37367334657285
366	`0x400315ffdfd5ce91ull`, // 2.38574194785344
367	`0x3ff0cfd4cb6cde9full`, // 1.05074004614827
368	`0x3fd1d7ab774bb837ull`, // 0.278788439273629
369	`0x3fa47bd61bbb3843ull` // 4.00072964526861E-02
370	>(x);
371	}
372
373	// computes erfc(rx)exp(rxrx)
374	// x >= 6 rx = 1/x
375	static inline batch_type erfc4(const batch_type& x) noexcept
376	{
377	return detail::horner<batch_type,
378	`0xbc7e4ad1ec7d0000ll`, // -2.627435221016534e-17
379	`0x3fe20dd750429a16ll`, // 5.641895835477182e-01
380	`0x3db60000e984b501ll`, // 2.000889609806154e-11
381	`0xbfd20dd753ae5dfdll`, // -2.820947949598745e-01
382	`0x3e907e71e046a820ll`, // 2.457786367990903e-07
383	`0x3fdb1494cac06d39ll`, // 4.231311779019112e-01
384	`0x3f34a451701654f1ll`, // 3.149699042180451e-04
385	`0xbff105e6b8ef1a63ll`, // -1.063940737150596e+00
386	`0x3fb505a857e9ccc8ll`, // 8.211757799454056e-02
387	`0x40074fbabc514212ll`, // 2.913930388669777e+00
388	`0x4015ac7631f7ac4fll`, // 5.418419628850713e+00
389	`0xc0457e03041e9d8bll`, // -4.298446704382794e+01
390	`0x4055803d26c4ec4fll`, // 8.600373238783617e+01
391	`0xc0505fce04ec4ec5ll` // -6.549694941594051e+01
392	>(x);
393	}
394	};
395	}
/* origin: boost/simd/arch/common/simd/function/erf.hpp */
397	/*
398	* ====================================================
399	* copyright 2016 NumScale SAS
400	*
401	* Distributed under the Boost Software License, Version 1.0.
402	* (See copy at http://boost.org/LICENSE_1_0.txt)
403	* ====================================================
404	*/
405
406	template <class A>
407	inline batch<float, A> erf(batch<float, A> const& self, requires_arch<generic>) noexcept
408	{
409	using batch_type = batch<float, A>;
410	batch_type x = abs(self);
411	batch_type r1(`0.`);
412	auto test1 = x < batch_type(`2.f` / `3.f`);
413	if (any(test1))
414	{
415	r1 = self * detail::erf_kernel<batch_type>::erf1(x * x);
416	if (all(test1))
417	return r1;
418	}
419	batch_type z = x / (batch_type(`1.`) + x);
420	z -= batch_type(`0.4f`);
421	batch_type r2 = batch_type(`1.`) - exp(-x * x) * detail::erf_kernel<batch_type>::erfc2(z);
422	r2 = select(self < batch_type(`0.`), -r2, r2);
423	r1 = select(test1, r1, r2);
424	#ifndef XSIMD_NO_INFINITIES
425	r1 = select(xsimd::isinf(self), sign(self), r1);
426	#endif
427	return r1;
428	}
429
430	template <class A>
431	inline batch<double, A> erf(batch<double, A> const& self, requires_arch<generic>) noexcept
432	{
433	using batch_type = batch<double, A>;
434	batch_type x = abs(self);
435	batch_type xx = x * x;
436	batch_type lim1(`0.65`);
437	batch_type lim2(`2.2`);
438	auto test1 = x < lim1;
439	batch_type r1(`0.`);
440	if (any(test1))
441	{
442	r1 = self * detail::erf_kernel<batch_type>::erf1(xx);
443	if (all(test1))
444	return r1;
445	}
446	auto test2 = x < lim2;
447	auto test3 = test2 && !test1;
448	batch_type ex = exp(-xx);
449	if (any(test3))
450	{
451	batch_type z = batch_type(`1.`) - ex * detail::erf_kernel<batch_type>::erfc2(x);
452	batch_type r2 = select(self < batch_type(`0.`), -z, z);
453	r1 = select(test1, r1, r2);
454	if (all(test1 \|\| test3))
455	return r1;
456	}
457	batch_type z = batch_type(`1.`) - ex * detail::erf_kernel<batch_type>::erfc3(x);
458	z = select(self < batch_type(`0.`), -z, z);
459	#ifndef XSIMD_NO_INFINITIES
460	z = select(xsimd::isinf(self), sign(self), z);
461	#endif
462	return select(test2, r1, z);
463	}
464
465	// erfc
466	template <class A>
467	inline batch<float, A> erfc(batch<float, A> const& self, requires_arch<generic>) noexcept
468	{
469	using batch_type = batch<float, A>;
470	batch_type x = abs(self);
471	auto test0 = self < batch_type(`0.`);
472	batch_type r1(`0.`);
473	batch_type z = x / (batch_type(`1.`) + x);
474	if (any(`3.f` * x < `2.f`))
475	{
476	r1 = detail::erf_kernel<batch_type>::erfc3(z);
477	}
478	else
479	{
480	z -= batch_type(`0.4f`);
481	r1 = exp(-x * x) * detail::erf_kernel<batch_type>::erfc2(z);
482	}
483	#ifndef XSIMD_NO_INFINITIES
484	r1 = select(x == constants::infinity<batch_type>(), batch_type(`0.`), r1);
485	#endif
486	return select(test0, batch_type(`2.`) - r1, r1);
487	}
488
489	template <class A>
490	inline batch<double, A> erfc(batch<double, A> const& self, requires_arch<generic>) noexcept
491	{
492	using batch_type = batch<double, A>;
493	batch_type x = abs(self);
494	batch_type xx = x * x;
495	batch_type lim1(`0.65`);
496	batch_type lim2(`2.2`);
497	auto test0 = self < batch_type(`0.`);
498	auto test1 = x < lim1;
499	batch_type r1(`0.`);
500	if (any(test1))
501	{
502	r1 = batch_type(`1.`) - x * detail::erf_kernel<batch_type>::erf1(xx);
503	if (all(test1))
504	return select(test0, batch_type(`2.`) - r1, r1);
505	}
506	auto test2 = x < lim2;
507	auto test3 = test2 && !test1;
508	batch_type ex = exp(-xx);
509	if (any(test3))
510	{
511	batch_type z = ex * detail::erf_kernel<batch_type>::erfc2(x);
512	r1 = select(test1, r1, z);
513	if (all(test1 \|\| test3))
514	return select(test0, batch_type(`2.`) - r1, r1);
515	}
516	batch_type z = ex * detail::erf_kernel<batch_type>::erfc3(x);
517	r1 = select(test2, r1, z);
518	#ifndef XSIMD_NO_INFINITIES
519	r1 = select(x == constants::infinity<batch_type>(), batch_type(`0.`), r1);
520	#endif
521	return select(test0, batch_type(`2.`) - r1, r1);
522	}
523
524	// estrin
525	namespace detail
526	{
527
528	template <class B>
529	struct estrin
530	{
531	B x;
532
533	template <typename... Ts>
534	inline B operator()(const Ts&... coefs) noexcept
535	{
536	return eval(coefs...);
537	}
538
539	private:
540	inline B eval(const B& c0) noexcept
541	{
542	return c0;
543	}
544
545	inline B eval(const B& c0, const B& c1) noexcept
546	{
547	return fma(x, c1, c0);
548	}
549
550	template <size_t... Is, class Tuple>
551	inline B eval(::xsimd::detail::index_sequence<Is...>, const Tuple& tuple)
552	{
553	return estrin { x * x }(std::get<Is>(tuple)...);
554	}
555
556	template <class... Args>
557	inline B eval(const std::tuple<Args...>& tuple) noexcept
558	{
559	return eval(::xsimd::detail::make_index_sequence<sizeof...(Args)>(), tuple);
560	}
561
562	template <class... Args>
563	inline B eval(const std::tuple<Args...>& tuple, const B& c0) noexcept
564	{
565	return eval(std::tuple_cat(tuple, std::make_tuple(eval(c0))));
566	}
567
568	template <class... Args>
569	inline B eval(const std::tuple<Args...>& tuple, const B& c0, const B& c1) noexcept
570	{
571	return eval(std::tuple_cat(tuple, std::make_tuple(eval(c0, c1))));
572	}
573
574	template <class... Args, class... Ts>
575	inline B eval(const std::tuple<Args...>& tuple, const B& c0, const B& c1, const Ts&... coefs) noexcept
576	{
577	return eval(std::tuple_cat(tuple, std::make_tuple(eval(c0, c1))), coefs...);
578	}
579
580	template <class... Ts>
581	inline B eval(const B& c0, const B& c1, const Ts&... coefs) noexcept
582	{
583	return eval(std::make_tuple(eval(c0, c1)), coefs...);
584	}
585	};
586	}
587
588	template <class T, class A, uint64_t... Coefs>
589	inline batch<T, A> estrin(const batch<T, A>& self) noexcept
590	{
591	using batch_type = batch<T, A>;
592	return detail::estrin<batch_type> { self }(detail::coef<batch_type, Coefs>()...);
593	}
594
595	// exp
/* origin: boost/simd/arch/common/detail/simd/expo_base.hpp */
597	/*
598	* ====================================================
599	* copyright 2016 NumScale SAS
600	*
601	* Distributed under the Boost Software License, Version 1.0.
602	* (See copy at http://boost.org/LICENSE_1_0.txt)
603	* ====================================================
604	*/
605	namespace detail
606	{
// Selects which exponential (e^x, 2^x or 10^x) the reduction helpers implement.
enum exp_reduction_tag
{
    exp_tag = 0,
    exp2_tag = 1,
    exp10_tag = 2
};
613
614	template <class B, exp_reduction_tag Tag>
615	struct exp_reduction_base;
616
617	template <class B>
618	struct exp_reduction_base<B, exp_tag>
619	{
620	static constexpr B maxlog() noexcept
621	{
622	return constants::maxlog<B>();
623	}
624
625	static constexpr B minlog() noexcept
626	{
627	return constants::minlog<B>();
628	}
629	};
630
631	template <class B>
632	struct exp_reduction_base<B, exp10_tag>
633	{
634	static constexpr B maxlog() noexcept
635	{
636	return constants::maxlog10<B>();
637	}
638
639	static constexpr B minlog() noexcept
640	{
641	return constants::minlog10<B>();
642	}
643	};
644
645	template <class B>
646	struct exp_reduction_base<B, exp2_tag>
647	{
648	static constexpr B maxlog() noexcept
649	{
650	return constants::maxlog2<B>();
651	}
652
653	static constexpr B minlog() noexcept
654	{
655	return constants::minlog2<B>();
656	}
657	};
658
659	template <class T, class A, exp_reduction_tag Tag>
660	struct exp_reduction;
661
662	template <class A>
663	struct exp_reduction<float, A, exp_tag> : exp_reduction_base<batch<float, A>, exp_tag>
664	{
665	using batch_type = batch<float, A>;
666	static inline batch_type approx(const batch_type& x) noexcept
667	{
668	batch_type y = detail::horner<batch_type,
669	`0x3f000000`, // 5.0000000e-01
670	`0x3e2aa9a5`, // 1.6666277e-01
671	`0x3d2aa957`, // 4.1665401e-02
672	`0x3c098d8b`, // 8.3955629e-03
673	`0x3ab778cf` // 1.3997796e-03
674	>(x);
675	return ++fma(y, x * x, x);
676	}
677
678	static inline batch_type reduce(const batch_type& a, batch_type& x) noexcept
679	{
680	batch_type k = nearbyint(constants::invlog_2<batch_type>() * a);
681	x = fnma(k, constants::log_2hi<batch_type>(), a);
682	x = fnma(k, constants::log_2lo<batch_type>(), x);
683	return k;
684	}
685	};
686
687	template <class A>
688	struct exp_reduction<float, A, exp10_tag> : exp_reduction_base<batch<float, A>, exp10_tag>
689	{
690	using batch_type = batch<float, A>;
691	static inline batch_type approx(const batch_type& x) noexcept
692	{
693	return ++(detail::horner<batch_type,
694	`0x40135d8e`, // 2.3025851e+00
695	`0x4029a926`, // 2.6509490e+00
696	`0x400237da`, // 2.0346589e+00
697	`0x3f95eb4c`, // 1.1712432e+00
698	`0x3f0aacef`, // 5.4170126e-01
699	`0x3e54dff1` // 2.0788552e-01
700	>(x)
701	* x);
702	}
703
704	static inline batch_type reduce(const batch_type& a, batch_type& x) noexcept
705	{
706	batch_type k = nearbyint(constants::invlog10_2<batch_type>() * a);
707	x = fnma(k, constants::log10_2hi<batch_type>(), a);
708	x -= k * constants::log10_2lo<batch_type>();
709	return k;
710	}
711	};
712
713	template <class A>
714	struct exp_reduction<float, A, exp2_tag> : exp_reduction_base<batch<float, A>, exp2_tag>
715	{
716	using batch_type = batch<float, A>;
717	static inline batch_type approx(const batch_type& x) noexcept
718	{
719	batch_type y = detail::horner<batch_type,
720	`0x3e75fdf1`, // 2.4022652e-01
721	`0x3d6356eb`, // 5.5502813e-02
722	`0x3c1d9422`, // 9.6178371e-03
723	`0x3ab01218`, // 1.3433127e-03
724	`0x3922c8c4` // 1.5524315e-04
725	>(x);
726	return ++fma(y, x * x, x * constants::log_2<batch_type>());
727	}
728
729	static inline batch_type reduce(const batch_type& a, batch_type& x) noexcept
730	{
731	batch_type k = nearbyint(a);
732	x = (a - k);
733	return k;
734	}
735	};
736
737	template <class A>
738	struct exp_reduction<double, A, exp_tag> : exp_reduction_base<batch<double, A>, exp_tag>
739	{
740	using batch_type = batch<double, A>;
741	static inline batch_type approx(const batch_type& x) noexcept
742	{
743	batch_type t = x * x;
744	return fnma(t,
745	detail::horner<batch_type,
746	`0x3fc555555555553eull`,
747	`0xbf66c16c16bebd93ull`,
748	`0x3f11566aaf25de2cull`,
749	`0xbebbbd41c5d26bf1ull`,
750	`0x3e66376972bea4d0ull`>(t),
751	x);
752	}
753
754	static inline batch_type reduce(const batch_type& a, batch_type& hi, batch_type& lo, batch_type& x) noexcept
755	{
756	batch_type k = nearbyint(constants::invlog_2<batch_type>() * a);
757	hi = fnma(k, constants::log_2hi<batch_type>(), a);
758	lo = k * constants::log_2lo<batch_type>();
759	x = hi - lo;
760	return k;
761	}
762
763	static inline batch_type finalize(const batch_type& x, const batch_type& c, const batch_type& hi, const batch_type& lo) noexcept
764	{
765	return batch_type(`1.`) - (((lo - (x * c) / (batch_type(`2.`) - c)) - hi));
766	}
767	};
768
769	template <class A>
770	struct exp_reduction<double, A, exp10_tag> : exp_reduction_base<batch<double, A>, exp10_tag>
771	{
772	using batch_type = batch<double, A>;
773	static inline batch_type approx(const batch_type& x) noexcept
774	{
775	batch_type xx = x * x;
776	batch_type px = x * detail::horner<batch_type, `0x40a2b4798e134a01ull`, `0x40796b7a050349e4ull`, `0x40277d9474c55934ull`, `0x3fa4fd75f3062dd4ull`>(xx);
777	batch_type x2 = px / (detail::horner1<batch_type, `0x40a03f37650df6e2ull`, `0x4093e05eefd67782ull`, `0x405545fdce51ca08ull`>(xx) - px);
778	return ++(x2 + x2);
779	}
780
781	static inline batch_type reduce(const batch_type& a, batch_type&, batch_type&, batch_type& x) noexcept
782	{
783	batch_type k = nearbyint(constants::invlog10_2<batch_type>() * a);
784	x = fnma(k, constants::log10_2hi<batch_type>(), a);
785	x = fnma(k, constants::log10_2lo<batch_type>(), x);
786	return k;
787	}
788
789	static inline batch_type finalize(const batch_type&, const batch_type& c, const batch_type&, const batch_type&) noexcept
790	{
791	return c;
792	}
793	};
794
795	template <class A>
796	struct exp_reduction<double, A, exp2_tag> : exp_reduction_base<batch<double, A>, exp2_tag>
797	{
798	using batch_type = batch<double, A>;
799	static inline batch_type approx(const batch_type& x) noexcept
800	{
801	batch_type t = x * x;
802	return fnma(t,
803	detail::horner<batch_type,
804	`0x3fc555555555553eull`,
805	`0xbf66c16c16bebd93ull`,
806	`0x3f11566aaf25de2cull`,
807	`0xbebbbd41c5d26bf1ull`,
808	`0x3e66376972bea4d0ull`>(t),
809	x);
810	}
811
812	static inline batch_type reduce(const batch_type& a, batch_type&, batch_type&, batch_type& x) noexcept
813	{
814	batch_type k = nearbyint(a);
815	x = (a - k) * constants::log_2<batch_type>();
816	return k;
817	}
818
819	static inline batch_type finalize(const batch_type& x, const batch_type& c, const batch_type&, const batch_type&) noexcept
820	{
821	return batch_type(`1.`) + x + x * c / (batch_type(`2.`) - c);
822	}
823	};
824
825	template <exp_reduction_tag Tag, class A>
826	inline batch<float, A> exp(batch<float, A> const& self) noexcept
827	{
828	using batch_type = batch<float, A>;
829	using reducer_t = exp_reduction<float, A, Tag>;
830	batch_type x;
831	batch_type k = reducer_t::reduce(self, x);
832	x = reducer_t::approx(x);
833	x = select(self <= reducer_t::minlog(), batch_type(`0.`), ldexp(x, to_int(k)));
834	x = select(self >= reducer_t::maxlog(), constants::infinity<batch_type>(), x);
835	return x;
836	}
837
838	template <exp_reduction_tag Tag, class A>
839	inline batch<double, A> exp(batch<double, A> const& self) noexcept
840	{
841	using batch_type = batch<double, A>;
842	using reducer_t = exp_reduction<double, A, Tag>;
843	batch_type hi, lo, x;
844	batch_type k = reducer_t::reduce(self, hi, lo, x);
845	batch_type c = reducer_t::approx(x);
846	c = reducer_t::finalize(x, c, hi, lo);
847	c = select(self <= reducer_t::minlog(), batch_type(`0.`), ldexp(c, to_int(k)));
848	c = select(self >= reducer_t::maxlog(), constants::infinity<batch_type>(), c);
849	return c;
850	}
851	}
852
853	template <class A, class T>
854	inline batch<T, A> exp(batch<T, A> const& self, requires_arch<generic>) noexcept
855	{
856	return detail::exp<detail::exp_tag>(self);
857	}
858
859	template <class A, class T>
860	inline batch<std::complex<T>, A> exp(batch<std::complex<T>, A> const& self, requires_arch<generic>) noexcept
861	{
862	using batch_type = batch<std::complex<T>, A>;
863	auto isincos = sincos(self.imag());
864	return exp(self.real()) * batch_type(std::get<`1`>(isincos), std::get<`0`>(isincos));
865	}
866
867	// exp10
868	template <class A, class T>
869	inline batch<T, A> exp10(batch<T, A> const& self, requires_arch<generic>) noexcept
870	{
871	return detail::exp<detail::exp10_tag>(self);
872	}
873
874	// exp2
875	template <class A, class T>
876	inline batch<T, A> exp2(batch<T, A> const& self, requires_arch<generic>) noexcept
877	{
878	return detail::exp<detail::exp2_tag>(self);
879	}
880
881	// expm1
882	namespace detail
883	{
/* origin: boost/simd/arch/common/detail/generic/expm1_kernel.hpp */
885	/*
886	* ====================================================
887	* copyright 2016 NumScale SAS
888	*
889	* Distributed under the Boost Software License, Version 1.0.
890	* (See copy at http://boost.org/LICENSE_1_0.txt)
891	* ====================================================
892	*/
893	template <class A>
894	static inline batch<float, A> expm1(const batch<float, A>& a) noexcept
895	{
896	using batch_type = batch<float, A>;
897	batch_type k = nearbyint(constants::invlog_2<batch_type>() * a);
898	batch_type x = fnma(k, constants::log_2hi<batch_type>(), a);
899	x = fnma(k, constants::log_2lo<batch_type>(), x);
900	batch_type hx = x * batch_type(`0.5`);
901	batch_type hxs = x * hx;
902	batch_type r = detail::horner<batch_type,
903	`0X3F800000UL`, // 1
904	`0XBD08887FUL`, // -3.3333298E-02
905	`0X3ACF6DB4UL` // 1.582554
906	>(hxs);
907	batch_type t = fnma(r, hx, batch_type(`3.`));
908	batch_type e = hxs * ((r - t) / (batch_type(`6.`) - x * t));
909	e = fms(x, e, hxs);
910	using i_type = as_integer_t<batch_type>;
911	i_type ik = to_int(k);
912	batch_type two2mk = ::xsimd::bitwise_cast<batch_type>((constants::maxexponent<batch_type>() - ik) << constants::nmb<batch_type>());
913	batch_type y = batch_type(`1.`) - two2mk - (e - x);
914	return ldexp(y, ik);
915	}
916
917	template <class A>
918	static inline batch<double, A> expm1(const batch<double, A>& a) noexcept
919	{
920	using batch_type = batch<double, A>;
921	batch_type k = nearbyint(constants::invlog_2<batch_type>() * a);
922	batch_type hi = fnma(k, constants::log_2hi<batch_type>(), a);
923	batch_type lo = k * constants::log_2lo<batch_type>();
924	batch_type x = hi - lo;
925	batch_type hxs = x * x * batch_type(`0.5`);
926	batch_type r = detail::horner<batch_type,
927	`0X3FF0000000000000ULL`,
928	`0XBFA11111111110F4ULL`,
929	`0X3F5A01A019FE5585ULL`,
930	`0XBF14CE199EAADBB7ULL`,
931	`0X3ED0CFCA86E65239ULL`,
932	`0XBE8AFDB76E09C32DULL`>(hxs);
933	batch_type t = batch_type(`3.`) - r * batch_type(`0.5`) * x;
934	batch_type e = hxs * ((r - t) / (batch_type(`6`) - x * t));
935	batch_type c = (hi - x) - lo;
936	e = (x * (e - c) - c) - hxs;
937	using i_type = as_integer_t<batch_type>;
938	i_type ik = to_int(k);
939	batch_type two2mk = ::xsimd::bitwise_cast<batch_type>((constants::maxexponent<batch_type>() - ik) << constants::nmb<batch_type>());
940	batch_type ct1 = batch_type(`1.`) - two2mk - (e - x);
941	batch_type ct2 = ++(x - (e + two2mk));
942	batch_type y = select(k < batch_type(`20.`), ct1, ct2);
943	return ldexp(y, ik);
944	}
945
946	}
947
948	template <class A, class T>
949	inline batch<T, A> expm1(batch<T, A> const& self, requires_arch<generic>) noexcept
950	{
951	using batch_type = batch<T, A>;
952	return select(self < constants::logeps<batch_type>(),
953	batch_type(-`1.`),
954	select(self > constants::maxlog<batch_type>(),
955	constants::infinity<batch_type>(),
956	detail::expm1(self)));
957	}
958
959	template <class A, class T>
960	inline batch<std::complex<T>, A> expm1(const batch<std::complex<T>, A>& z, requires_arch<generic>) noexcept
961	{
962	using batch_type = batch<std::complex<T>, A>;
963	using real_batch = typename batch_type::real_batch;
964	real_batch isin = sin(z.imag());
965	real_batch rem1 = expm1(z.real());
966	real_batch re = rem1 + `1.`;
967	real_batch si = sin(z.imag() * `0.5`);
968	return { rem1 - `2.` * re * si * si, re * isin };
969	}
970
971	// polar
972	template <class A, class T>
973	inline batch<std::complex<T>, A> polar(const batch<T, A>& r, const batch<T, A>& theta, requires_arch<generic>) noexcept
974	{
975	auto sincosTheta = sincos(theta);
976	return { r * sincosTheta.second, r * sincosTheta.first };
977	}
978
979	// fdim
980	template <class A, class T>
981	inline batch<T, A> fdim(batch<T, A> const& self, batch<T, A> const& other, requires_arch<generic>) noexcept
982	{
983	return fmax(batch<T, A>(`0`), self - other);
984	}
985
986	// fmod
987	template <class A, class T>
988	inline batch<T, A> fmod(batch<T, A> const& self, batch<T, A> const& other, requires_arch<generic>) noexcept
989	{
990	return fnma(trunc(self / other), other, self);
991	}
992
993	// frexp
/* origin: boost/simd/arch/common/simd/function/ifrexp.hpp */
995	/*
996	* ====================================================
997	* copyright 2016 NumScale SAS
998	*
999	* Distributed under the Boost Software License, Version 1.0.
1000	* (See copy at http://boost.org/LICENSE_1_0.txt)
1001	* ====================================================
1002	*/
1003	template <class A, class T>
1004	inline batch<T, A> frexp(const batch<T, A>& self, batch<as_integer_t<T>, A>& exp, requires_arch<generic>) noexcept
1005	{
1006	using batch_type = batch<T, A>;
1007	using i_type = batch<as_integer_t<T>, A>;
1008	i_type m1f = constants::mask1frexp<batch_type>();
1009	i_type r1 = m1f & ::xsimd::bitwise_cast<i_type>(self);
1010	batch_type x = self & ::xsimd::bitwise_cast<batch_type>(~m1f);
1011	exp = (r1 >> constants::nmb<batch_type>()) - constants::maxexponentm1<batch_type>();
1012	exp = select(batch_bool_cast<typename i_type::value_type>(self != batch_type(`0.`)), exp, i_type(typename i_type::value_type(`0`)));
1013	return select((self != batch_type(`0.`)), x \| ::xsimd::bitwise_cast<batch_type>(constants::mask2frexp<batch_type>()), batch_type(`0.`));
1014	}
1015
1016	// from bool
1017	template <class A, class T>
1018	inline batch<T, A> from_bool(batch_bool<T, A> const& self, requires_arch<generic>) noexcept
1019	{
1020	return batch<T, A>(self.data) & batch<T, A>(`1`);
1021	}
1022
1023	// horner
1024	template <class T, class A, uint64_t... Coefs>
1025	inline batch<T, A> horner(const batch<T, A>& self) noexcept
1026	{
1027	return detail::horner<batch<T, A>, Coefs...>(self);
1028	}
1029
1030	// hypot
1031	template <class A, class T>
1032	inline batch<T, A> hypot(batch<T, A> const& self, batch<T, A> const& other, requires_arch<generic>) noexcept
1033	{
1034	return sqrt(fma(self, self, other * other));
1035	}
1036
1037	// ipow
1038	template <class A, class T, class ITy>
1039	inline batch<T, A> ipow(batch<T, A> const& self, ITy other, requires_arch<generic>) noexcept
1040	{
1041	return ::xsimd::detail::ipow(self, other);
1042	}
1043
1044	// ldexp
/* origin: boost/simd/arch/common/simd/function/ldexp.hpp */
1046	/*
1047	* ====================================================
1048	* copyright 2016 NumScale SAS
1049	*
1050	* Distributed under the Boost Software License, Version 1.0.
1051	* (See copy at http://boost.org/LICENSE_1_0.txt)
1052	* ====================================================
1053	*/
1054	template <class A, class T>
1055	inline batch<T, A> ldexp(const batch<T, A>& self, const batch<as_integer_t<T>, A>& other, requires_arch<generic>) noexcept
1056	{
1057	using batch_type = batch<T, A>;
1058	using itype = as_integer_t<batch_type>;
1059	itype ik = other + constants::maxexponent<T>();
1060	ik = ik << constants::nmb<T>();
1061	return self * ::xsimd::bitwise_cast<batch_type>(ik);
1062	}
1063
1064	// lgamma
1065	template <class A, class T>
1066	inline batch<T, A> lgamma(batch<T, A> const& self, requires_arch<generic>) noexcept;
1067
1068	namespace detail
1069	{
/* origin: boost/simd/arch/common/detail/generic/gammaln_kernel.hpp */
1071	/*
1072	* ====================================================
1073	* copyright 2016 NumScale SAS
1074	*
1075	* Distributed under the Boost Software License, Version 1.0.
1076	* (See copy at http://boost.org/LICENSE_1_0.txt)
1077	* ====================================================
1078	*/
1079	template <class A>
1080	static inline batch<float, A> gammalnB(const batch<float, A>& x) noexcept
1081	{
1082	return horner<batch<float, A>,
1083	`0x3ed87730`, // 4.227843421859038E-001
1084	`0x3ea51a64`, // 3.224669577325661E-001,
1085	`0xbd89f07e`, // -6.735323259371034E-002,
1086	`0x3ca89ed8`, // 2.058355474821512E-002,
1087	`0xbbf164fd`, // -7.366775108654962E-003,
1088	`0x3b3ba883`, // 2.863437556468661E-003,
1089	`0xbaabeab1`, // -1.311620815545743E-003,
1090	`0x3a1ebb94` // 6.055172732649237E-004
1091	>(x);
1092	}
1093
1094	template <class A>
1095	static inline batch<float, A> gammalnC(const batch<float, A>& x) noexcept
1096	{
1097	return horner<batch<float, A>,
1098	`0xbf13c468`, // -5.772156501719101E-001
1099	`0x3f528d34`, // 8.224670749082976E-001,
1100	`0xbecd27a8`, // -4.006931650563372E-001,
1101	`0x3e8a898b`, // 2.705806208275915E-001,
1102	`0xbe53c04f`, // -2.067882815621965E-001,
1103	`0x3e2d4dab`, // 1.692415923504637E-001,
1104	`0xbe22d329`, // -1.590086327657347E-001,
1105	`0x3e0c3c4f` // 1.369488127325832E-001
1106	>(x);
1107	}
1108
1109	template <class A>
1110	static inline batch<float, A> gammaln2(const batch<float, A>& x) noexcept
1111	{
1112	return horner<batch<float, A>,
1113	`0x3daaaa94`, // 8.333316229807355E-002f
1114	`0xbb358701`, // -2.769887652139868E-003f,
1115	`0x3a31fd69` // 6.789774945028216E-004f
1116	>(x);
1117	}
1118
1119	template <class A>
1120	static inline batch<double, A> gammaln1(const batch<double, A>& x) noexcept
1121	{
1122	return horner<batch<double, A>,
1123	`0xc12a0c675418055eull`, // -8.53555664245765465627E5
1124	`0xc13a45890219f20bull`, // -1.72173700820839662146E6,
1125	`0xc131bc82f994db51ull`, // -1.16237097492762307383E6,
1126	`0xc1143d73f89089e5ull`, // -3.31612992738871184744E5,
1127	`0xc0e2f234355bb93eull`, // -3.88016315134637840924E4,
1128	`0xc09589018ff36761ull` // -1.37825152569120859100E3
1129	>(x)
1130	/ horner<batch<double, A>,
1131	`0xc13ece4b6a11e14aull`, // -2.01889141433532773231E6
1132	`0xc1435255892ff34cull`, // -2.53252307177582951285E6,
1133	`0xc131628671950043ull`, // -1.13933444367982507207E6,
1134	`0xc10aeb84b9744c9bull`, // -2.20528590553854454839E5,
1135	`0xc0d0aa0d7b89d757ull`, // -1.70642106651881159223E4,
1136	`0xc075fd0d1cf312b2ull`, // -3.51815701436523470549E2,
1137	`0x3ff0000000000000ull` // 1.00000000000000000000E0
1138	>(x);
1139	}
1140
1141	template <class A>
1142	static inline batch<double, A> gammalnA(const batch<double, A>& x) noexcept
1143	{
1144	return horner<batch<double, A>,
1145	`0x3fb555555555554bull`, // 8.33333333333331927722E-2
1146	`0xbf66c16c16b0a5a1ull`, // -2.77777777730099687205E-3,
1147	`0x3f4a019f20dc5ebbull`, // 7.93650340457716943945E-4,
1148	`0xbf437fbdb580e943ull`, // -5.95061904284301438324E-4,
1149	`0x3f4a985027336661ull` // 8.11614167470508450300E-4
1150	>(x);
1151	}
1152
/* origin: boost/simd/arch/common/simd/function/gammaln.hpp */
1154	/*
1155	* ====================================================
1156	* copyright 2016 NumScale SAS
1157	*
1158	* Distributed under the Boost Software License, Version 1.0.
1159	* (See copy at http://boost.org/LICENSE_1_0.txt)
1160	* ====================================================
1161	*/
1162	template <class B>
1163	struct lgamma_impl;
1164
1165	template <class A>
1166	struct lgamma_impl<batch<float, A>>
1167	{
1168	using batch_type = batch<float, A>;
1169	static inline batch_type compute(const batch_type& a) noexcept
1170	{
1171	auto inf_result = (a <= batch_type(`0.`)) && is_flint(a);
1172	batch_type x = select(inf_result, constants::nan<batch_type>(), a);
1173	batch_type q = abs(x);
1174	#ifndef XSIMD_NO_INFINITIES
1175	inf_result = (x == constants::infinity<batch_type>()) \|\| inf_result;
1176	#endif
1177	auto ltza = a < batch_type(`0.`);
1178	batch_type r;
1179	batch_type r1 = other(x: q);
1180	if (any(ltza))
1181	{
1182	r = select(inf_result, constants::infinity<batch_type>(), negative(q, w: r1));
1183	if (all(ltza))
1184	return r;
1185	}
1186	batch_type r2 = select(ltza, r, r1);
1187	return select(a == constants::minusinfinity<batch_type>(), constants::nan<batch_type>(), select(inf_result, constants::infinity<batch_type>(), r2));
1188	}
1189
1190	private:
1191	static inline batch_type negative(const batch_type& q, const batch_type& w) noexcept
1192	{
1193	batch_type p = floor(q);
1194	batch_type z = q - p;
1195	auto test2 = z < batch_type(`0.5`);
1196	z = select(test2, z - batch_type(`1.`), z);
1197	z = q * sin(z, trigo_pi_tag ());
1198	return -log(constants::invpi<batch_type>() * abs(z)) - w;
1199	}
1200
1201	static inline batch_type other(const batch_type& x) noexcept
1202	{
1203	auto xlt650 = (x < batch_type(`6.5`));
1204	batch_type r0x = x;
1205	batch_type r0z = x;
1206	batch_type r0s = batch_type(`1.`);
1207	batch_type r1 = batch_type(`0.`);
1208	batch_type p = constants::nan<batch_type>();
1209	if (any(xlt650))
1210	{
1211	batch_type z = batch_type(`1.`);
1212	batch_type tx = select(xlt650, x, batch_type(`0.`));
1213	batch_type nx = batch_type(`0.`);
1214	const batch_type _075 = batch_type(`0.75`);
1215	const batch_type _150 = batch_type(`1.50`);
1216	const batch_type _125 = batch_type(`1.25`);
1217	const batch_type _250 = batch_type(`2.50`);
1218	auto xge150 = (x >= _150);
1219	auto txgt250 = (tx > _250);
1220
1221	// x >= 1.5
1222	while (any(xge150 && txgt250))
1223	{
1224	nx = select(txgt250, nx - batch_type(`1.`), nx);
1225	tx = select(txgt250, x + nx, tx);
1226	z = select(txgt250, z * tx, z);
1227	txgt250 = (tx > _250);
1228	}
1229	r0x = select(xge150, x + nx - batch_type(`2.`), x);
1230	r0z = select(xge150, z, r0z);
1231	r0s = select(xge150, batch_type(`1.`), r0s);
1232
1233	// x >= 1.25 && x < 1.5
1234	auto xge125 = (x >= _125);
1235	auto xge125t = xge125 && !xge150;
1236	if (any(xge125))
1237	{
1238	r0x = select(xge125t, x - batch_type(`1.`), r0x);
1239	r0z = select(xge125t, z * x, r0z);
1240	r0s = select(xge125t, batch_type(-`1.`), r0s);
1241	}
1242
1243	// x >= 0.75 && x < 1.5
1244	batch_bool<float, A> kernelC(false);
1245	auto xge075 = (x >= _075);
1246	auto xge075t = xge075 && !xge125;
1247	if (any(xge075t))
1248	{
1249	kernelC = xge075t;
1250	r0x = select(xge075t, x - batch_type(`1.`), x);
1251	r0z = select(xge075t, batch_type(`1.`), r0z);
1252	r0s = select(xge075t, batch_type(-`1.`), r0s);
1253	p = gammalnC(r0x);
1254	}
1255
1256	// tx < 1.5 && x < 0.75
1257	auto txlt150 = (tx < _150) && !xge075;
1258	if (any(txlt150))
1259	{
1260	auto orig = txlt150;
1261	while (any(txlt150))
1262	{
1263	z = select(txlt150, z * tx, z);
1264	nx = select(txlt150, nx + batch_type(`1.`), nx);
1265	tx = select(txlt150, x + nx, tx);
1266	txlt150 = (tx < _150) && !xge075;
1267	}
1268	r0x = select(orig, r0x + nx - batch_type(`2.`), r0x);
1269	r0z = select(orig, z, r0z);
1270	r0s = select(orig, batch_type(-`1.`), r0s);
1271	}
1272	p = select(kernelC, p, gammalnB(r0x));
1273	if (all(xlt650))
1274	return fma(r0x, p, r0s * log(abs(r0z)));
1275	}
1276	r0z = select(xlt650, abs(r0z), x);
1277	batch_type m = log(r0z);
1278	r1 = fma(r0x, p, r0s * m);
1279	batch_type r2 = fma(x - batch_type(`0.5`), m, constants::logsqrt2pi<batch_type>() - x);
1280	r2 += gammaln2(batch_type(`1.`) / (x * x)) / x;
1281	return select(xlt650, r1, r2);
1282	}
1283	};
1284
1285	template <class A>
1286	struct lgamma_impl<batch<double, A>>
1287	{
1288	using batch_type = batch<double, A>;
1289
1290	static inline batch_type compute(const batch_type& a) noexcept
1291	{
1292	auto inf_result = (a <= batch_type(`0.`)) && is_flint(a);
1293	batch_type x = select(inf_result, constants::nan<batch_type>(), a);
1294	batch_type q = abs(x);
1295	#ifndef XSIMD_NO_INFINITIES
1296	inf_result = (q == constants::infinity<batch_type>());
1297	#endif
1298	auto test = (a < batch_type(-`34.`));
1299	batch_type r = constants::nan<batch_type>();
1300	if (any(test))
1301	{
1302	r = large_negative(q);
1303	if (all(test))
1304	return select(inf_result, constants::nan<batch_type>(), r);
1305	}
1306	batch_type r1 = other(xx: a);
1307	batch_type r2 = select(test, r, r1);
1308	return select(a == constants::minusinfinity<batch_type>(), constants::nan<batch_type>(), select(inf_result, constants::infinity<batch_type>(), r2));
1309	}
1310
1311	private:
1312	static inline batch_type large_negative(const batch_type& q) noexcept
1313	{
1314	batch_type w = lgamma(q);
1315	batch_type p = floor(q);
1316	batch_type z = q - p;
1317	auto test2 = (z < batch_type(`0.5`));
1318	z = select(test2, z - batch_type(`1.`), z);
1319	z = q * sin(z, trigo_pi_tag ());
1320	z = abs(z);
1321	return constants::logpi<batch_type>() - log(z) - w;
1322	}
1323
1324	static inline batch_type other(const batch_type& xx) noexcept
1325	{
1326	batch_type x = xx;
1327	auto test = (x < batch_type(`13.`));
1328	batch_type r1 = batch_type(`0.`);
1329	if (any(test))
1330	{
1331	batch_type z = batch_type(`1.`);
1332	batch_type p = batch_type(`0.`);
1333	batch_type u = select(test, x, batch_type(`0.`));
1334	auto test1 = (u >= batch_type(`3.`));
1335	while (any(test1))
1336	{
1337	p = select(test1, p - batch_type(`1.`), p);
1338	u = select(test1, x + p, u);
1339	z = select(test1, z * u, z);
1340	test1 = (u >= batch_type(`3.`));
1341	}
1342
1343	auto test2 = (u < batch_type(`2.`));
1344	while (any(test2))
1345	{
1346	z = select(test2, z / u, z);
1347	p = select(test2, p + batch_type(`1.`), p);
1348	u = select(test2, x + p, u);
1349	test2 = (u < batch_type(`2.`));
1350	}
1351
1352	z = abs(z);
1353	x += p - batch_type(`2.`);
1354	r1 = x * gammaln1(x) + log(z);
1355	if (all(test))
1356	return r1;
1357	}
1358	batch_type r2 = fma(xx - batch_type(`0.5`), log(xx), constants::logsqrt2pi<batch_type>() - xx);
1359	batch_type p = batch_type(`1.`) / (xx * xx);
1360	r2 += gammalnA(p) / xx;
1361	return select(test, r1, r2);
1362	}
1363	};
1364	}
1365
1366	template <class A, class T>
1367	inline batch<T, A> lgamma(batch<T, A> const& self, requires_arch<generic>) noexcept
1368	{
1369	return detail::lgamma_impl<batch<T, A>>::compute(self);
1370	}
1371
1372	// log
/* origin: boost/simd/arch/common/simd/function/log.hpp */
1374	/*
1375	* ====================================================
1376	* copyright 2016 NumScale SAS
1377	*
1378	* Distributed under the Boost Software License, Version 1.0.
1379	* (See copy at http://boost.org/LICENSE_1_0.txt)
1380	* ====================================================
1381	*/
1382	template <class A>
1383	inline batch<float, A> log(batch<float, A> const& self, requires_arch<generic>) noexcept
1384	{
1385	using batch_type = batch<float, A>;
1386	using i_type = as_integer_t<batch_type>;
1387	batch_type x = self;
1388	i_type k(`0`);
1389	auto isnez = (self != batch_type(`0.`));
1390	#ifndef XSIMD_NO_DENORMALS
1391	auto test = (self < constants::smallestposval<batch_type>()) && isnez;
1392	if (any(test))
1393	{
1394	k = select(batch_bool_cast<typename i_type::value_type>(test), k - i_type(`23`), k);
1395	x = select(test, x * batch_type(`8388608ul`), x);
1396	}
1397	#endif
1398	i_type ix = ::xsimd::bitwise_cast<i_type>(x);
1399	ix += `0x3f800000` - `0x3f3504f3`;
1400	k += (ix >> `23`) - `0x7f`;
1401	ix = (ix & i_type(`0x007fffff`)) + `0x3f3504f3`;
1402	x = ::xsimd::bitwise_cast<batch_type>(ix);
1403	batch_type f = --x;
1404	batch_type s = f / (batch_type(`2.`) + f);
1405	batch_type z = s * s;
1406	batch_type w = z * z;
1407	batch_type t1 = w * detail::horner<batch_type, `0x3eccce13`, `0x3e789e26`>(w);
1408	batch_type t2 = z * detail::horner<batch_type, `0x3f2aaaaa`, `0x3e91e9ee`>(w);
1409	batch_type R = t2 + t1;
1410	batch_type hfsq = batch_type(`0.5`) * f * f;
1411	batch_type dk = to_float(k);
1412	batch_type r = fma(dk, constants::log_2hi<batch_type>(), fma(s, (hfsq + R), dk * constants::log_2lo<batch_type>()) - hfsq + f);
1413	#ifndef XSIMD_NO_INFINITIES
1414	batch_type zz = select(isnez, select(self == constants::infinity<batch_type>(), constants::infinity<batch_type>(), r), constants::minusinfinity<batch_type>());
1415	#else
1416	batch_type zz = select(isnez, r, constants::minusinfinity<batch_type>());
1417	#endif
1418	return select(!(self >= batch_type(`0.`)), constants::nan<batch_type>(), zz);
1419	}
1420
1421	template <class A>
1422	inline batch<double, A> log(batch<double, A> const& self, requires_arch<generic>) noexcept
1423	{
1424	using batch_type = batch<double, A>;
1425	using i_type = as_integer_t<batch_type>;
1426
1427	batch_type x = self;
1428	i_type hx = ::xsimd::bitwise_cast<i_type>(x) >> `32`;
1429	i_type k(`0`);
1430	auto isnez = (self != batch_type(`0.`));
1431	#ifndef XSIMD_NO_DENORMALS
1432	auto test = (self < constants::smallestposval<batch_type>()) && isnez;
1433	if (any(test))
1434	{
1435	k = select(batch_bool_cast<typename i_type::value_type>(test), k - i_type(`54`), k);
1436	x = select(test, x * batch_type(`18014398509481984ull`), x);
1437	}
1438	#endif
1439	hx += `0x3ff00000` - `0x3fe6a09e`;
1440	k += (hx >> `20`) - `0x3ff`;
1441	batch_type dk = to_float(k);
1442	hx = (hx & i_type(`0x000fffff`)) + `0x3fe6a09e`;
1443	x = ::xsimd::bitwise_cast<batch_type>(hx << `32` \| (i_type(`0xffffffff`) & ::xsimd::bitwise_cast<i_type>(x)));
1444
1445	batch_type f = --x;
1446	batch_type hfsq = batch_type(`0.5`) * f * f;
1447	batch_type s = f / (batch_type(`2.`) + f);
1448	batch_type z = s * s;
1449	batch_type w = z * z;
1450
1451	batch_type t1 = w * detail::horner<batch_type, `0x3fd999999997fa04ll`, `0x3fcc71c51d8e78afll`, `0x3fc39a09d078c69fll`>(w);
1452	batch_type t2 = z * detail::horner<batch_type, `0x3fe5555555555593ll`, `0x3fd2492494229359ll`, `0x3fc7466496cb03dell`, `0x3fc2f112df3e5244ll`>(w);
1453	batch_type R = t2 + t1;
1454	batch_type r = fma(dk, constants::log_2hi<batch_type>(), fma(s, (hfsq + R), dk * constants::log_2lo<batch_type>()) - hfsq + f);
1455	#ifndef XSIMD_NO_INFINITIES
1456	batch_type zz = select(isnez, select(self == constants::infinity<batch_type>(), constants::infinity<batch_type>(), r), constants::minusinfinity<batch_type>());
1457	#else
1458	batch_type zz = select(isnez, r, constants::minusinfinity<batch_type>());
1459	#endif
1460	return select(!(self >= batch_type(`0.`)), constants::nan<batch_type>(), zz);
1461	}
1462
1463	template <class A, class T>
1464	inline batch<std::complex<T>, A> log(const batch<std::complex<T>, A>& z, requires_arch<generic>) noexcept
1465	{
1466	return batch<std::complex<T>, A>(log(abs(z)), atan2(z.imag(), z.real()));
1467	}
1468
1469	// log2
1470	template <class A>
1471	inline batch<float, A> log2(batch<float, A> const& self, requires_arch<generic>) noexcept
1472	{
1473	using batch_type = batch<float, A>;
1474	using i_type = as_integer_t<batch_type>;
1475	batch_type x = self;
1476	i_type k(`0`);
1477	auto isnez = (self != batch_type(`0.`));
1478	#ifndef XSIMD_NO_DENORMALS
1479	auto test = (self < constants::smallestposval<batch_type>()) && isnez;
1480	if (any(test))
1481	{
1482	k = select(batch_bool_cast<typename i_type::value_type>(test), k - i_type(`25`), k);
1483	x = select(test, x * batch_type(`33554432ul`), x);
1484	}
1485	#endif
1486	i_type ix = ::xsimd::bitwise_cast<i_type>(x);
1487	ix += `0x3f800000` - `0x3f3504f3`;
1488	k += (ix >> `23`) - `0x7f`;
1489	ix = (ix & i_type(`0x007fffff`)) + `0x3f3504f3`;
1490	x = ::xsimd::bitwise_cast<batch_type>(ix);
1491	batch_type f = --x;
1492	batch_type s = f / (batch_type(`2.`) + f);
1493	batch_type z = s * s;
1494	batch_type w = z * z;
1495	batch_type t1 = w * detail::horner<batch_type, `0x3eccce13`, `0x3e789e26`>(w);
1496	batch_type t2 = z * detail::horner<batch_type, `0x3f2aaaaa`, `0x3e91e9ee`>(w);
1497	batch_type R = t1 + t2;
1498	batch_type hfsq = batch_type(`0.5`) * f * f;
1499	batch_type dk = to_float(k);
1500	batch_type r = fma(fms(s, hfsq + R, hfsq) + f, constants::invlog_2<batch_type>(), dk);
1501	#ifndef XSIMD_NO_INFINITIES
1502	batch_type zz = select(isnez, select(self == constants::infinity<batch_type>(), constants::infinity<batch_type>(), r), constants::minusinfinity<batch_type>());
1503	#else
1504	batch_type zz = select(isnez, r, constants::minusinfinity<batch_type>());
1505	#endif
1506	return select(!(self >= batch_type(`0.`)), constants::nan<batch_type>(), zz);
1507	}
1508
1509	template <class A>
1510	inline batch<double, A> log2(batch<double, A> const& self, requires_arch<generic>) noexcept
1511	{
1512	using batch_type = batch<double, A>;
1513	using i_type = as_integer_t<batch_type>;
1514	batch_type x = self;
1515	i_type hx = ::xsimd::bitwise_cast<i_type>(x) >> `32`;
1516	i_type k(`0`);
1517	auto isnez = (self != batch_type(`0.`));
1518	#ifndef XSIMD_NO_DENORMALS
1519	auto test = (self < constants::smallestposval<batch_type>()) && isnez;
1520	if (any(test))
1521	{
1522	k = select(batch_bool_cast<typename i_type::value_type>(test), k - i_type(`54`), k);
1523	x = select(test, x * batch_type(`18014398509481984ull`), x);
1524	}
1525	#endif
1526	hx += `0x3ff00000` - `0x3fe6a09e`;
1527	k += (hx >> `20`) - `0x3ff`;
1528	hx = (hx & i_type(`0x000fffff`)) + `0x3fe6a09e`;
1529	x = ::xsimd::bitwise_cast<batch_type>(hx << `32` \| (i_type(`0xffffffff`) & ::xsimd::bitwise_cast<i_type>(x)));
1530	batch_type f = --x;
1531	batch_type s = f / (batch_type(`2.`) + f);
1532	batch_type z = s * s;
1533	batch_type w = z * z;
1534	batch_type t1 = w * detail::horner<batch_type, `0x3fd999999997fa04ll`, `0x3fcc71c51d8e78afll`, `0x3fc39a09d078c69fll`>(w);
1535	batch_type t2 = z * detail::horner<batch_type, `0x3fe5555555555593ll`, `0x3fd2492494229359ll`, `0x3fc7466496cb03dell`, `0x3fc2f112df3e5244ll`>(w);
1536	batch_type R = t2 + t1;
1537	batch_type hfsq = batch_type(`0.5`) * f * f;
1538	batch_type hi = f - hfsq;
1539	hi = hi & ::xsimd::bitwise_cast<batch_type>((constants::allbits<i_type>() << `32`));
1540	batch_type lo = fma(s, hfsq + R, f - hi - hfsq);
1541	batch_type val_hi = hi * constants::invlog_2hi<batch_type>();
1542	batch_type val_lo = fma(lo + hi, constants::invlog_2lo<batch_type>(), lo * constants::invlog_2hi<batch_type>());
1543	batch_type dk = to_float(k);
1544	batch_type w1 = dk + val_hi;
1545	val_lo += (dk - w1) + val_hi;
1546	val_hi = w1;
1547	batch_type r = val_lo + val_hi;
1548	#ifndef XSIMD_NO_INFINITIES
1549	batch_type zz = select(isnez, select(self == constants::infinity<batch_type>(), constants::infinity<batch_type>(), r), constants::minusinfinity<batch_type>());
1550	#else
1551	batch_type zz = select(isnez, r, constants::minusinfinity<batch_type>());
1552	#endif
1553	return select(!(self >= batch_type(`0.`)), constants::nan<batch_type>(), zz);
1554	}
1555
1556	namespace detail
1557	{
1558	template <class T, class A>
1559	inline batch<T, A> logN_complex_impl(const batch<T, A>& z, typename batch<T, A>::value_type base) noexcept
1560	{
1561	using batch_type = batch<T, A>;
1562	using rv_type = typename batch_type::value_type;
1563	return log(z) / batch_type(rv_type(base));
1564	}
1565	}
1566
1567	template <class A, class T>
1568	inline batch<std::complex<T>, A> log2(batch<std::complex<T>, A> const& self, requires_arch<generic>) noexcept
1569	{
1570	return detail::logN_complex_impl(self, std::log(x: `2`));
1571	}
1572
1573	// log10
/* origin: FreeBSD /usr/src/lib/msun/src/e_log10f.c */
1575	/*
1576	* ====================================================
1577	* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
1578	*
1579	* Developed at SunPro, a Sun Microsystems, Inc. business.
1580	* Permission to use, copy, modify, and distribute this
1581	* software is freely granted, provided that this notice
1582	* is preserved.
1583	* ====================================================
1584	*/
1585	template <class A>
1586	inline batch<float, A> log10(batch<float, A> const& self, requires_arch<generic>) noexcept
1587	{
1588	using batch_type = batch<float, A>;
1589	const batch_type
1590	ivln10hi(`4.3432617188e-01f`),
1591	ivln10lo(-`3.1689971365e-05f`),
1592	log10_2hi(`3.0102920532e-01f`),
1593	log10_2lo(`7.9034151668e-07f`);
1594	using i_type = as_integer_t<batch_type>;
1595	batch_type x = self;
1596	i_type k(`0`);
1597	auto isnez = (self != batch_type(`0.`));
1598	#ifndef XSIMD_NO_DENORMALS
1599	auto test = (self < constants::smallestposval<batch_type>()) && isnez;
1600	if (any(test))
1601	{
1602	k = select(batch_bool_cast<typename i_type::value_type>(test), k - i_type(`25`), k);
1603	x = select(test, x * batch_type(`33554432ul`), x);
1604	}
1605	#endif
1606	i_type ix = ::xsimd::bitwise_cast<i_type>(x);
1607	ix += `0x3f800000` - `0x3f3504f3`;
1608	k += (ix >> `23`) - `0x7f`;
1609	ix = (ix & i_type(`0x007fffff`)) + `0x3f3504f3`;
1610	x = ::xsimd::bitwise_cast<batch_type>(ix);
1611	batch_type f = --x;
1612	batch_type s = f / (batch_type(`2.`) + f);
1613	batch_type z = s * s;
1614	batch_type w = z * z;
1615	batch_type t1 = w * detail::horner<batch_type, `0x3eccce13`, `0x3e789e26`>(w);
1616	batch_type t2 = z * detail::horner<batch_type, `0x3f2aaaaa`, `0x3e91e9ee`>(w);
1617	batch_type R = t2 + t1;
1618	batch_type dk = to_float(k);
1619	batch_type hfsq = batch_type(`0.5`) * f * f;
1620	batch_type hibits = f - hfsq;
1621	hibits &= ::xsimd::bitwise_cast<batch_type>(i_type(`0xfffff000`));
1622	batch_type lobits = fma(s, hfsq + R, f - hibits - hfsq);
1623	batch_type r = fma(dk, log10_2hi,
1624	fma(hibits, ivln10hi,
1625	fma(lobits, ivln10hi,
1626	fma(lobits + hibits, ivln10lo, dk * log10_2lo))));
1627	#ifndef XSIMD_NO_INFINITIES
1628	batch_type zz = select(isnez, select(self == constants::infinity<batch_type>(), constants::infinity<batch_type>(), r), constants::minusinfinity<batch_type>());
1629	#else
1630	batch_type zz = select(isnez, r, constants::minusinfinity<batch_type>());
1631	#endif
1632	return select(!(self >= batch_type(`0.`)), constants::nan<batch_type>(), zz);
1633	}
1634
1635	template <class A>
1636	inline batch<double, A> log10(batch<double, A> const& self, requires_arch<generic>) noexcept
1637	{
1638	using batch_type = batch<double, A>;
1639	const batch_type
1640	ivln10hi(`4.34294481878168880939e-01`),
1641	ivln10lo(`2.50829467116452752298e-11`),
1642	log10_2hi(`3.01029995663611771306e-01`),
1643	log10_2lo(`3.69423907715893078616e-13`);
1644	using i_type = as_integer_t<batch_type>;
1645	batch_type x = self;
1646	i_type hx = ::xsimd::bitwise_cast<i_type>(x) >> `32`;
1647	i_type k(`0`);
1648	auto isnez = (self != batch_type(`0.`));
1649	#ifndef XSIMD_NO_DENORMALS
1650	auto test = (self < constants::smallestposval<batch_type>()) && isnez;
1651	if (any(test))
1652	{
1653	k = select(batch_bool_cast<typename i_type::value_type>(test), k - i_type(`54`), k);
1654	x = select(test, x * batch_type(`18014398509481984ull`), x);
1655	}
1656	#endif
1657	hx += `0x3ff00000` - `0x3fe6a09e`;
1658	k += (hx >> `20`) - `0x3ff`;
1659	hx = (hx & i_type(`0x000fffff`)) + `0x3fe6a09e`;
1660	x = ::xsimd::bitwise_cast<batch_type>(hx << `32` \| (i_type(`0xffffffff`) & ::xsimd::bitwise_cast<i_type>(x)));
1661	batch_type f = --x;
1662	batch_type dk = to_float(k);
1663	batch_type s = f / (batch_type(`2.`) + f);
1664	batch_type z = s * s;
1665	batch_type w = z * z;
1666	batch_type t1 = w * detail::horner<batch_type, `0x3fd999999997fa04ll`, `0x3fcc71c51d8e78afll`, `0x3fc39a09d078c69fll`>(w);
1667	batch_type t2 = z * detail::horner<batch_type, `0x3fe5555555555593ll`, `0x3fd2492494229359ll`, `0x3fc7466496cb03dell`, `0x3fc2f112df3e5244ll`>(w);
1668	batch_type R = t2 + t1;
1669	batch_type hfsq = batch_type(`0.5`) * f * f;
1670	batch_type hi = f - hfsq;
1671	hi = hi & ::xsimd::bitwise_cast<batch_type>(constants::allbits<i_type>() << `32`);
1672	batch_type lo = f - hi - hfsq + s * (hfsq + R);
1673	batch_type val_hi = hi * ivln10hi;
1674	batch_type y = dk * log10_2hi;
1675	batch_type val_lo = dk * log10_2lo + (lo + hi) * ivln10lo + lo * ivln10hi;
1676	batch_type w1 = y + val_hi;
1677	val_lo += (y - w1) + val_hi;
1678	val_hi = w1;
1679	batch_type r = val_lo + val_hi;
1680	#ifndef XSIMD_NO_INFINITIES
1681	batch_type zz = select(isnez, select(self == constants::infinity<batch_type>(), constants::infinity<batch_type>(), r), constants::minusinfinity<batch_type>());
1682	#else
1683	batch_type zz = select(isnez, r, constants::minusinfinity<batch_type>());
1684	#endif
1685	return select(!(self >= batch_type(`0.`)), constants::nan<batch_type>(), zz);
1686	}
1687
1688	template <class A, class T>
1689	inline batch<std::complex<T>, A> log10(const batch<std::complex<T>, A>& z, requires_arch<generic>) noexcept
1690	{
1691	return detail::logN_complex_impl(z, std::log(x: `10`));
1692	}
1693
1694	// log1p
/* origin: boost/simd/arch/common/simd/function/log1p.hpp */
1696	/*
1697	* ====================================================
1698	* copyright 2016 NumScale SAS
1699	*
1700	* Distributed under the Boost Software License, Version 1.0.
1701	* (See copy at http://boost.org/LICENSE_1_0.txt)
1702	* ====================================================
1703	*/
1704	template <class A>
1705	inline batch<float, A> log1p(batch<float, A> const& self, requires_arch<generic>) noexcept
1706	{
1707	using batch_type = batch<float, A>;
1708	using i_type = as_integer_t<batch_type>;
1709	const batch_type uf = self + batch_type(`1.`);
1710	auto isnez = (uf != batch_type(`0.`));
1711	i_type iu = ::xsimd::bitwise_cast<i_type>(uf);
1712	iu += `0x3f800000` - `0x3f3504f3`;
1713	i_type k = (iu >> `23`) - `0x7f`;
1714	iu = (iu & i_type(`0x007fffff`)) + `0x3f3504f3`;
1715	batch_type f = --(::xsimd::bitwise_cast<batch_type>(iu));
1716	batch_type s = f / (batch_type(`2.`) + f);
1717	batch_type z = s * s;
1718	batch_type w = z * z;
1719	batch_type t1 = w * detail::horner<batch_type, `0x3eccce13`, `0x3e789e26`>(w);
1720	batch_type t2 = z * detail::horner<batch_type, `0x3f2aaaaa`, `0x3e91e9ee`>(w);
1721	batch_type R = t2 + t1;
1722	batch_type hfsq = batch_type(`0.5`) * f * f;
1723	batch_type dk = to_float(k);
1724	/ correction term ~ log(1+x)-log(u), avoid underflow in c/u /
1725	batch_type c = select(batch_bool_cast<float>(k >= i_type(`2`)), batch_type(`1.`) - (uf - self), self - (uf - batch_type(`1.`))) / uf;
1726	batch_type r = fma(dk, constants::log_2hi<batch_type>(), fma(s, (hfsq + R), dk * constants::log_2lo<batch_type>() + c) - hfsq + f);
1727	#ifndef XSIMD_NO_INFINITIES
1728	batch_type zz = select(isnez, select(self == constants::infinity<batch_type>(), constants::infinity<batch_type>(), r), constants::minusinfinity<batch_type>());
1729	#else
1730	batch_type zz = select(isnez, r, constants::minusinfinity<batch_type>());
1731	#endif
1732	return select(!(uf >= batch_type(`0.`)), constants::nan<batch_type>(), zz);
1733	}
1734
1735	template <class A>
1736	inline batch<double, A> log1p(batch<double, A> const& self, requires_arch<generic>) noexcept
1737	{
1738	using batch_type = batch<double, A>;
1739	using i_type = as_integer_t<batch_type>;
1740	const batch_type uf = self + batch_type(`1.`);
1741	auto isnez = (uf != batch_type(`0.`));
1742	i_type hu = ::xsimd::bitwise_cast<i_type>(uf) >> `32`;
1743	hu += `0x3ff00000` - `0x3fe6a09e`;
1744	i_type k = (hu >> `20`) - `0x3ff`;
1745	/ correction term ~ log(1+x)-log(u), avoid underflow in c/u /
1746	batch_type c = select(batch_bool_cast<double>(k >= i_type(`2`)), batch_type(`1.`) - (uf - self), self - (uf - batch_type(`1.`))) / uf;
1747	hu = (hu & i_type(`0x000fffff`)) + `0x3fe6a09e`;
1748	batch_type f = ::xsimd::bitwise_cast<batch_type>((hu << `32`) \| (i_type(`0xffffffff`) & ::xsimd::bitwise_cast<i_type>(uf)));
1749	f = --f;
1750	batch_type hfsq = batch_type(`0.5`) * f * f;
1751	batch_type s = f / (batch_type(`2.`) + f);
1752	batch_type z = s * s;
1753	batch_type w = z * z;
1754	batch_type t1 = w * detail::horner<batch_type, `0x3fd999999997fa04ll`, `0x3fcc71c51d8e78afll`, `0x3fc39a09d078c69fll`>(w);
1755	batch_type t2 = z * detail::horner<batch_type, `0x3fe5555555555593ll`, `0x3fd2492494229359ll`, `0x3fc7466496cb03dell`, `0x3fc2f112df3e5244ll`>(w);
1756	batch_type R = t2 + t1;
1757	batch_type dk = to_float(k);
1758	batch_type r = fma(dk, constants::log_2hi<batch_type>(), fma(s, hfsq + R, dk * constants::log_2lo<batch_type>() + c) - hfsq + f);
1759	#ifndef XSIMD_NO_INFINITIES
1760	batch_type zz = select(isnez, select(self == constants::infinity<batch_type>(), constants::infinity<batch_type>(), r), constants::minusinfinity<batch_type>());
1761	#else
1762	batch_type zz = select(isnez, r, constants::minusinfinity<batch_type>());
1763	#endif
1764	return select(!(uf >= batch_type(`0.`)), constants::nan<batch_type>(), zz);
1765	}
1766
1767	template <class A, class T>
1768	inline batch<std::complex<T>, A> log1p(batch<std::complex<T>, A> const& self, requires_arch<generic>) noexcept
1769	{
1770	using batch_type = batch<std::complex<T>, A>;
1771	using real_batch = typename batch_type::real_batch;
1772	batch_type u = `1` + self;
1773	batch_type logu = log(u);
1774	return select(u == batch_type(`1.`),
1775	self,
1776	select(u.real() <= real_batch(`0.`),
1777	logu,
1778	logu * self / (u - batch_type(`1.`))));
1779	}
1780
1781	// mod
1782	template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
1783	inline batch<T, A> mod(batch<T, A> const& self, batch<T, A> const& other, requires_arch<generic>) noexcept
1784	{
1785	return detail::apply([](T x, T y) noexcept -> T
1786	{ return x % y; },
1787	self, other);
1788	}
1789
1790	// nearbyint
1791	template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
1792	inline batch<T, A> nearbyint(batch<T, A> const& self, requires_arch<generic>) noexcept
1793	{
1794	return self;
1795	}
1796	namespace detail
1797	{
1798	template <class A, class T>
1799	inline batch<T, A> nearbyintf(batch<T, A> const& self) noexcept
1800	{
1801	using batch_type = batch<T, A>;
1802	batch_type s = bitofsign(self);
1803	batch_type v = self ^ s;
1804	batch_type t2n = constants::twotonmb<batch_type>();
1805	// Under fast-math, reordering is possible and the compiler optimizes d
1806	// to v. That's not what we want, so prevent compiler optimization here.
1807	// FIXME: it may be better to emit a memory barrier here (?).
1808	#ifdef __FAST_MATH__
1809	volatile batch_type d0 = v + t2n;
1810	batch_type d = (batch_type)(void*)(&d0) - t2n;
1811	#else
1812	batch_type d0 = v + t2n;
1813	batch_type d = d0 - t2n;
1814	#endif
1815	return s ^ select(v < t2n, d, v);
1816	}
1817	}
1818	template <class A>
1819	inline batch<float, A> nearbyint(batch<float, A> const& self, requires_arch<generic>) noexcept
1820	{
1821	return detail::nearbyintf(self);
1822	}
1823	template <class A>
1824	inline batch<double, A> nearbyint(batch<double, A> const& self, requires_arch<generic>) noexcept
1825	{
1826	return detail::nearbyintf(self);
1827	}
1828
1829	// nearbyint_as_int
1830	template <class T, class A, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
1831	inline batch<T, A> nearbyint_as_int(batch<T, A> const& self, requires_arch<generic>) noexcept
1832	{
1833	return self;
1834	}
1835
1836	// nearbyint_as_int
1837	template <class A>
1838	inline batch<as_integer_t<float>, A>
1839	nearbyint_as_int(batch<float, A> const& self, requires_arch<generic>) noexcept
1840	{
1841	using U = as_integer_t<float>;
1842	return kernel::detail::apply_transform<U>([](float x) noexcept -> U
1843	{ return std::lroundf(x: x); },
1844	self);
1845	}
1846
1847	template <class A>
1848	inline batch<as_integer_t<double>, A>
1849	nearbyint_as_int(batch<double, A> const& self, requires_arch<generic>) noexcept
1850	{
1851	using U = as_integer_t<double>;
1852	return kernel::detail::apply_transform<U>([](double x) noexcept -> U
1853	{ return std::llround(x: x); },
1854	self);
1855	}
1856
1857	// nextafter
1858	namespace detail
1859	{
1860	template <class T, class A, bool is_int = std::is_integral<T>::value>
1861	struct nextafter_kernel
1862	{
1863	using batch_type = batch<T, A>;
1864
1865	static inline batch_type next(batch_type const& b) noexcept
1866	{
1867	return b;
1868	}
1869
1870	static inline batch_type prev(batch_type const& b) noexcept
1871	{
1872	return b;
1873	}
1874	};
1875
1876	template <class T, class A>
1877	struct bitwise_cast_batch;
1878
1879	template <class A>
1880	struct bitwise_cast_batch<float, A>
1881	{
1882	using type = batch<int32_t, A>;
1883	};
1884
1885	template <class A>
1886	struct bitwise_cast_batch<double, A>
1887	{
1888	using type = batch<int64_t, A>;
1889	};
1890
1891	template <class T, class A>
1892	struct nextafter_kernel<T, A, false>
1893	{
1894	using batch_type = batch<T, A>;
1895	using int_batch = typename bitwise_cast_batch<T, A>::type;
1896	using int_type = typename int_batch::value_type;
1897
1898	static inline batch_type next(const batch_type& b) noexcept
1899	{
1900	batch_type n = ::xsimd::bitwise_cast<batch_type>(::xsimd::bitwise_cast<int_batch>(b) + int_type(`1`));
1901	return select(b == constants::infinity<batch_type>(), b, n);
1902	}
1903
1904	static inline batch_type prev(const batch_type& b) noexcept
1905	{
1906	batch_type p = ::xsimd::bitwise_cast<batch_type>(::xsimd::bitwise_cast<int_batch>(b) - int_type(`1`));
1907	return select(b == constants::minusinfinity<batch_type>(), b, p);
1908	}
1909	};
1910	}
1911	template <class A, class T>
1912	inline batch<T, A> nextafter(batch<T, A> const& from, batch<T, A> const& to, requires_arch<generic>) noexcept
1913	{
1914	using kernel = detail::nextafter_kernel<T, A>;
1915	return select(from == to, from,
1916	select(to > from, kernel::next(from), kernel::prev(from)));
1917	}
1918
1919	// pow
/* origin: boost/simd/arch/common/simd/function/pow.hpp */
1921	/*
1922	* ====================================================
1923	* copyright 2016 NumScale SAS
1924	*
1925	* Distributed under the Boost Software License, Version 1.0.
1926	* (See copy at http://boost.org/LICENSE_1_0.txt)
1927	* ====================================================
1928	*/
1929	template <class A, class T>
1930	inline batch<T, A> pow(batch<T, A> const& self, batch<T, A> const& other, requires_arch<generic>) noexcept
1931	{
1932	using batch_type = batch<T, A>;
1933	const auto zero = batch_type(`0.`);
1934	auto negx = self < zero;
1935	auto iszero = self == zero;
1936	constexpr T e = static_cast<T>(`2.718281828459045`);
1937	auto adj_self = select(iszero, batch_type(e), abs(self));
1938	batch_type z = exp(other * log(adj_self));
1939	z = select(iszero, zero, z);
1940	z = select(is_odd(other) && negx, -z, z);
1941	auto invalid = negx && !(is_flint(other) \|\| isinf(other));
1942	return select(invalid, constants::nan<batch_type>(), z);
1943	}
1944
1945	template <class A, class T>
1946	inline batch<std::complex<T>, A> pow(const batch<std::complex<T>, A>& a, const batch<std::complex<T>, A>& z, requires_arch<generic>) noexcept
1947	{
1948	using cplx_batch = batch<std::complex<T>, A>;
1949	using real_batch = typename cplx_batch::real_batch;
1950	real_batch absa = abs(a);
1951	real_batch arga = arg(a);
1952	real_batch x = z.real();
1953	real_batch y = z.imag();
1954	real_batch r = pow(absa, x);
1955	real_batch theta = x * arga;
1956	real_batch ze(`0`);
1957	auto cond = (y == ze);
1958	r = select(cond, r, r * exp(-y * arga));
1959	theta = select(cond, theta, theta + y * log(absa));
1960	return select(absa == ze, cplx_batch(ze), cplx_batch(r * cos(theta), r * sin(theta)));
1961	}
1962
1963	// reciprocal
1964	template <class T, class A, class = typename std::enable_if<std::is_floating_point<T>::value, void>::type>
1965	inline batch<T, A> reciprocal(batch<T, A> const& self,
1966	requires_arch<generic>) noexcept
1967	{
1968	using batch_type = batch<T, A>;
1969	return div(batch_type(`1`), self);
1970	}
1971
1972	// reduce_add
1973	template <class A, class T>
1974	inline std::complex<T> reduce_add(batch<std::complex<T>, A> const& self, requires_arch<generic>) noexcept
1975	{
1976	return { reduce_add(self.real()), reduce_add(self.imag()) };
1977	}
1978
1979	namespace detail
1980	{
1981	template <class T, T N>
1982	struct split_high
1983	{
1984	static constexpr T get(T i, T)
1985	{
1986	return i >= N ? (i % `2`) : i + N;
1987	}
1988	};
1989
1990	template <class Op, class A, class T>
1991	inline T reduce(Op, batch<T, A> const& self, std::integral_constant<unsigned, `1`>) noexcept
1992	{
1993	return self.get(`0`);
1994	}
1995
1996	template <class Op, class A, class T, unsigned Lvl>
1997	inline T reduce(Op op, batch<T, A> const& self, std::integral_constant<unsigned, Lvl>) noexcept
1998	{
1999	using index_type = as_unsigned_integer_t<T>;
2000	batch<T, A> split = swizzle(self, make_batch_constant<batch<index_type, A>, split_high<index_type, Lvl / `2`>>());
2001	return reduce(op, op(split, self), std::integral_constant<unsigned, Lvl / `2`>());
2002	}
2003	}
2004
2005	// reduce_max
2006	template <class A, class T>
2007	inline T reduce_max(batch<T, A> const& self, requires_arch<generic>) noexcept
2008	{
2009	return detail::reduce([](batch<T, A> const& x, batch<T, A> const& y)
2010	{ return max(x, y); },
2011	self, std::integral_constant<unsigned, batch<T, A>::size>());
2012	}
2013
2014	// reduce_min
2015	template <class A, class T>
2016	inline T reduce_min(batch<T, A> const& self, requires_arch<generic>) noexcept
2017	{
2018	return detail::reduce([](batch<T, A> const& x, batch<T, A> const& y)
2019	{ return min(x, y); },
2020	self, std::integral_constant<unsigned, batch<T, A>::size>());
2021	}
2022
2023	// remainder
2024	template <class A>
2025	inline batch<float, A> remainder(batch<float, A> const& self, batch<float, A> const& other, requires_arch<generic>) noexcept
2026	{
2027	return fnma(nearbyint(self / other), other, self);
2028	}
2029	template <class A>
2030	inline batch<double, A> remainder(batch<double, A> const& self, batch<double, A> const& other, requires_arch<generic>) noexcept
2031	{
2032	return fnma(nearbyint(self / other), other, self);
2033	}
2034	template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
2035	inline batch<T, A> remainder(batch<T, A> const& self, batch<T, A> const& other, requires_arch<generic>) noexcept
2036	{
2037	auto mod = self % other;
2038	return select(mod <= other / `2`, mod, mod - other);
2039	}
2040
2041	// select
2042	template <class A, class T>
2043	inline batch<std::complex<T>, A> select(batch_bool<T, A> const& cond, batch<std::complex<T>, A> const& true_br, batch<std::complex<T>, A> const& false_br, requires_arch<generic>) noexcept
2044	{
2045	return { select(cond, true_br.real(), false_br.real()), select(cond, true_br.imag(), false_br.imag()) };
2046	}
2047
2048	// sign
2049	template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
2050	inline batch<T, A> sign(batch<T, A> const& self, requires_arch<generic>) noexcept
2051	{
2052	using batch_type = batch<T, A>;
2053	batch_type res = select(self > batch_type(`0`), batch_type(`1`), batch_type(`0`)) - select(self < batch_type(`0`), batch_type(`1`), batch_type(`0`));
2054	return res;
2055	}
2056
2057	namespace detail
2058	{
2059	template <class T, class A>
2060	inline batch<T, A> signf(batch<T, A> const& self) noexcept
2061	{
2062	using batch_type = batch<T, A>;
2063	batch_type res = select(self > batch_type(`0.f`), batch_type(`1.f`), batch_type(`0.f`)) - select(self < batch_type(`0.f`), batch_type(`1.f`), batch_type(`0.f`));
2064	#ifdef XSIMD_NO_NANS
2065	return res;
2066	#else
2067	return select(isnan(self), constants::nan<batch_type>(), res);
2068	#endif
2069	}
2070	}
2071
2072	template <class A>
2073	inline batch<float, A> sign(batch<float, A> const& self, requires_arch<generic>) noexcept
2074	{
2075	return detail::signf(self);
2076	}
2077	template <class A>
2078	inline batch<double, A> sign(batch<double, A> const& self, requires_arch<generic>) noexcept
2079	{
2080	return detail::signf(self);
2081	}
2082	template <class A, class T>
2083	inline batch<std::complex<T>, A> sign(const batch<std::complex<T>, A>& z, requires_arch<generic>) noexcept
2084	{
2085	using batch_type = batch<std::complex<T>, A>;
2086	using real_batch = typename batch_type::real_batch;
2087	auto rz = z.real();
2088	auto iz = z.imag();
2089	return select(rz != real_batch(`0.`),
2090	batch_type(sign(rz)),
2091	batch_type(sign(iz)));
2092	}
2093
2094	// signnz
2095	template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
2096	inline batch<T, A> signnz(batch<T, A> const& self, requires_arch<generic>) noexcept
2097	{
2098	using batch_type = batch<T, A>;
2099	return (self >> (sizeof(T) * `8` - `1`)) \| batch_type(`1.`);
2100	}
2101
2102	namespace detail
2103	{
2104	template <class T, class A>
2105	inline batch<T, A> signnzf(batch<T, A> const& self) noexcept
2106	{
2107	using batch_type = batch<T, A>;
2108	#ifndef XSIMD_NO_NANS
2109	return select(isnan(self), constants::nan<batch_type>(), batch_type(`1.`) \| (constants::signmask<batch_type>() & self));
2110	#else
2111	return batch_type(`1.`) \| (constants::signmask<batch_type>() & self);
2112	#endif
2113	}
2114	}
2115
2116	template <class A>
2117	inline batch<float, A> signnz(batch<float, A> const& self, requires_arch<generic>) noexcept
2118	{
2119	return detail::signnzf(self);
2120	}
2121	template <class A>
2122	inline batch<double, A> signnz(batch<double, A> const& self, requires_arch<generic>) noexcept
2123	{
2124	return detail::signnzf(self);
2125	}
2126
2127	// sqrt
2128	template <class A, class T>
2129	inline batch<std::complex<T>, A> sqrt(batch<std::complex<T>, A> const& z, requires_arch<generic>) noexcept
2130	{
2131
2132	constexpr T csqrt_scale_factor = std::is_same<T, float>::value ? `6.7108864e7f` : `1.8014398509481984e16`;
2133	constexpr T csqrt_scale = std::is_same<T, float>::value ? `1.220703125e-4f` : `7.450580596923828125e-9`;
2134	using batch_type = batch<std::complex<T>, A>;
2135	using real_batch = batch<T, A>;
2136	real_batch x = z.real();
2137	real_batch y = z.imag();
2138	real_batch sqrt_x = sqrt(fabs(x));
2139	real_batch sqrt_hy = sqrt(`0.5` * fabs(y));
2140	auto cond = (fabs(x) > real_batch(`4.`) \|\| fabs(y) > real_batch(`4.`));
2141	x = select(cond, x * `0.25`, x * csqrt_scale_factor);
2142	y = select(cond, y * `0.25`, y * csqrt_scale_factor);
2143	real_batch scale = select(cond, real_batch(`2.`), real_batch(csqrt_scale));
2144	real_batch r = abs(batch_type(x, y));
2145
2146	auto condxp = x > real_batch(`0.`);
2147	real_batch t0 = select(condxp, xsimd::sqrt(`0.5` * (r + x)), xsimd::sqrt(`0.5` * (r - x)));
2148	real_batch r0 = scale * fabs((`0.5` * y) / t0);
2149	t0 *= scale;
2150	real_batch t = select(condxp, t0, r0);
2151	r = select(condxp, r0, t0);
2152	batch_type resg = select(y < real_batch(`0.`), batch_type(t, -r), batch_type(t, r));
2153	real_batch ze(`0.`);
2154
2155	return select(y == ze,
2156	select(x == ze,
2157	batch_type(ze, ze),
2158	select(x < ze, batch_type(ze, sqrt_x), batch_type(sqrt_x, ze))),
2159	select(x == ze,
2160	select(y > ze, batch_type(sqrt_hy, sqrt_hy), batch_type(sqrt_hy, -sqrt_hy)),
2161	resg));
2162	}
2163
2164	// tgamma
2165
2166	namespace detail
2167	{
/* origin: boost/simd/arch/common/detail/generic/stirling_kernel.hpp */
2169	/*
2170	* ====================================================
2171	* copyright 2016 NumScale SAS
2172	*
2173	* Distributed under the Boost Software License, Version 1.0.
2174	* (See copy at http://boost.org/LICENSE_1_0.txt)
2175	* ====================================================
2176	*/
2177	template <class B>
2178	struct stirling_kernel;
2179
2180	template <class A>
2181	struct stirling_kernel<batch<float, A>>
2182	{
2183	using batch_type = batch<float, A>;
2184	static inline batch_type compute(const batch_type& x) noexcept
2185	{
2186	return horner<batch_type,
2187	`0x3daaaaab`,
2188	`0x3b638e39`,
2189	`0xbb2fb930`,
2190	`0xb970b359`>(x);
2191	}
2192
2193	static inline batch_type split_limit() noexcept
2194	{
2195	return batch_type(bit_cast<float>(val: uint32_t(`0x41d628f6`)));
2196	}
2197
2198	static inline batch_type large_limit() noexcept
2199	{
2200	return batch_type(bit_cast<float>(val: uint32_t(`0x420c28f3`)));
2201	}
2202	};
2203
2204	template <class A>
2205	struct stirling_kernel<batch<double, A>>
2206	{
2207	using batch_type = batch<double, A>;
2208	static inline batch_type compute(const batch_type& x) noexcept
2209	{
2210	return horner<batch_type,
2211	`0x3fb5555555555986ull`, // 8.33333333333482257126E-2
2212	`0x3f6c71c71b98c5fdull`, // 3.47222221605458667310E-3
2213	`0xbf65f72607d44fd7ull`, // -2.68132617805781232825E-3
2214	`0xbf2e166b27e61d7cull`, // -2.29549961613378126380E-4
2215	`0x3f49cc72592d7293ull` // 7.87311395793093628397E-4
2216	>(x);
2217	}
2218
2219	static inline batch_type split_limit() noexcept
2220	{
2221	return batch_type(bit_cast<double>(val: uint64_t(`0x4061e083ba3443d4`)));
2222	}
2223
2224	static inline batch_type large_limit() noexcept
2225	{
2226	return batch_type(bit_cast<double>(val: uint64_t(`0x4065800000000000`)));
2227	}
2228	};
2229
/* origin: boost/simd/arch/common/simd/function/stirling.hpp */
2231	/*
2232	* ====================================================
2233	* copyright 2016 NumScale SAS
2234	*
2235	* Distributed under the Boost Software License, Version 1.0.
2236	* (See copy at http://boost.org/LICENSE_1_0.txt)
2237	* ====================================================
2238	*/
2239	template <class T, class A>
2240	inline batch<T, A> stirling(const batch<T, A>& a) noexcept
2241	{
2242	using batch_type = batch<T, A>;
2243	const batch_type stirlingsplitlim = stirling_kernel<batch_type>::split_limit();
2244	const batch_type stirlinglargelim = stirling_kernel<batch_type>::large_limit();
2245	batch_type x = select(a >= batch_type(`0.`), a, constants::nan<batch_type>());
2246	batch_type w = batch_type(`1.`) / x;
2247	w = fma(w, stirling_kernel<batch_type>::compute(w), batch_type(`1.`));
2248	batch_type y = exp(-x);
2249	auto test = (x < stirlingsplitlim);
2250	batch_type z = x - batch_type(`0.5`);
2251	z = select(test, z, batch_type(`0.5`) * z);
2252	batch_type v = exp(z * log(abs(x)));
2253	y *= v;
2254	y = select(test, y, y * v);
2255	y = constants::sqrt_2pi<batch_type>() w;
2256	#ifndef XSIMD_NO_INFINITIES
2257	y = select(isinf(x), x, y);
2258	#endif
2259	return select(x > stirlinglargelim, constants::infinity<batch_type>(), y);
2260	}
2261
/* origin: boost/simd/arch/common/detail/generic/gamma_kernel.hpp */
2263	/*
2264	* ====================================================
2265	* copyright 2016 NumScale SAS
2266	*
2267	* Distributed under the Boost Software License, Version 1.0.
2268	* (See copy at http://boost.org/LICENSE_1_0.txt)
2269	* ====================================================
2270	*/
2271	template <class B>
2272	struct tgamma_kernel;
2273
2274	template <class A>
2275	struct tgamma_kernel<batch<float, A>>
2276	{
2277	using batch_type = batch<float, A>;
2278	static inline batch_type compute(const batch_type& x) noexcept
2279	{
2280	return horner<batch_type,
2281	`0x3f800000UL`, // 9.999999757445841E-01
2282	`0x3ed87799UL`, // 4.227874605370421E-01
2283	`0x3ed2d411UL`, // 4.117741948434743E-01
2284	`0x3da82a34UL`, // 8.211174403261340E-02
2285	`0x3d93ae7cUL`, // 7.211014349068177E-02
2286	`0x3b91db14UL`, // 4.451165155708328E-03
2287	`0x3ba90c99UL`, // 5.158972571345137E-03
2288	`0x3ad28b22UL` // 1.606319369134976E-03
2289	>(x);
2290	}
2291	};
2292
2293	template <class A>
2294	struct tgamma_kernel<batch<double, A>>
2295	{
2296	using batch_type = batch<double, A>;
2297	static inline batch_type compute(const batch_type& x) noexcept
2298	{
2299	return horner<batch_type,
2300	`0x3ff0000000000000ULL`, // 9.99999999999999996796E-1
2301	`0x3fdfa1373993e312ULL`, // 4.94214826801497100753E-1
2302	`0x3fca8da9dcae7d31ULL`, // 2.07448227648435975150E-1
2303	`0x3fa863d918c423d3ULL`, // 4.76367800457137231464E-2
2304	`0x3f8557cde9db14b0ULL`, // 1.04213797561761569935E-2
2305	`0x3f5384e3e686bfabULL`, // 1.19135147006586384913E-3
2306	`0x3f24fcb839982153ULL` // 1.60119522476751861407E-4
2307	>(x)
2308	/ horner<batch_type,
2309	`0x3ff0000000000000ULL`, // 1.00000000000000000320E00
2310	`0x3fb24944c9cd3c51ULL`, // 7.14304917030273074085E-2
2311	`0xbfce071a9d4287c2ULL`, // -2.34591795718243348568E-1
2312	`0x3fa25779e33fde67ULL`, // 3.58236398605498653373E-2
2313	`0x3f8831ed5b1bb117ULL`, // 1.18139785222060435552E-2
2314	`0xBf7240e4e750b44aULL`, // -4.45641913851797240494E-3
2315	`0x3f41ae8a29152573ULL`, // 5.39605580493303397842E-4
2316	`0xbef8487a8400d3aFULL` // -2.31581873324120129819E-5
2317	>(x);
2318	}
2319	};
2320
/* origin: boost/simd/arch/common/simd/function/gamma.hpp */
2322	/*
2323	* ====================================================
2324	* copyright 2016 NumScale SAS
2325	*
2326	* Distributed under the Boost Software License, Version 1.0.
2327	* (See copy at http://boost.org/LICENSE_1_0.txt)
2328	* ====================================================
2329	*/
2330	template <class B>
2331	inline B tgamma_large_negative(const B& a) noexcept
2332	{
2333	B st = stirling(a);
2334	B p = floor(a);
2335	B sgngam = select(is_even(p), -B(`1.`), B(`1.`));
2336	B z = a - p;
2337	auto test2 = z < B(`0.5`);
2338	z = select(test2, z - B(`1.`), z);
2339	z = a * sin(z, trigo_pi_tag ());
2340	z = abs(z);
2341	return sgngam * constants::pi<B>() / (z * st);
2342	}
2343
2344	template <class B, class BB>
2345	inline B tgamma_other(const B& a, const BB& test) noexcept
2346	{
2347	B x = select(test, B(`2.`), a);
2348	#ifndef XSIMD_NO_INFINITIES
2349	auto inf_result = (a == constants::infinity<B>());
2350	x = select(inf_result, B(`2.`), x);
2351	#endif
2352	B z = B(`1.`);
2353	auto test1 = (x >= B(`3.`));
2354	while (any(test1))
2355	{
2356	x = select(test1, x - B(`1.`), x);
2357	z = select(test1, z * x, z);
2358	test1 = (x >= B(`3.`));
2359	}
2360	test1 = (x < B(`0.`));
2361	while (any(test1))
2362	{
2363	z = select(test1, z / x, z);
2364	x = select(test1, x + B(`1.`), x);
2365	test1 = (x < B(`0.`));
2366	}
2367	auto test2 = (x < B(`2.`));
2368	while (any(test2))
2369	{
2370	z = select(test2, z / x, z);
2371	x = select(test2, x + B(`1.`), x);
2372	test2 = (x < B(`2.`));
2373	}
2374	x = z * tgamma_kernel<B>::compute(x - B(`2.`));
2375	#ifndef XSIMD_NO_INFINITIES
2376	return select(inf_result, a, x);
2377	#else
2378	return x;
2379	#endif
2380	}
2381	}
2382
2383	template <class A, class T>
2384	inline batch<T, A> tgamma(batch<T, A> const& self, requires_arch<generic>) noexcept
2385	{
2386	using batch_type = batch<T, A>;
2387	auto nan_result = (self < batch_type(`0.`) && is_flint(self));
2388	#ifndef XSIMD_NO_INVALIDS
2389	nan_result = isnan(self) \|\| nan_result;
2390	#endif
2391	batch_type q = abs(self);
2392	auto test = (self < batch_type(-`33.`));
2393	batch_type r = constants::nan<batch_type>();
2394	if (any(test))
2395	{
2396	r = detail::tgamma_large_negative(q);
2397	if (all(test))
2398	return select(nan_result, constants::nan<batch_type>(), r);
2399	}
2400	batch_type r1 = detail::tgamma_other(self, test);
2401	batch_type r2 = select(test, r, r1);
2402	return select(self == batch_type(`0.`), copysign(constants::infinity<batch_type>(), self), select(nan_result, constants::nan<batch_type>(), r2));
2403	}
2404
2405	}
2406
2407	}
2408
2409	#endif
2410

Browse the source code of Velox/build/_deps/xsimd-src/include/xsimd/arch/generic/xsimd_generic_math.hpp