1/***************************************************************************
2 * Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and *
3 * Martin Renou *
4 * Copyright (c) QuantStack *
5 * Copyright (c) Serge Guelton *
6 * *
7 * Distributed under the terms of the BSD 3-Clause License. *
8 * *
9 * The full license is in the file LICENSE, distributed with this software. *
10 ****************************************************************************/
11
12#ifndef XSIMD_GENERIC_TRIGO_HPP
13#define XSIMD_GENERIC_TRIGO_HPP
14
15#include "./xsimd_generic_details.hpp"
16
17#include <array>
18
19namespace xsimd
20{
21
22 namespace kernel
23 {
24 /* origin: boost/simd/arch/common/detail/simd/trig_base.hpp */
25 /*
26 * ====================================================
27 * copyright 2016 NumScale SAS
28 *
29 * Distributed under the Boost Software License, Version 1.0.
30 * (See copy at http://boost.org/LICENSE_1_0.txt)
31 * ====================================================
32 */
33
34 using namespace types;
35
36 // acos
37 template <class A, class T>
38 inline batch<T, A> acos(batch<T, A> const& self, requires_arch<generic>) noexcept
39 {
40 using batch_type = batch<T, A>;
41 batch_type x = abs(self);
42 auto x_larger_05 = x > batch_type(0.5);
43 x = select(x_larger_05, sqrt(fma(batch_type(-0.5), x, batch_type(0.5))), self);
44 x = asin(x);
45 x = select(x_larger_05, x + x, x);
46 x = select(self < batch_type(-0.5), constants::pi<batch_type>() - x, x);
47 return select(x_larger_05, x, constants::pio2<batch_type>() - x);
48 }
49 template <class A, class T>
50 inline batch<std::complex<T>, A> acos(const batch<std::complex<T>, A>& z, requires_arch<generic>) noexcept
51 {
52 using batch_type = batch<std::complex<T>, A>;
53 using real_batch = typename batch_type::real_batch;
54 batch_type tmp = asin(z);
55 return { constants::pio2<real_batch>() - tmp.real(), -tmp.imag() };
56 }
57
58 // acosh
59 /* origin: boost/simd/arch/common/simd/function/acosh.hpp */
60 /*
61 * ====================================================
62 * copyright 2016 NumScale SAS
63 *
64 * Distributed under the Boost Software License, Version 1.0.
65 * (See copy at http://boost.org/LICENSE_1_0.txt)
66 * ====================================================
67 */
68 template <class A, class T>
69 inline batch<T, A> acosh(batch<T, A> const& self, requires_arch<generic>) noexcept
70 {
71 using batch_type = batch<T, A>;
72 batch_type x = self - batch_type(1.);
73 auto test = x > constants::oneotwoeps<batch_type>();
74 batch_type z = select(test, self, x + sqrt(x + x + x * x));
75 batch_type l1pz = log1p(z);
76 return select(test, l1pz + constants::log_2<batch_type>(), l1pz);
77 }
78 template <class A, class T>
79 inline batch<std::complex<T>, A> acosh(const batch<std::complex<T>, A>& z, requires_arch<generic>) noexcept
80 {
81 using batch_type = batch<std::complex<T>, A>;
82 batch_type w = acos(z);
83 w = batch_type(-w.imag(), w.real());
84 return w;
85 }
86
87 // asin
88 template <class A>
89 inline batch<float, A> asin(batch<float, A> const& self, requires_arch<generic>) noexcept
90 {
91 using batch_type = batch<float, A>;
92 batch_type x = abs(self);
93 batch_type sign = bitofsign(self);
94 auto x_larger_05 = x > batch_type(0.5);
95 batch_type z = select(x_larger_05, batch_type(0.5) * (batch_type(1.) - x), x * x);
96 x = select(x_larger_05, sqrt(z), x);
97 batch_type z1 = detail::horner<batch_type,
98 0x3e2aaae4,
99 0x3d9980f6,
100 0x3d3a3ec7,
101 0x3cc617e3,
102 0x3d2cb352>(z);
103 z1 = fma(z1, z * x, x);
104 z = select(x_larger_05, constants::pio2<batch_type>() - (z1 + z1), z1);
105 return z ^ sign;
106 }
107 template <class A>
108 inline batch<double, A> asin(batch<double, A> const& self, requires_arch<generic>) noexcept
109 {
110 using batch_type = batch<double, A>;
111 batch_type x = abs(self);
112 auto small_cond = x < constants::sqrteps<batch_type>();
113 batch_type ct1 = batch_type(bit_cast<double>(val: int64_t(0x3fe4000000000000)));
114 batch_type zz1 = batch_type(1.) - x;
115 batch_type vp = zz1 * detail::horner<batch_type, 0x403c896240f3081dull, 0xc03991aaac01ab68ull, 0x401bdff5baf33e6aull, 0xbfe2079259f9290full, 0x3f684fc3988e9f08ull>(zz1) / detail::horner1<batch_type, 0x40756709b0b644beull, 0xc077fe08959063eeull, 0x40626219af6a7f42ull, 0xc035f2a2b6bf5d8cull>(zz1);
116 zz1 = sqrt(zz1 + zz1);
117 batch_type z = constants::pio4<batch_type>() - zz1;
118 zz1 = fms(zz1, vp, constants::pio_2lo<batch_type>());
119 z = z - zz1;
120 zz1 = z + constants::pio4<batch_type>();
121 batch_type zz2 = self * self;
122 z = zz2 * detail::horner<batch_type, 0xc020656c06ceafd5ull, 0x40339007da779259ull, 0xc0304331de27907bull, 0x4015c74b178a2dd9ull, 0xbfe34341333e5c16ull, 0x3f716b9b0bd48ad3ull>(zz2) / detail::horner1<batch_type, 0xc04898220a3607acull, 0x4061705684ffbf9dull, 0xc06265bb6d3576d7ull, 0x40519fc025fe9054ull, 0xc02d7b590b5e0eabull>(zz2);
123 zz2 = fma(x, z, x);
124 return select(x > batch_type(1.), constants::nan<batch_type>(),
125 select(small_cond, x,
126 select(x > ct1, zz1, zz2))
127 ^ bitofsign(self));
128 }
129 template <class A, class T>
130 inline batch<std::complex<T>, A> asin(const batch<std::complex<T>, A>& z, requires_arch<generic>) noexcept
131 {
132 using batch_type = batch<std::complex<T>, A>;
133 using real_batch = typename batch_type::real_batch;
134 real_batch x = z.real();
135 real_batch y = z.imag();
136
137 batch_type ct(-y, x);
138 batch_type zz(real_batch(1.) - (x - y) * (x + y), -2 * x * y);
139 zz = log(ct + sqrt(zz));
140 batch_type resg(zz.imag(), -zz.real());
141
142 return select(y == real_batch(0.),
143 select(fabs(x) > real_batch(1.),
144 batch_type(constants::pio2<real_batch>(), real_batch(0.)),
145 batch_type(asin(x), real_batch(0.))),
146 resg);
147 }
148
149 // asinh
150 /* origin: boost/simd/arch/common/simd/function/asinh.hpp */
151 /*
152 * ====================================================
153 * copyright 2016 NumScale SAS
154 *
155 * Distributed under the Boost Software License, Version 1.0.
156 * (See copy at http://boost.org/LICENSE_1_0.txt)
157 * ====================================================
158 */
159 namespace detail
160 {
161 template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
162 inline batch<T, A>
163 average(const batch<T, A>& x1, const batch<T, A>& x2) noexcept
164 {
165 return (x1 & x2) + ((x1 ^ x2) >> 1);
166 }
167
168 template <class A, class T>
169 inline batch<T, A>
170 averagef(const batch<T, A>& x1, const batch<T, A>& x2) noexcept
171 {
172 using batch_type = batch<T, A>;
173 return fma(x1, batch_type(0.5), x2 * batch_type(0.5));
174 }
175 template <class A>
176 inline batch<float, A> average(batch<float, A> const& x1, batch<float, A> const& x2) noexcept
177 {
178 return averagef(x1, x2);
179 }
180 template <class A>
181 inline batch<double, A> average(batch<double, A> const& x1, batch<double, A> const& x2) noexcept
182 {
183 return averagef(x1, x2);
184 }
185 }
186 template <class A>
187 inline batch<float, A> asinh(batch<float, A> const& self, requires_arch<generic>) noexcept
188 {
189 using batch_type = batch<float, A>;
190 batch_type x = abs(self);
191 auto lthalf = x < batch_type(0.5);
192 batch_type x2 = x * x;
193 batch_type bts = bitofsign(self);
194 batch_type z(0.);
195 if (any(lthalf))
196 {
197 z = detail::horner<batch_type,
198 0x3f800000,
199 0xbe2aa9ad,
200 0x3d9949b1,
201 0xbd2ee581,
202 0x3ca4d6e6>(x2)
203 * x;
204 if (all(lthalf))
205 return z ^ bts;
206 }
207 batch_type tmp = select(x > constants::oneosqrteps<batch_type>(), x, detail::average(x, hypot(batch_type(1.), x)));
208#ifndef XSIMD_NO_NANS
209 return select(isnan(self), constants::nan<batch_type>(), select(lthalf, z, log(tmp) + constants::log_2<batch_type>()) ^ bts);
210#else
211 return select(lthalf, z, log(tmp) + constants::log_2<batch_type>()) ^ bts;
212#endif
213 }
214 template <class A>
215 inline batch<double, A> asinh(batch<double, A> const& self, requires_arch<generic>) noexcept
216 {
217 using batch_type = batch<double, A>;
218 batch_type x = abs(self);
219 auto test = x > constants::oneosqrteps<batch_type>();
220 batch_type z = select(test, x - batch_type(1.), x + x * x / (batch_type(1.) + hypot(batch_type(1.), x)));
221#ifndef XSIMD_NO_INFINITIES
222 z = select(x == constants::infinity<batch_type>(), x, z);
223#endif
224 batch_type l1pz = log1p(z);
225 z = select(test, l1pz + constants::log_2<batch_type>(), l1pz);
226 return bitofsign(self) ^ z;
227 }
228 template <class A, class T>
229 inline batch<std::complex<T>, A> asinh(const batch<std::complex<T>, A>& z, requires_arch<generic>) noexcept
230 {
231 using batch_type = batch<std::complex<T>, A>;
232 batch_type w = asin(batch_type(-z.imag(), z.real()));
233 w = batch_type(w.imag(), -w.real());
234 return w;
235 }
236
237 // atan
238 namespace detail
239 {
240 template <class A>
241 static inline batch<float, A> kernel_atan(const batch<float, A>& x, const batch<float, A>& recx) noexcept
242 {
243 using batch_type = batch<float, A>;
244 const auto flag1 = x < constants::tan3pio8<batch_type>();
245 const auto flag2 = (x >= batch_type(bit_cast<float>(val: (uint32_t)0x3ed413cd))) && flag1;
246 batch_type yy = select(flag1, batch_type(0.), constants::pio2<batch_type>());
247 yy = select(flag2, constants::pio4<batch_type>(), yy);
248 batch_type xx = select(flag1, x, -recx);
249 xx = select(flag2, (x - batch_type(1.)) / (x + batch_type(1.)), xx);
250 const batch_type z = xx * xx;
251 batch_type z1 = detail::horner<batch_type,
252 0xbeaaaa2aul,
253 0x3e4c925ful,
254 0xbe0e1b85ul,
255 0x3da4f0d1ul>(z);
256 z1 = fma(xx, z1 * z, xx);
257 z1 = select(flag2, z1 + constants::pio_4lo<batch_type>(), z1);
258 z1 = select(!flag1, z1 + constants::pio_2lo<batch_type>(), z1);
259 return yy + z1;
260 }
261 template <class A>
262 static inline batch<double, A> kernel_atan(const batch<double, A>& x, const batch<double, A>& recx) noexcept
263 {
264 using batch_type = batch<double, A>;
265 const auto flag1 = x < constants::tan3pio8<batch_type>();
266 const auto flag2 = (x >= constants::tanpio8<batch_type>()) && flag1;
267 batch_type yy = select(flag1, batch_type(0.), constants::pio2<batch_type>());
268 yy = select(flag2, constants::pio4<batch_type>(), yy);
269 batch_type xx = select(flag1, x, -recx);
270 xx = select(flag2, (x - batch_type(1.)) / (x + batch_type(1.)), xx);
271 batch_type z = xx * xx;
272 z *= detail::horner<batch_type,
273 0xc0503669fd28ec8eull,
274 0xc05eb8bf2d05ba25ull,
275 0xc052c08c36880273ull,
276 0xc03028545b6b807aull,
277 0xbfec007fa1f72594ull>(z)
278 / detail::horner1<batch_type,
279 0x4068519efbbd62ecull,
280 0x407e563f13b049eaull,
281 0x407b0e18d2e2be3bull,
282 0x4064a0dd43b8fa25ull,
283 0x4038dbc45b14603cull>(z);
284 z = fma(xx, z, xx);
285 z = select(flag2, z + constants::pio_4lo<batch_type>(), z);
286 z = z + select(flag1, batch_type(0.), constants::pio_2lo<batch_type>());
287 return yy + z;
288 }
289 }
290 template <class A, class T>
291 inline batch<T, A> atan(batch<T, A> const& self, requires_arch<generic>) noexcept
292 {
293 using batch_type = batch<T, A>;
294 const batch_type absa = abs(self);
295 const batch_type x = detail::kernel_atan(absa, batch_type(1.) / absa);
296 return x ^ bitofsign(self);
297 }
298 template <class A, class T>
299 inline batch<std::complex<T>, A> atan(const batch<std::complex<T>, A>& z, requires_arch<generic>) noexcept
300 {
301 using batch_type = batch<std::complex<T>, A>;
302 using real_batch = typename batch_type::real_batch;
303 real_batch x = z.real();
304 real_batch y = z.imag();
305 real_batch x2 = x * x;
306 real_batch one(1.);
307 real_batch a = one - x2 - (y * y);
308 real_batch w = 0.5 * atan2(2. * x, a);
309 real_batch num = y + one;
310 num = x2 + num * num;
311 real_batch den = y - one;
312 den = x2 + den * den;
313 batch_type res = select((x == real_batch(0.)) && (y == real_batch(1.)),
314 batch_type(real_batch(0.), constants::infinity<real_batch>()),
315 batch_type(w, 0.25 * log(num / den)));
316 return res;
317 }
318
319 // atanh
320 /* origin: boost/simd/arch/common/simd/function/acosh.hpp */
321 /*
322 * ====================================================
323 * copyright 2016 NumScale SAS
324 *
325 * Distributed under the Boost Software License, Version 1.0.
326 * (See copy at http://boost.org/LICENSE_1_0.txt)
327 * ====================================================
328 */
329 template <class A, class T>
330 inline batch<T, A> atanh(batch<T, A> const& self, requires_arch<generic>) noexcept
331 {
332 using batch_type = batch<T, A>;
333 batch_type x = abs(self);
334 batch_type t = x + x;
335 batch_type z = batch_type(1.) - x;
336 auto test = x < batch_type(0.5);
337 batch_type tmp = select(test, x, t) / z;
338 return bitofsign(self) ^ (batch_type(0.5) * log1p(select(test, fma(t, tmp, t), tmp)));
339 }
340 template <class A, class T>
341 inline batch<std::complex<T>, A> atanh(const batch<std::complex<T>, A>& z, requires_arch<generic>) noexcept
342 {
343 using batch_type = batch<std::complex<T>, A>;
344 batch_type w = atan(batch_type(-z.imag(), z.real()));
345 w = batch_type(w.imag(), -w.real());
346 return w;
347 }
348
349 // atan2
350 template <class A, class T>
351 inline batch<T, A> atan2(batch<T, A> const& self, batch<T, A> const& other, requires_arch<generic>) noexcept
352 {
353 using batch_type = batch<T, A>;
354 const batch_type q = abs(self / other);
355 const batch_type z = detail::kernel_atan(q, batch_type(1.) / q);
356 return select(other > batch_type(0.), z, constants::pi<batch_type>() - z) * signnz(self);
357 }
358
359 // cos
360 namespace detail
361 {
362 template <class T, class A>
363 inline batch<T, A> quadrant(const batch<T, A>& x) noexcept
364 {
365 return x & batch<T, A>(3);
366 }
367
368 template <class A>
369 inline batch<float, A> quadrant(const batch<float, A>& x) noexcept
370 {
371 return to_float(quadrant(to_int(x)));
372 }
373
374 template <class A>
375 inline batch<double, A> quadrant(const batch<double, A>& x) noexcept
376 {
377 using batch_type = batch<double, A>;
378 batch_type a = x * batch_type(0.25);
379 return (a - floor(a)) * batch_type(4.);
380 }
381 /* origin: boost/simd/arch/common/detail/simd/f_trig_evaluation.hpp */
382 /*
383 * ====================================================
384 * copyright 2016 NumScale SAS
385 *
386 * Distributed under the Boost Software License, Version 1.0.
387 * (See copy at http://boost.org/LICENSE_1_0.txt)
388 * ====================================================
389 */
390
391 template <class A>
392 inline batch<float, A> cos_eval(const batch<float, A>& z) noexcept
393 {
394 using batch_type = batch<float, A>;
395 batch_type y = detail::horner<batch_type,
396 0x3d2aaaa5,
397 0xbab60619,
398 0x37ccf5ce>(z);
399 return batch_type(1.) + fma(z, batch_type(-0.5), y * z * z);
400 }
401
402 template <class A>
403 inline batch<float, A> sin_eval(const batch<float, A>& z, const batch<float, A>& x) noexcept
404 {
405 using batch_type = batch<float, A>;
406 batch_type y = detail::horner<batch_type,
407 0xbe2aaaa2,
408 0x3c08839d,
409 0xb94ca1f9>(z);
410 return fma(y * z, x, x);
411 }
412
413 template <class A>
414 static inline batch<float, A> base_tancot_eval(const batch<float, A>& z) noexcept
415 {
416 using batch_type = batch<float, A>;
417 batch_type zz = z * z;
418 batch_type y = detail::horner<batch_type,
419 0x3eaaaa6f,
420 0x3e0896dd,
421 0x3d5ac5c9,
422 0x3cc821b5,
423 0x3b4c779c,
424 0x3c19c53b>(zz);
425 return fma(y, zz * z, z);
426 }
427
428 template <class A, class BB>
429 static inline batch<float, A> tan_eval(const batch<float, A>& z, const BB& test) noexcept
430 {
431 using batch_type = batch<float, A>;
432 batch_type y = base_tancot_eval(z);
433 return select(test, y, -batch_type(1.) / y);
434 }
435
436 template <class A, class BB>
437 static inline batch<float, A> cot_eval(const batch<float, A>& z, const BB& test) noexcept
438 {
439 using batch_type = batch<float, A>;
440 batch_type y = base_tancot_eval(z);
441 return select(test, batch_type(1.) / y, -y);
442 }
443
444 /* origin: boost/simd/arch/common/detail/simd/d_trig_evaluation.hpp */
445 /*
446 * ====================================================
447 * copyright 2016 NumScale SAS
448 *
449 * Distributed under the Boost Software License, Version 1.0.
450 * (See copy at http://boost.org/LICENSE_1_0.txt)
451 * ====================================================
452 */
453 template <class A>
454 static inline batch<double, A> cos_eval(const batch<double, A>& z) noexcept
455 {
456 using batch_type = batch<double, A>;
457 batch_type y = detail::horner<batch_type,
458 0x3fe0000000000000ull,
459 0xbfa5555555555551ull,
460 0x3f56c16c16c15d47ull,
461 0xbefa01a019ddbcd9ull,
462 0x3e927e4f8e06d9a5ull,
463 0xbe21eea7c1e514d4ull,
464 0x3da8ff831ad9b219ull>(z);
465 return batch_type(1.) - y * z;
466 }
467
468 template <class A>
469 static inline batch<double, A> sin_eval(const batch<double, A>& z, const batch<double, A>& x) noexcept
470 {
471 using batch_type = batch<double, A>;
472 batch_type y = detail::horner<batch_type,
473 0xbfc5555555555548ull,
474 0x3f8111111110f7d0ull,
475 0xbf2a01a019bfdf03ull,
476 0x3ec71de3567d4896ull,
477 0xbe5ae5e5a9291691ull,
478 0x3de5d8fd1fcf0ec1ull>(z);
479 return fma(y * z, x, x);
480 }
481
482 template <class A>
483 static inline batch<double, A> base_tancot_eval(const batch<double, A>& z) noexcept
484 {
485 using batch_type = batch<double, A>;
486 batch_type zz = z * z;
487 batch_type num = detail::horner<batch_type,
488 0xc1711fead3299176ull,
489 0x413199eca5fc9dddull,
490 0xc0c992d8d24f3f38ull>(zz);
491 batch_type den = detail::horner1<batch_type,
492 0xc189afe03cbe5a31ull,
493 0x4177d98fc2ead8efull,
494 0xc13427bc582abc96ull,
495 0x40cab8a5eeb36572ull>(zz);
496 return fma(z, (zz * (num / den)), z);
497 }
498
499 template <class A, class BB>
500 static inline batch<double, A> tan_eval(const batch<double, A>& z, const BB& test) noexcept
501 {
502 using batch_type = batch<double, A>;
503 batch_type y = base_tancot_eval(z);
504 return select(test, y, -batch_type(1.) / y);
505 }
506
507 template <class A, class BB>
508 static inline batch<double, A> cot_eval(const batch<double, A>& z, const BB& test) noexcept
509 {
510 using batch_type = batch<double, A>;
511 batch_type y = base_tancot_eval(z);
512 return select(test, batch_type(1.) / y, -y);
513 }
514 /* origin: boost/simd/arch/common/detail/simd/trig_reduction.hpp */
515 /*
516 * ====================================================
517 * copyright 2016 NumScale SAS
518 *
519 * Distributed under the Boost Software License, Version 1.0.
520 * (See copy at http://boost.org/LICENSE_1_0.txt)
521 * ====================================================
522 */
523
524 struct trigo_radian_tag
525 {
526 };
527 struct trigo_pi_tag
528 {
529 };
530
531 template <class B, class Tag = trigo_radian_tag>
532 struct trigo_reducer
533 {
534 static inline B reduce(const B& x, B& xr) noexcept
535 {
536 if (all(x <= constants::pio4<B>()))
537 {
538 xr = x;
539 return B(0.);
540 }
541 else if (all(x <= constants::pio2<B>()))
542 {
543 auto test = x > constants::pio4<B>();
544 xr = x - constants::pio2_1<B>();
545 xr -= constants::pio2_2<B>();
546 xr -= constants::pio2_3<B>();
547 xr = select(test, xr, x);
548 return select(test, B(1.), B(0.));
549 }
550 else if (all(x <= constants::twentypi<B>()))
551 {
552 B xi = nearbyint(x * constants::twoopi<B>());
553 xr = fnma(xi, constants::pio2_1<B>(), x);
554 xr -= xi * constants::pio2_2<B>();
555 xr -= xi * constants::pio2_3<B>();
556 return quadrant(xi);
557 }
558 else if (all(x <= constants::mediumpi<B>()))
559 {
560 B fn = nearbyint(x * constants::twoopi<B>());
561 B r = x - fn * constants::pio2_1<B>();
562 B w = fn * constants::pio2_1t<B>();
563 B t = r;
564 w = fn * constants::pio2_2<B>();
565 r = t - w;
566 w = fn * constants::pio2_2t<B>() - ((t - r) - w);
567 t = r;
568 w = fn * constants::pio2_3<B>();
569 r = t - w;
570 w = fn * constants::pio2_3t<B>() - ((t - r) - w);
571 xr = r - w;
572 return quadrant(fn);
573 }
574 else
575 {
576 static constexpr std::size_t size = B::size;
577 using value_type = typename B::value_type;
578 alignas(B) std::array<value_type, size> tmp;
579 alignas(B) std::array<value_type, size> txr;
580 alignas(B) std::array<value_type, size> args;
581 x.store_aligned(args.data());
582
583 for (std::size_t i = 0; i < size; ++i)
584 {
585 double arg = args[i];
586 if (arg == std::numeric_limits<value_type>::infinity())
587 {
588 tmp[i] = 0.;
589 txr[i] = std::numeric_limits<value_type>::quiet_NaN();
590 }
591 else
592 {
593 double y[2];
594 std::int32_t n = ::xsimd::detail::__ieee754_rem_pio2(x: arg, y);
595 tmp[i] = value_type(n & 3);
596 txr[i] = value_type(y[0]);
597 }
598 }
599 xr = B::load_aligned(&txr[0]);
600 B res = B::load_aligned(&tmp[0]);
601 return res;
602 }
603 }
604 };
605
606 template <class B>
607 struct trigo_reducer<B, trigo_pi_tag>
608 {
609 static inline B reduce(const B& x, B& xr) noexcept
610 {
611 B xi = nearbyint(x * B(2.));
612 B x2 = x - xi * B(0.5);
613 xr = x2 * constants::pi<B>();
614 return quadrant(xi);
615 }
616 };
617
618 }
619 template <class A, class T>
620 inline batch<T, A> cos(batch<T, A> const& self, requires_arch<generic>) noexcept
621 {
622 using batch_type = batch<T, A>;
623 const batch_type x = abs(self);
624 batch_type xr = constants::nan<batch_type>();
625 const batch_type n = detail::trigo_reducer<batch_type>::reduce(x, xr);
626 auto tmp = select(n >= batch_type(2.), batch_type(1.), batch_type(0.));
627 auto swap_bit = fma(batch_type(-2.), tmp, n);
628 auto sign_bit = select((swap_bit ^ tmp) != batch_type(0.), constants::signmask<batch_type>(), batch_type(0.));
629 const batch_type z = xr * xr;
630 const batch_type se = detail::sin_eval(z, xr);
631 const batch_type ce = detail::cos_eval(z);
632 const batch_type z1 = select(swap_bit != batch_type(0.), se, ce);
633 return z1 ^ sign_bit;
634 }
635
636 template <class A, class T>
637 inline batch<std::complex<T>, A> cos(batch<std::complex<T>, A> const& z, requires_arch<generic>) noexcept
638 {
639 return { cos(z.real()) * cosh(z.imag()), -sin(z.real()) * sinh(z.imag()) };
640 }
641
642 // cosh
643
644 /* origin: boost/simd/arch/common/simd/function/cosh.hpp */
645 /*
646 * ====================================================
647 * copyright 2016 NumScale SAS
648 *
649 * Distributed under the Boost Software License, Version 1.0.
650 * (See copy at http://boost.org/LICENSE_1_0.txt)
651 * ====================================================
652 */
653
654 template <class A, class T>
655 inline batch<T, A> cosh(batch<T, A> const& self, requires_arch<generic>) noexcept
656 {
657 using batch_type = batch<T, A>;
658 batch_type x = abs(self);
659 auto test1 = x > (constants::maxlog<batch_type>() - constants::log_2<batch_type>());
660 batch_type fac = select(test1, batch_type(0.5), batch_type(1.));
661 batch_type tmp = exp(x * fac);
662 batch_type tmp1 = batch_type(0.5) * tmp;
663 return select(test1, tmp1 * tmp, detail::average(tmp, batch_type(1.) / tmp));
664 }
665 template <class A, class T>
666 inline batch<std::complex<T>, A> cosh(const batch<std::complex<T>, A>& z, requires_arch<generic>) noexcept
667 {
668 auto x = z.real();
669 auto y = z.imag();
670 return { cosh(x) * cos(y), sinh(x) * sin(y) };
671 }
672
673 // sin
674 namespace detail
675 {
676 template <class A, class T, class Tag = trigo_radian_tag>
677 inline batch<T, A> sin(batch<T, A> const& self, Tag = Tag()) noexcept
678 {
679 using batch_type = batch<T, A>;
680 const batch_type x = abs(self);
681 batch_type xr = constants::nan<batch_type>();
682 const batch_type n = detail::trigo_reducer<batch_type, Tag>::reduce(x, xr);
683 auto tmp = select(n >= batch_type(2.), batch_type(1.), batch_type(0.));
684 auto swap_bit = fma(batch_type(-2.), tmp, n);
685 auto sign_bit = bitofsign(self) ^ select(tmp != batch_type(0.), constants::signmask<batch_type>(), batch_type(0.));
686 const batch_type z = xr * xr;
687 const batch_type se = detail::sin_eval(z, xr);
688 const batch_type ce = detail::cos_eval(z);
689 const batch_type z1 = select(swap_bit == batch_type(0.), se, ce);
690 return z1 ^ sign_bit;
691 }
692 }
693
694 template <class A, class T>
695 inline batch<T, A> sin(batch<T, A> const& self, requires_arch<generic>) noexcept
696 {
697 return detail::sin(self);
698 }
699
700 template <class A, class T>
701 inline batch<std::complex<T>, A> sin(batch<std::complex<T>, A> const& z, requires_arch<generic>) noexcept
702 {
703 return { sin(z.real()) * cosh(z.imag()), cos(z.real()) * sinh(z.imag()) };
704 }
705
706 // sincos
707 template <class A, class T>
708 inline std::pair<batch<T, A>, batch<T, A>> sincos(batch<T, A> const& self, requires_arch<generic>) noexcept
709 {
710 using batch_type = batch<T, A>;
711 const batch_type x = abs(self);
712 batch_type xr = constants::nan<batch_type>();
713 const batch_type n = detail::trigo_reducer<batch_type>::reduce(x, xr);
714 auto tmp = select(n >= batch_type(2.), batch_type(1.), batch_type(0.));
715 auto swap_bit = fma(batch_type(-2.), tmp, n);
716 const batch_type z = xr * xr;
717 const batch_type se = detail::sin_eval(z, xr);
718 const batch_type ce = detail::cos_eval(z);
719 auto sin_sign_bit = bitofsign(self) ^ select(tmp != batch_type(0.), constants::signmask<batch_type>(), batch_type(0.));
720 const batch_type sin_z1 = select(swap_bit == batch_type(0.), se, ce);
721 auto cos_sign_bit = select((swap_bit ^ tmp) != batch_type(0.), constants::signmask<batch_type>(), batch_type(0.));
722 const batch_type cos_z1 = select(swap_bit != batch_type(0.), se, ce);
723 return std::make_pair(sin_z1 ^ sin_sign_bit, cos_z1 ^ cos_sign_bit);
724 }
725
726 template <class A, class T>
727 inline std::pair<batch<std::complex<T>, A>, batch<std::complex<T>, A>>
728 sincos(batch<std::complex<T>, A> const& z, requires_arch<generic>) noexcept
729 {
730 using batch_type = batch<std::complex<T>, A>;
731 using real_batch = typename batch_type::real_batch;
732 real_batch rcos = cos(z.real());
733 real_batch rsin = sin(z.real());
734 real_batch icosh = cosh(z.imag());
735 real_batch isinh = sinh(z.imag());
736 return std::make_pair(batch_type(rsin * icosh, rcos * isinh), batch_type(rcos * icosh, -rsin * isinh));
737 }
738
739 // sinh
740 namespace detail
741 {
742 /* origin: boost/simd/arch/common/detail/generic/sinh_kernel.hpp */
743 /*
744 * ====================================================
745 * copyright 2016 NumScale SAS
746 *
747 * Distributed under the Boost Software License, Version 1.0.
748 * (See copy at http://boost.org/LICENSE_1_0.txt)
749 * ====================================================
750 */
751 template <class A>
752 inline batch<float, A> sinh_kernel(batch<float, A> const& self) noexcept
753 {
754 using batch_type = batch<float, A>;
755 batch_type sqr_self = self * self;
756 return detail::horner<batch_type,
757 0x3f800000, // 1.0f
758 0x3e2aaacc, // 1.66667160211E-1f
759 0x3c087bbe, // 8.33028376239E-3f
760 0x39559e2f // 2.03721912945E-4f
761 >(sqr_self)
762 * self;
763 }
764
765 template <class A>
766 inline batch<double, A> sinh_kernel(batch<double, A> const& self) noexcept
767 {
768 using batch_type = batch<double, A>;
769 batch_type sqrself = self * self;
770 return fma(self, (detail::horner<batch_type,
771 0xc115782bdbf6ab05ull, // -3.51754964808151394800E5
772 0xc0c694b8c71d6182ull, // -1.15614435765005216044E4,
773 0xc064773a398ff4feull, // -1.63725857525983828727E2,
774 0xbfe9435fe8bb3cd6ull // -7.89474443963537015605E-1
775 >(sqrself)
776 / detail::horner1<batch_type,
777 0xc1401a20e4f90044ull, // -2.11052978884890840399E6
778 0x40e1a7ba7ed72245ull, // 3.61578279834431989373E4,
779 0xc0715b6096e96484ull // -2.77711081420602794433E2,
780 >(sqrself))
781 * sqrself,
782 self);
783 }
784 }
785 /* origin: boost/simd/arch/common/simd/function/sinh.hpp */
786 /*
787 * ====================================================
788 * copyright 2016 NumScale SAS
789 *
790 * Distributed under the Boost Software License, Version 1.0.
791 * (See copy at http://boost.org/LICENSE_1_0.txt)
792 * ====================================================
793 */
794 template <class A, class T>
795 inline batch<T, A> sinh(batch<T, A> const& a, requires_arch<generic>) noexcept
796 {
797 using batch_type = batch<T, A>;
798 batch_type half(0.5);
799 batch_type x = abs(a);
800 auto lt1 = x < batch_type(1.);
801 batch_type bts = bitofsign(a);
802 batch_type z(0.);
803 if (any(lt1))
804 {
805 z = detail::sinh_kernel(x);
806 if (all(lt1))
807 return z ^ bts;
808 }
809 auto test1 = x > (constants::maxlog<batch_type>() - constants::log_2<batch_type>());
810 batch_type fac = select(test1, half, batch_type(1.));
811 batch_type tmp = exp(x * fac);
812 batch_type tmp1 = half * tmp;
813 batch_type r = select(test1, tmp1 * tmp, tmp1 - half / tmp);
814 return select(lt1, z, r) ^ bts;
815 }
816 template <class A, class T>
817 inline batch<std::complex<T>, A> sinh(const batch<std::complex<T>, A>& z, requires_arch<generic>) noexcept
818 {
819 auto x = z.real();
820 auto y = z.imag();
821 return { sinh(x) * cos(y), cosh(x) * sin(y) };
822 }
823
824 // tan
825 template <class A, class T>
826 inline batch<T, A> tan(batch<T, A> const& self, requires_arch<generic>) noexcept
827 {
828 using batch_type = batch<T, A>;
829 const batch_type x = abs(self);
830 batch_type xr = constants::nan<batch_type>();
831 const batch_type n = detail::trigo_reducer<batch_type>::reduce(x, xr);
832 auto tmp = select(n >= batch_type(2.), batch_type(1.), batch_type(0.));
833 auto swap_bit = fma(batch_type(-2.), tmp, n);
834 auto test = (swap_bit == batch_type(0.));
835 const batch_type y = detail::tan_eval(xr, test);
836 return y ^ bitofsign(self);
837 }
838 template <class A, class T>
839 inline batch<std::complex<T>, A> tan(batch<std::complex<T>, A> const& z, requires_arch<generic>) noexcept
840 {
841 using batch_type = batch<std::complex<T>, A>;
842 using real_batch = typename batch_type::real_batch;
843 real_batch d = cos(2 * z.real()) + cosh(2 * z.imag());
844 batch_type winf(constants::infinity<real_batch>(), constants::infinity<real_batch>());
845 real_batch wreal = sin(2 * z.real()) / d;
846 real_batch wimag = sinh(2 * z.imag());
847 batch_type wres = select(isinf(wimag), batch_type(wreal, real_batch(1.)), batch_type(wreal, wimag / d));
848 return select(d == real_batch(0.), winf, wres);
849 }
850
851 // tanh
852 namespace detail
853 {
854 /* origin: boost/simd/arch/common/detail/generic/tanh_kernel.hpp */
855 /*
856 * ====================================================
857 * copyright 2016 NumScale SAS
858 *
859 * Distributed under the Boost Software License, Version 1.0.
860 * (See copy at http://boost.org/LICENSE_1_0.txt)
861 * ====================================================
862 */
863 template <class B>
864 struct tanh_kernel;
865
866 template <class A>
867 struct tanh_kernel<batch<float, A>>
868 {
869 using batch_type = batch<float, A>;
870 static inline batch_type tanh(const batch_type& x) noexcept
871 {
872 batch_type sqrx = x * x;
873 return fma(detail::horner<batch_type,
874 0xbeaaaa99, // -3.33332819422E-1F
875 0x3e088393, // +1.33314422036E-1F
876 0xbd5c1e2d, // -5.37397155531E-2F
877 0x3ca9134e, // +2.06390887954E-2F
878 0xbbbaf0ea // -5.70498872745E-3F
879 >(sqrx)
880 * sqrx,
881 x, x);
882 }
883
884 static inline batch_type cotanh(const batch_type& x) noexcept
885 {
886 return batch_type(1.) / tanh(x);
887 }
888 };
889
890 template <class A>
891 struct tanh_kernel<batch<double, A>>
892 {
893 using batch_type = batch<double, A>;
894 static inline batch_type tanh(const batch_type& x) noexcept
895 {
896 batch_type sqrx = x * x;
897 return fma(sqrx * p(x: sqrx) / q(x: sqrx), x, x);
898 }
899
900 static inline batch_type cotanh(const batch_type& x) noexcept
901 {
902 batch_type sqrx = x * x;
903 batch_type qval = q(x: sqrx);
904 return qval / (x * fma(p(x: sqrx), sqrx, qval));
905 }
906
907 static inline batch_type p(const batch_type& x) noexcept
908 {
909 return detail::horner<batch_type,
910 0xc0993ac030580563, // -1.61468768441708447952E3
911 0xc058d26a0e26682d, // -9.92877231001918586564E1,
912 0xbfeedc5baafd6f4b // -9.64399179425052238628E-1
913 >(x);
914 }
915
916 static inline batch_type q(const batch_type& x) noexcept
917 {
918 return detail::horner1<batch_type,
919 0x40b2ec102442040c, // 4.84406305325125486048E3
920 0x40a176fa0e5535fa, // 2.23548839060100448583E3,
921 0x405c33f28a581B86 // 1.12811678491632931402E2,
922 >(x);
923 }
924 };
925
926 }
927 /* origin: boost/simd/arch/common/simd/function/tanh.hpp */
928 /*
929 * ====================================================
930 * copyright 2016 NumScale SAS
931 *
932 * Distributed under the Boost Software License, Version 1.0.
933 * (See copy at http://boost.org/LICENSE_1_0.txt)
934 * ====================================================
935 */
936 template <class A, class T>
937 inline batch<T, A> tanh(batch<T, A> const& self, requires_arch<generic>) noexcept
938 {
939 using batch_type = batch<T, A>;
940 batch_type one(1.);
941 batch_type x = abs(self);
942 auto test = x < (batch_type(5.) / batch_type(8.));
943 batch_type bts = bitofsign(self);
944 batch_type z = one;
945 if (any(test))
946 {
947 z = detail::tanh_kernel<batch_type>::tanh(x);
948 if (all(test))
949 return z ^ bts;
950 }
951 batch_type r = fma(batch_type(-2.), one / (one + exp(x + x)), one);
952 return select(test, z, r) ^ bts;
953 }
954 template <class A, class T>
955 inline batch<std::complex<T>, A> tanh(const batch<std::complex<T>, A>& z, requires_arch<generic>) noexcept
956 {
957 using real_batch = typename batch<std::complex<T>, A>::real_batch;
958 auto x = z.real();
959 auto y = z.imag();
960 real_batch two(2);
961 auto d = cosh(two * x) + cos(two * y);
962 return { sinh(two * x) / d, sin(two * y) / d };
963 }
964
965 }
966
967}
968
969#endif
970