1 | /*************************************************************************** |
2 | * Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and * |
3 | * Martin Renou * |
4 | * Copyright (c) QuantStack * |
5 | * Copyright (c) Serge Guelton * |
6 | * * |
7 | * Distributed under the terms of the BSD 3-Clause License. * |
8 | * * |
9 | * The full license is in the file LICENSE, distributed with this software. * |
10 | ****************************************************************************/ |
11 | |
12 | #ifndef XSIMD_GENERIC_TRIGO_HPP |
13 | #define XSIMD_GENERIC_TRIGO_HPP |
14 | |
15 | #include "./xsimd_generic_details.hpp" |
16 | |
17 | #include <array> |
18 | |
19 | namespace xsimd |
20 | { |
21 | |
22 | namespace kernel |
23 | { |
24 | /* origin: boost/simd/arch/common/detail/simd/trig_base.hpp */ |
25 | /* |
26 | * ==================================================== |
27 | * copyright 2016 NumScale SAS |
28 | * |
29 | * Distributed under the Boost Software License, Version 1.0. |
30 | * (See copy at http://boost.org/LICENSE_1_0.txt) |
31 | * ==================================================== |
32 | */ |
33 | |
34 | using namespace types; |
35 | |
36 | // acos |
37 | template <class A, class T> |
38 | inline batch<T, A> acos(batch<T, A> const& self, requires_arch<generic>) noexcept |
39 | { |
40 | using batch_type = batch<T, A>; |
41 | batch_type x = abs(self); |
42 | auto x_larger_05 = x > batch_type(0.5); |
43 | x = select(x_larger_05, sqrt(fma(batch_type(-0.5), x, batch_type(0.5))), self); |
44 | x = asin(x); |
45 | x = select(x_larger_05, x + x, x); |
46 | x = select(self < batch_type(-0.5), constants::pi<batch_type>() - x, x); |
47 | return select(x_larger_05, x, constants::pio2<batch_type>() - x); |
48 | } |
49 | template <class A, class T> |
50 | inline batch<std::complex<T>, A> acos(const batch<std::complex<T>, A>& z, requires_arch<generic>) noexcept |
51 | { |
52 | using batch_type = batch<std::complex<T>, A>; |
53 | using real_batch = typename batch_type::real_batch; |
54 | batch_type tmp = asin(z); |
55 | return { constants::pio2<real_batch>() - tmp.real(), -tmp.imag() }; |
56 | } |
57 | |
58 | // acosh |
59 | /* origin: boost/simd/arch/common/simd/function/acosh.hpp */ |
60 | /* |
61 | * ==================================================== |
62 | * copyright 2016 NumScale SAS |
63 | * |
64 | * Distributed under the Boost Software License, Version 1.0. |
65 | * (See copy at http://boost.org/LICENSE_1_0.txt) |
66 | * ==================================================== |
67 | */ |
68 | template <class A, class T> |
69 | inline batch<T, A> acosh(batch<T, A> const& self, requires_arch<generic>) noexcept |
70 | { |
71 | using batch_type = batch<T, A>; |
72 | batch_type x = self - batch_type(1.); |
73 | auto test = x > constants::oneotwoeps<batch_type>(); |
74 | batch_type z = select(test, self, x + sqrt(x + x + x * x)); |
75 | batch_type l1pz = log1p(z); |
76 | return select(test, l1pz + constants::log_2<batch_type>(), l1pz); |
77 | } |
78 | template <class A, class T> |
79 | inline batch<std::complex<T>, A> acosh(const batch<std::complex<T>, A>& z, requires_arch<generic>) noexcept |
80 | { |
81 | using batch_type = batch<std::complex<T>, A>; |
82 | batch_type w = acos(z); |
83 | w = batch_type(-w.imag(), w.real()); |
84 | return w; |
85 | } |
86 | |
87 | // asin |
88 | template <class A> |
89 | inline batch<float, A> asin(batch<float, A> const& self, requires_arch<generic>) noexcept |
90 | { |
91 | using batch_type = batch<float, A>; |
92 | batch_type x = abs(self); |
93 | batch_type sign = bitofsign(self); |
94 | auto x_larger_05 = x > batch_type(0.5); |
95 | batch_type z = select(x_larger_05, batch_type(0.5) * (batch_type(1.) - x), x * x); |
96 | x = select(x_larger_05, sqrt(z), x); |
97 | batch_type z1 = detail::horner<batch_type, |
98 | 0x3e2aaae4, |
99 | 0x3d9980f6, |
100 | 0x3d3a3ec7, |
101 | 0x3cc617e3, |
102 | 0x3d2cb352>(z); |
103 | z1 = fma(z1, z * x, x); |
104 | z = select(x_larger_05, constants::pio2<batch_type>() - (z1 + z1), z1); |
105 | return z ^ sign; |
106 | } |
107 | template <class A> |
108 | inline batch<double, A> asin(batch<double, A> const& self, requires_arch<generic>) noexcept |
109 | { |
110 | using batch_type = batch<double, A>; |
111 | batch_type x = abs(self); |
112 | auto small_cond = x < constants::sqrteps<batch_type>(); |
113 | batch_type ct1 = batch_type(bit_cast<double>(val: int64_t(0x3fe4000000000000))); |
114 | batch_type zz1 = batch_type(1.) - x; |
115 | batch_type vp = zz1 * detail::horner<batch_type, 0x403c896240f3081dull, 0xc03991aaac01ab68ull, 0x401bdff5baf33e6aull, 0xbfe2079259f9290full, 0x3f684fc3988e9f08ull>(zz1) / detail::horner1<batch_type, 0x40756709b0b644beull, 0xc077fe08959063eeull, 0x40626219af6a7f42ull, 0xc035f2a2b6bf5d8cull>(zz1); |
116 | zz1 = sqrt(zz1 + zz1); |
117 | batch_type z = constants::pio4<batch_type>() - zz1; |
118 | zz1 = fms(zz1, vp, constants::pio_2lo<batch_type>()); |
119 | z = z - zz1; |
120 | zz1 = z + constants::pio4<batch_type>(); |
121 | batch_type zz2 = self * self; |
122 | z = zz2 * detail::horner<batch_type, 0xc020656c06ceafd5ull, 0x40339007da779259ull, 0xc0304331de27907bull, 0x4015c74b178a2dd9ull, 0xbfe34341333e5c16ull, 0x3f716b9b0bd48ad3ull>(zz2) / detail::horner1<batch_type, 0xc04898220a3607acull, 0x4061705684ffbf9dull, 0xc06265bb6d3576d7ull, 0x40519fc025fe9054ull, 0xc02d7b590b5e0eabull>(zz2); |
123 | zz2 = fma(x, z, x); |
124 | return select(x > batch_type(1.), constants::nan<batch_type>(), |
125 | select(small_cond, x, |
126 | select(x > ct1, zz1, zz2)) |
127 | ^ bitofsign(self)); |
128 | } |
129 | template <class A, class T> |
130 | inline batch<std::complex<T>, A> asin(const batch<std::complex<T>, A>& z, requires_arch<generic>) noexcept |
131 | { |
132 | using batch_type = batch<std::complex<T>, A>; |
133 | using real_batch = typename batch_type::real_batch; |
134 | real_batch x = z.real(); |
135 | real_batch y = z.imag(); |
136 | |
137 | batch_type ct(-y, x); |
138 | batch_type zz(real_batch(1.) - (x - y) * (x + y), -2 * x * y); |
139 | zz = log(ct + sqrt(zz)); |
140 | batch_type resg(zz.imag(), -zz.real()); |
141 | |
142 | return select(y == real_batch(0.), |
143 | select(fabs(x) > real_batch(1.), |
144 | batch_type(constants::pio2<real_batch>(), real_batch(0.)), |
145 | batch_type(asin(x), real_batch(0.))), |
146 | resg); |
147 | } |
148 | |
149 | // asinh |
150 | /* origin: boost/simd/arch/common/simd/function/asinh.hpp */ |
151 | /* |
152 | * ==================================================== |
153 | * copyright 2016 NumScale SAS |
154 | * |
155 | * Distributed under the Boost Software License, Version 1.0. |
156 | * (See copy at http://boost.org/LICENSE_1_0.txt) |
157 | * ==================================================== |
158 | */ |
159 | namespace detail |
160 | { |
161 | template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type> |
162 | inline batch<T, A> |
163 | average(const batch<T, A>& x1, const batch<T, A>& x2) noexcept |
164 | { |
165 | return (x1 & x2) + ((x1 ^ x2) >> 1); |
166 | } |
167 | |
168 | template <class A, class T> |
169 | inline batch<T, A> |
170 | averagef(const batch<T, A>& x1, const batch<T, A>& x2) noexcept |
171 | { |
172 | using batch_type = batch<T, A>; |
173 | return fma(x1, batch_type(0.5), x2 * batch_type(0.5)); |
174 | } |
175 | template <class A> |
176 | inline batch<float, A> average(batch<float, A> const& x1, batch<float, A> const& x2) noexcept |
177 | { |
178 | return averagef(x1, x2); |
179 | } |
180 | template <class A> |
181 | inline batch<double, A> average(batch<double, A> const& x1, batch<double, A> const& x2) noexcept |
182 | { |
183 | return averagef(x1, x2); |
184 | } |
185 | } |
186 | template <class A> |
187 | inline batch<float, A> asinh(batch<float, A> const& self, requires_arch<generic>) noexcept |
188 | { |
189 | using batch_type = batch<float, A>; |
190 | batch_type x = abs(self); |
191 | auto lthalf = x < batch_type(0.5); |
192 | batch_type x2 = x * x; |
193 | batch_type bts = bitofsign(self); |
194 | batch_type z(0.); |
195 | if (any(lthalf)) |
196 | { |
197 | z = detail::horner<batch_type, |
198 | 0x3f800000, |
199 | 0xbe2aa9ad, |
200 | 0x3d9949b1, |
201 | 0xbd2ee581, |
202 | 0x3ca4d6e6>(x2) |
203 | * x; |
204 | if (all(lthalf)) |
205 | return z ^ bts; |
206 | } |
207 | batch_type tmp = select(x > constants::oneosqrteps<batch_type>(), x, detail::average(x, hypot(batch_type(1.), x))); |
208 | #ifndef XSIMD_NO_NANS |
209 | return select(isnan(self), constants::nan<batch_type>(), select(lthalf, z, log(tmp) + constants::log_2<batch_type>()) ^ bts); |
210 | #else |
211 | return select(lthalf, z, log(tmp) + constants::log_2<batch_type>()) ^ bts; |
212 | #endif |
213 | } |
214 | template <class A> |
215 | inline batch<double, A> asinh(batch<double, A> const& self, requires_arch<generic>) noexcept |
216 | { |
217 | using batch_type = batch<double, A>; |
218 | batch_type x = abs(self); |
219 | auto test = x > constants::oneosqrteps<batch_type>(); |
220 | batch_type z = select(test, x - batch_type(1.), x + x * x / (batch_type(1.) + hypot(batch_type(1.), x))); |
221 | #ifndef XSIMD_NO_INFINITIES |
222 | z = select(x == constants::infinity<batch_type>(), x, z); |
223 | #endif |
224 | batch_type l1pz = log1p(z); |
225 | z = select(test, l1pz + constants::log_2<batch_type>(), l1pz); |
226 | return bitofsign(self) ^ z; |
227 | } |
228 | template <class A, class T> |
229 | inline batch<std::complex<T>, A> asinh(const batch<std::complex<T>, A>& z, requires_arch<generic>) noexcept |
230 | { |
231 | using batch_type = batch<std::complex<T>, A>; |
232 | batch_type w = asin(batch_type(-z.imag(), z.real())); |
233 | w = batch_type(w.imag(), -w.real()); |
234 | return w; |
235 | } |
236 | |
237 | // atan |
238 | namespace detail |
239 | { |
240 | template <class A> |
241 | static inline batch<float, A> kernel_atan(const batch<float, A>& x, const batch<float, A>& recx) noexcept |
242 | { |
243 | using batch_type = batch<float, A>; |
244 | const auto flag1 = x < constants::tan3pio8<batch_type>(); |
245 | const auto flag2 = (x >= batch_type(bit_cast<float>(val: (uint32_t)0x3ed413cd))) && flag1; |
246 | batch_type yy = select(flag1, batch_type(0.), constants::pio2<batch_type>()); |
247 | yy = select(flag2, constants::pio4<batch_type>(), yy); |
248 | batch_type xx = select(flag1, x, -recx); |
249 | xx = select(flag2, (x - batch_type(1.)) / (x + batch_type(1.)), xx); |
250 | const batch_type z = xx * xx; |
251 | batch_type z1 = detail::horner<batch_type, |
252 | 0xbeaaaa2aul, |
253 | 0x3e4c925ful, |
254 | 0xbe0e1b85ul, |
255 | 0x3da4f0d1ul>(z); |
256 | z1 = fma(xx, z1 * z, xx); |
257 | z1 = select(flag2, z1 + constants::pio_4lo<batch_type>(), z1); |
258 | z1 = select(!flag1, z1 + constants::pio_2lo<batch_type>(), z1); |
259 | return yy + z1; |
260 | } |
261 | template <class A> |
262 | static inline batch<double, A> kernel_atan(const batch<double, A>& x, const batch<double, A>& recx) noexcept |
263 | { |
264 | using batch_type = batch<double, A>; |
265 | const auto flag1 = x < constants::tan3pio8<batch_type>(); |
266 | const auto flag2 = (x >= constants::tanpio8<batch_type>()) && flag1; |
267 | batch_type yy = select(flag1, batch_type(0.), constants::pio2<batch_type>()); |
268 | yy = select(flag2, constants::pio4<batch_type>(), yy); |
269 | batch_type xx = select(flag1, x, -recx); |
270 | xx = select(flag2, (x - batch_type(1.)) / (x + batch_type(1.)), xx); |
271 | batch_type z = xx * xx; |
272 | z *= detail::horner<batch_type, |
273 | 0xc0503669fd28ec8eull, |
274 | 0xc05eb8bf2d05ba25ull, |
275 | 0xc052c08c36880273ull, |
276 | 0xc03028545b6b807aull, |
277 | 0xbfec007fa1f72594ull>(z) |
278 | / detail::horner1<batch_type, |
279 | 0x4068519efbbd62ecull, |
280 | 0x407e563f13b049eaull, |
281 | 0x407b0e18d2e2be3bull, |
282 | 0x4064a0dd43b8fa25ull, |
283 | 0x4038dbc45b14603cull>(z); |
284 | z = fma(xx, z, xx); |
285 | z = select(flag2, z + constants::pio_4lo<batch_type>(), z); |
286 | z = z + select(flag1, batch_type(0.), constants::pio_2lo<batch_type>()); |
287 | return yy + z; |
288 | } |
289 | } |
290 | template <class A, class T> |
291 | inline batch<T, A> atan(batch<T, A> const& self, requires_arch<generic>) noexcept |
292 | { |
293 | using batch_type = batch<T, A>; |
294 | const batch_type absa = abs(self); |
295 | const batch_type x = detail::kernel_atan(absa, batch_type(1.) / absa); |
296 | return x ^ bitofsign(self); |
297 | } |
298 | template <class A, class T> |
299 | inline batch<std::complex<T>, A> atan(const batch<std::complex<T>, A>& z, requires_arch<generic>) noexcept |
300 | { |
301 | using batch_type = batch<std::complex<T>, A>; |
302 | using real_batch = typename batch_type::real_batch; |
303 | real_batch x = z.real(); |
304 | real_batch y = z.imag(); |
305 | real_batch x2 = x * x; |
306 | real_batch one(1.); |
307 | real_batch a = one - x2 - (y * y); |
308 | real_batch w = 0.5 * atan2(2. * x, a); |
309 | real_batch num = y + one; |
310 | num = x2 + num * num; |
311 | real_batch den = y - one; |
312 | den = x2 + den * den; |
313 | batch_type res = select((x == real_batch(0.)) && (y == real_batch(1.)), |
314 | batch_type(real_batch(0.), constants::infinity<real_batch>()), |
315 | batch_type(w, 0.25 * log(num / den))); |
316 | return res; |
317 | } |
318 | |
319 | // atanh |
320 | /* origin: boost/simd/arch/common/simd/function/acosh.hpp */ |
321 | /* |
322 | * ==================================================== |
323 | * copyright 2016 NumScale SAS |
324 | * |
325 | * Distributed under the Boost Software License, Version 1.0. |
326 | * (See copy at http://boost.org/LICENSE_1_0.txt) |
327 | * ==================================================== |
328 | */ |
329 | template <class A, class T> |
330 | inline batch<T, A> atanh(batch<T, A> const& self, requires_arch<generic>) noexcept |
331 | { |
332 | using batch_type = batch<T, A>; |
333 | batch_type x = abs(self); |
334 | batch_type t = x + x; |
335 | batch_type z = batch_type(1.) - x; |
336 | auto test = x < batch_type(0.5); |
337 | batch_type tmp = select(test, x, t) / z; |
338 | return bitofsign(self) ^ (batch_type(0.5) * log1p(select(test, fma(t, tmp, t), tmp))); |
339 | } |
340 | template <class A, class T> |
341 | inline batch<std::complex<T>, A> atanh(const batch<std::complex<T>, A>& z, requires_arch<generic>) noexcept |
342 | { |
343 | using batch_type = batch<std::complex<T>, A>; |
344 | batch_type w = atan(batch_type(-z.imag(), z.real())); |
345 | w = batch_type(w.imag(), -w.real()); |
346 | return w; |
347 | } |
348 | |
349 | // atan2 |
350 | template <class A, class T> |
351 | inline batch<T, A> atan2(batch<T, A> const& self, batch<T, A> const& other, requires_arch<generic>) noexcept |
352 | { |
353 | using batch_type = batch<T, A>; |
354 | const batch_type q = abs(self / other); |
355 | const batch_type z = detail::kernel_atan(q, batch_type(1.) / q); |
356 | return select(other > batch_type(0.), z, constants::pi<batch_type>() - z) * signnz(self); |
357 | } |
358 | |
359 | // cos |
360 | namespace detail |
361 | { |
362 | template <class T, class A> |
363 | inline batch<T, A> quadrant(const batch<T, A>& x) noexcept |
364 | { |
365 | return x & batch<T, A>(3); |
366 | } |
367 | |
368 | template <class A> |
369 | inline batch<float, A> quadrant(const batch<float, A>& x) noexcept |
370 | { |
371 | return to_float(quadrant(to_int(x))); |
372 | } |
373 | |
374 | template <class A> |
375 | inline batch<double, A> quadrant(const batch<double, A>& x) noexcept |
376 | { |
377 | using batch_type = batch<double, A>; |
378 | batch_type a = x * batch_type(0.25); |
379 | return (a - floor(a)) * batch_type(4.); |
380 | } |
381 | /* origin: boost/simd/arch/common/detail/simd/f_trig_evaluation.hpp */ |
382 | /* |
383 | * ==================================================== |
384 | * copyright 2016 NumScale SAS |
385 | * |
386 | * Distributed under the Boost Software License, Version 1.0. |
387 | * (See copy at http://boost.org/LICENSE_1_0.txt) |
388 | * ==================================================== |
389 | */ |
390 | |
391 | template <class A> |
392 | inline batch<float, A> cos_eval(const batch<float, A>& z) noexcept |
393 | { |
394 | using batch_type = batch<float, A>; |
395 | batch_type y = detail::horner<batch_type, |
396 | 0x3d2aaaa5, |
397 | 0xbab60619, |
398 | 0x37ccf5ce>(z); |
399 | return batch_type(1.) + fma(z, batch_type(-0.5), y * z * z); |
400 | } |
401 | |
402 | template <class A> |
403 | inline batch<float, A> sin_eval(const batch<float, A>& z, const batch<float, A>& x) noexcept |
404 | { |
405 | using batch_type = batch<float, A>; |
406 | batch_type y = detail::horner<batch_type, |
407 | 0xbe2aaaa2, |
408 | 0x3c08839d, |
409 | 0xb94ca1f9>(z); |
410 | return fma(y * z, x, x); |
411 | } |
412 | |
413 | template <class A> |
414 | static inline batch<float, A> base_tancot_eval(const batch<float, A>& z) noexcept |
415 | { |
416 | using batch_type = batch<float, A>; |
417 | batch_type zz = z * z; |
418 | batch_type y = detail::horner<batch_type, |
419 | 0x3eaaaa6f, |
420 | 0x3e0896dd, |
421 | 0x3d5ac5c9, |
422 | 0x3cc821b5, |
423 | 0x3b4c779c, |
424 | 0x3c19c53b>(zz); |
425 | return fma(y, zz * z, z); |
426 | } |
427 | |
428 | template <class A, class BB> |
429 | static inline batch<float, A> tan_eval(const batch<float, A>& z, const BB& test) noexcept |
430 | { |
431 | using batch_type = batch<float, A>; |
432 | batch_type y = base_tancot_eval(z); |
433 | return select(test, y, -batch_type(1.) / y); |
434 | } |
435 | |
436 | template <class A, class BB> |
437 | static inline batch<float, A> cot_eval(const batch<float, A>& z, const BB& test) noexcept |
438 | { |
439 | using batch_type = batch<float, A>; |
440 | batch_type y = base_tancot_eval(z); |
441 | return select(test, batch_type(1.) / y, -y); |
442 | } |
443 | |
444 | /* origin: boost/simd/arch/common/detail/simd/d_trig_evaluation.hpp */ |
445 | /* |
446 | * ==================================================== |
447 | * copyright 2016 NumScale SAS |
448 | * |
449 | * Distributed under the Boost Software License, Version 1.0. |
450 | * (See copy at http://boost.org/LICENSE_1_0.txt) |
451 | * ==================================================== |
452 | */ |
453 | template <class A> |
454 | static inline batch<double, A> cos_eval(const batch<double, A>& z) noexcept |
455 | { |
456 | using batch_type = batch<double, A>; |
457 | batch_type y = detail::horner<batch_type, |
458 | 0x3fe0000000000000ull, |
459 | 0xbfa5555555555551ull, |
460 | 0x3f56c16c16c15d47ull, |
461 | 0xbefa01a019ddbcd9ull, |
462 | 0x3e927e4f8e06d9a5ull, |
463 | 0xbe21eea7c1e514d4ull, |
464 | 0x3da8ff831ad9b219ull>(z); |
465 | return batch_type(1.) - y * z; |
466 | } |
467 | |
468 | template <class A> |
469 | static inline batch<double, A> sin_eval(const batch<double, A>& z, const batch<double, A>& x) noexcept |
470 | { |
471 | using batch_type = batch<double, A>; |
472 | batch_type y = detail::horner<batch_type, |
473 | 0xbfc5555555555548ull, |
474 | 0x3f8111111110f7d0ull, |
475 | 0xbf2a01a019bfdf03ull, |
476 | 0x3ec71de3567d4896ull, |
477 | 0xbe5ae5e5a9291691ull, |
478 | 0x3de5d8fd1fcf0ec1ull>(z); |
479 | return fma(y * z, x, x); |
480 | } |
481 | |
482 | template <class A> |
483 | static inline batch<double, A> base_tancot_eval(const batch<double, A>& z) noexcept |
484 | { |
485 | using batch_type = batch<double, A>; |
486 | batch_type zz = z * z; |
487 | batch_type num = detail::horner<batch_type, |
488 | 0xc1711fead3299176ull, |
489 | 0x413199eca5fc9dddull, |
490 | 0xc0c992d8d24f3f38ull>(zz); |
491 | batch_type den = detail::horner1<batch_type, |
492 | 0xc189afe03cbe5a31ull, |
493 | 0x4177d98fc2ead8efull, |
494 | 0xc13427bc582abc96ull, |
495 | 0x40cab8a5eeb36572ull>(zz); |
496 | return fma(z, (zz * (num / den)), z); |
497 | } |
498 | |
499 | template <class A, class BB> |
500 | static inline batch<double, A> tan_eval(const batch<double, A>& z, const BB& test) noexcept |
501 | { |
502 | using batch_type = batch<double, A>; |
503 | batch_type y = base_tancot_eval(z); |
504 | return select(test, y, -batch_type(1.) / y); |
505 | } |
506 | |
507 | template <class A, class BB> |
508 | static inline batch<double, A> cot_eval(const batch<double, A>& z, const BB& test) noexcept |
509 | { |
510 | using batch_type = batch<double, A>; |
511 | batch_type y = base_tancot_eval(z); |
512 | return select(test, batch_type(1.) / y, -y); |
513 | } |
514 | /* origin: boost/simd/arch/common/detail/simd/trig_reduction.hpp */ |
515 | /* |
516 | * ==================================================== |
517 | * copyright 2016 NumScale SAS |
518 | * |
519 | * Distributed under the Boost Software License, Version 1.0. |
520 | * (See copy at http://boost.org/LICENSE_1_0.txt) |
521 | * ==================================================== |
522 | */ |
523 | |
524 | struct trigo_radian_tag |
525 | { |
526 | }; |
527 | struct trigo_pi_tag |
528 | { |
529 | }; |
530 | |
531 | template <class B, class Tag = trigo_radian_tag> |
532 | struct trigo_reducer |
533 | { |
534 | static inline B reduce(const B& x, B& xr) noexcept |
535 | { |
536 | if (all(x <= constants::pio4<B>())) |
537 | { |
538 | xr = x; |
539 | return B(0.); |
540 | } |
541 | else if (all(x <= constants::pio2<B>())) |
542 | { |
543 | auto test = x > constants::pio4<B>(); |
544 | xr = x - constants::pio2_1<B>(); |
545 | xr -= constants::pio2_2<B>(); |
546 | xr -= constants::pio2_3<B>(); |
547 | xr = select(test, xr, x); |
548 | return select(test, B(1.), B(0.)); |
549 | } |
550 | else if (all(x <= constants::twentypi<B>())) |
551 | { |
552 | B xi = nearbyint(x * constants::twoopi<B>()); |
553 | xr = fnma(xi, constants::pio2_1<B>(), x); |
554 | xr -= xi * constants::pio2_2<B>(); |
555 | xr -= xi * constants::pio2_3<B>(); |
556 | return quadrant(xi); |
557 | } |
558 | else if (all(x <= constants::mediumpi<B>())) |
559 | { |
560 | B fn = nearbyint(x * constants::twoopi<B>()); |
561 | B r = x - fn * constants::pio2_1<B>(); |
562 | B w = fn * constants::pio2_1t<B>(); |
563 | B t = r; |
564 | w = fn * constants::pio2_2<B>(); |
565 | r = t - w; |
566 | w = fn * constants::pio2_2t<B>() - ((t - r) - w); |
567 | t = r; |
568 | w = fn * constants::pio2_3<B>(); |
569 | r = t - w; |
570 | w = fn * constants::pio2_3t<B>() - ((t - r) - w); |
571 | xr = r - w; |
572 | return quadrant(fn); |
573 | } |
574 | else |
575 | { |
576 | static constexpr std::size_t size = B::size; |
577 | using value_type = typename B::value_type; |
578 | alignas(B) std::array<value_type, size> tmp; |
579 | alignas(B) std::array<value_type, size> txr; |
580 | alignas(B) std::array<value_type, size> args; |
581 | x.store_aligned(args.data()); |
582 | |
583 | for (std::size_t i = 0; i < size; ++i) |
584 | { |
585 | double arg = args[i]; |
586 | if (arg == std::numeric_limits<value_type>::infinity()) |
587 | { |
588 | tmp[i] = 0.; |
589 | txr[i] = std::numeric_limits<value_type>::quiet_NaN(); |
590 | } |
591 | else |
592 | { |
593 | double y[2]; |
594 | std::int32_t n = ::xsimd::detail::__ieee754_rem_pio2(x: arg, y); |
595 | tmp[i] = value_type(n & 3); |
596 | txr[i] = value_type(y[0]); |
597 | } |
598 | } |
599 | xr = B::load_aligned(&txr[0]); |
600 | B res = B::load_aligned(&tmp[0]); |
601 | return res; |
602 | } |
603 | } |
604 | }; |
605 | |
606 | template <class B> |
607 | struct trigo_reducer<B, trigo_pi_tag> |
608 | { |
609 | static inline B reduce(const B& x, B& xr) noexcept |
610 | { |
611 | B xi = nearbyint(x * B(2.)); |
612 | B x2 = x - xi * B(0.5); |
613 | xr = x2 * constants::pi<B>(); |
614 | return quadrant(xi); |
615 | } |
616 | }; |
617 | |
618 | } |
619 | template <class A, class T> |
620 | inline batch<T, A> cos(batch<T, A> const& self, requires_arch<generic>) noexcept |
621 | { |
622 | using batch_type = batch<T, A>; |
623 | const batch_type x = abs(self); |
624 | batch_type xr = constants::nan<batch_type>(); |
625 | const batch_type n = detail::trigo_reducer<batch_type>::reduce(x, xr); |
626 | auto tmp = select(n >= batch_type(2.), batch_type(1.), batch_type(0.)); |
627 | auto swap_bit = fma(batch_type(-2.), tmp, n); |
628 | auto sign_bit = select((swap_bit ^ tmp) != batch_type(0.), constants::signmask<batch_type>(), batch_type(0.)); |
629 | const batch_type z = xr * xr; |
630 | const batch_type se = detail::sin_eval(z, xr); |
631 | const batch_type ce = detail::cos_eval(z); |
632 | const batch_type z1 = select(swap_bit != batch_type(0.), se, ce); |
633 | return z1 ^ sign_bit; |
634 | } |
635 | |
636 | template <class A, class T> |
637 | inline batch<std::complex<T>, A> cos(batch<std::complex<T>, A> const& z, requires_arch<generic>) noexcept |
638 | { |
639 | return { cos(z.real()) * cosh(z.imag()), -sin(z.real()) * sinh(z.imag()) }; |
640 | } |
641 | |
642 | // cosh |
643 | |
644 | /* origin: boost/simd/arch/common/simd/function/cosh.hpp */ |
645 | /* |
646 | * ==================================================== |
647 | * copyright 2016 NumScale SAS |
648 | * |
649 | * Distributed under the Boost Software License, Version 1.0. |
650 | * (See copy at http://boost.org/LICENSE_1_0.txt) |
651 | * ==================================================== |
652 | */ |
653 | |
654 | template <class A, class T> |
655 | inline batch<T, A> cosh(batch<T, A> const& self, requires_arch<generic>) noexcept |
656 | { |
657 | using batch_type = batch<T, A>; |
658 | batch_type x = abs(self); |
659 | auto test1 = x > (constants::maxlog<batch_type>() - constants::log_2<batch_type>()); |
660 | batch_type fac = select(test1, batch_type(0.5), batch_type(1.)); |
661 | batch_type tmp = exp(x * fac); |
662 | batch_type tmp1 = batch_type(0.5) * tmp; |
663 | return select(test1, tmp1 * tmp, detail::average(tmp, batch_type(1.) / tmp)); |
664 | } |
665 | template <class A, class T> |
666 | inline batch<std::complex<T>, A> cosh(const batch<std::complex<T>, A>& z, requires_arch<generic>) noexcept |
667 | { |
668 | auto x = z.real(); |
669 | auto y = z.imag(); |
670 | return { cosh(x) * cos(y), sinh(x) * sin(y) }; |
671 | } |
672 | |
673 | // sin |
674 | namespace detail |
675 | { |
676 | template <class A, class T, class Tag = trigo_radian_tag> |
677 | inline batch<T, A> sin(batch<T, A> const& self, Tag = Tag()) noexcept |
678 | { |
679 | using batch_type = batch<T, A>; |
680 | const batch_type x = abs(self); |
681 | batch_type xr = constants::nan<batch_type>(); |
682 | const batch_type n = detail::trigo_reducer<batch_type, Tag>::reduce(x, xr); |
683 | auto tmp = select(n >= batch_type(2.), batch_type(1.), batch_type(0.)); |
684 | auto swap_bit = fma(batch_type(-2.), tmp, n); |
685 | auto sign_bit = bitofsign(self) ^ select(tmp != batch_type(0.), constants::signmask<batch_type>(), batch_type(0.)); |
686 | const batch_type z = xr * xr; |
687 | const batch_type se = detail::sin_eval(z, xr); |
688 | const batch_type ce = detail::cos_eval(z); |
689 | const batch_type z1 = select(swap_bit == batch_type(0.), se, ce); |
690 | return z1 ^ sign_bit; |
691 | } |
692 | } |
693 | |
694 | template <class A, class T> |
695 | inline batch<T, A> sin(batch<T, A> const& self, requires_arch<generic>) noexcept |
696 | { |
697 | return detail::sin(self); |
698 | } |
699 | |
700 | template <class A, class T> |
701 | inline batch<std::complex<T>, A> sin(batch<std::complex<T>, A> const& z, requires_arch<generic>) noexcept |
702 | { |
703 | return { sin(z.real()) * cosh(z.imag()), cos(z.real()) * sinh(z.imag()) }; |
704 | } |
705 | |
706 | // sincos |
707 | template <class A, class T> |
708 | inline std::pair<batch<T, A>, batch<T, A>> sincos(batch<T, A> const& self, requires_arch<generic>) noexcept |
709 | { |
710 | using batch_type = batch<T, A>; |
711 | const batch_type x = abs(self); |
712 | batch_type xr = constants::nan<batch_type>(); |
713 | const batch_type n = detail::trigo_reducer<batch_type>::reduce(x, xr); |
714 | auto tmp = select(n >= batch_type(2.), batch_type(1.), batch_type(0.)); |
715 | auto swap_bit = fma(batch_type(-2.), tmp, n); |
716 | const batch_type z = xr * xr; |
717 | const batch_type se = detail::sin_eval(z, xr); |
718 | const batch_type ce = detail::cos_eval(z); |
719 | auto sin_sign_bit = bitofsign(self) ^ select(tmp != batch_type(0.), constants::signmask<batch_type>(), batch_type(0.)); |
720 | const batch_type sin_z1 = select(swap_bit == batch_type(0.), se, ce); |
721 | auto cos_sign_bit = select((swap_bit ^ tmp) != batch_type(0.), constants::signmask<batch_type>(), batch_type(0.)); |
722 | const batch_type cos_z1 = select(swap_bit != batch_type(0.), se, ce); |
723 | return std::make_pair(sin_z1 ^ sin_sign_bit, cos_z1 ^ cos_sign_bit); |
724 | } |
725 | |
726 | template <class A, class T> |
727 | inline std::pair<batch<std::complex<T>, A>, batch<std::complex<T>, A>> |
728 | sincos(batch<std::complex<T>, A> const& z, requires_arch<generic>) noexcept |
729 | { |
730 | using batch_type = batch<std::complex<T>, A>; |
731 | using real_batch = typename batch_type::real_batch; |
732 | real_batch rcos = cos(z.real()); |
733 | real_batch rsin = sin(z.real()); |
734 | real_batch icosh = cosh(z.imag()); |
735 | real_batch isinh = sinh(z.imag()); |
736 | return std::make_pair(batch_type(rsin * icosh, rcos * isinh), batch_type(rcos * icosh, -rsin * isinh)); |
737 | } |
738 | |
739 | // sinh |
740 | namespace detail |
741 | { |
742 | /* origin: boost/simd/arch/common/detail/generic/sinh_kernel.hpp */ |
743 | /* |
744 | * ==================================================== |
745 | * copyright 2016 NumScale SAS |
746 | * |
747 | * Distributed under the Boost Software License, Version 1.0. |
748 | * (See copy at http://boost.org/LICENSE_1_0.txt) |
749 | * ==================================================== |
750 | */ |
751 | template <class A> |
752 | inline batch<float, A> sinh_kernel(batch<float, A> const& self) noexcept |
753 | { |
754 | using batch_type = batch<float, A>; |
755 | batch_type sqr_self = self * self; |
756 | return detail::horner<batch_type, |
757 | 0x3f800000, // 1.0f |
758 | 0x3e2aaacc, // 1.66667160211E-1f |
759 | 0x3c087bbe, // 8.33028376239E-3f |
760 | 0x39559e2f // 2.03721912945E-4f |
761 | >(sqr_self) |
762 | * self; |
763 | } |
764 | |
765 | template <class A> |
766 | inline batch<double, A> sinh_kernel(batch<double, A> const& self) noexcept |
767 | { |
768 | using batch_type = batch<double, A>; |
769 | batch_type sqrself = self * self; |
770 | return fma(self, (detail::horner<batch_type, |
771 | 0xc115782bdbf6ab05ull, // -3.51754964808151394800E5 |
772 | 0xc0c694b8c71d6182ull, // -1.15614435765005216044E4, |
773 | 0xc064773a398ff4feull, // -1.63725857525983828727E2, |
774 | 0xbfe9435fe8bb3cd6ull // -7.89474443963537015605E-1 |
775 | >(sqrself) |
776 | / detail::horner1<batch_type, |
777 | 0xc1401a20e4f90044ull, // -2.11052978884890840399E6 |
778 | 0x40e1a7ba7ed72245ull, // 3.61578279834431989373E4, |
779 | 0xc0715b6096e96484ull // -2.77711081420602794433E2, |
780 | >(sqrself)) |
781 | * sqrself, |
782 | self); |
783 | } |
784 | } |
785 | /* origin: boost/simd/arch/common/simd/function/sinh.hpp */ |
786 | /* |
787 | * ==================================================== |
788 | * copyright 2016 NumScale SAS |
789 | * |
790 | * Distributed under the Boost Software License, Version 1.0. |
791 | * (See copy at http://boost.org/LICENSE_1_0.txt) |
792 | * ==================================================== |
793 | */ |
794 | template <class A, class T> |
795 | inline batch<T, A> sinh(batch<T, A> const& a, requires_arch<generic>) noexcept |
796 | { |
797 | using batch_type = batch<T, A>; |
798 | batch_type half(0.5); |
799 | batch_type x = abs(a); |
800 | auto lt1 = x < batch_type(1.); |
801 | batch_type bts = bitofsign(a); |
802 | batch_type z(0.); |
803 | if (any(lt1)) |
804 | { |
805 | z = detail::sinh_kernel(x); |
806 | if (all(lt1)) |
807 | return z ^ bts; |
808 | } |
809 | auto test1 = x > (constants::maxlog<batch_type>() - constants::log_2<batch_type>()); |
810 | batch_type fac = select(test1, half, batch_type(1.)); |
811 | batch_type tmp = exp(x * fac); |
812 | batch_type tmp1 = half * tmp; |
813 | batch_type r = select(test1, tmp1 * tmp, tmp1 - half / tmp); |
814 | return select(lt1, z, r) ^ bts; |
815 | } |
816 | template <class A, class T> |
817 | inline batch<std::complex<T>, A> sinh(const batch<std::complex<T>, A>& z, requires_arch<generic>) noexcept |
818 | { |
819 | auto x = z.real(); |
820 | auto y = z.imag(); |
821 | return { sinh(x) * cos(y), cosh(x) * sin(y) }; |
822 | } |
823 | |
824 | // tan |
825 | template <class A, class T> |
826 | inline batch<T, A> tan(batch<T, A> const& self, requires_arch<generic>) noexcept |
827 | { |
828 | using batch_type = batch<T, A>; |
829 | const batch_type x = abs(self); |
830 | batch_type xr = constants::nan<batch_type>(); |
831 | const batch_type n = detail::trigo_reducer<batch_type>::reduce(x, xr); |
832 | auto tmp = select(n >= batch_type(2.), batch_type(1.), batch_type(0.)); |
833 | auto swap_bit = fma(batch_type(-2.), tmp, n); |
834 | auto test = (swap_bit == batch_type(0.)); |
835 | const batch_type y = detail::tan_eval(xr, test); |
836 | return y ^ bitofsign(self); |
837 | } |
838 | template <class A, class T> |
839 | inline batch<std::complex<T>, A> tan(batch<std::complex<T>, A> const& z, requires_arch<generic>) noexcept |
840 | { |
841 | using batch_type = batch<std::complex<T>, A>; |
842 | using real_batch = typename batch_type::real_batch; |
843 | real_batch d = cos(2 * z.real()) + cosh(2 * z.imag()); |
844 | batch_type winf(constants::infinity<real_batch>(), constants::infinity<real_batch>()); |
845 | real_batch wreal = sin(2 * z.real()) / d; |
846 | real_batch wimag = sinh(2 * z.imag()); |
847 | batch_type wres = select(isinf(wimag), batch_type(wreal, real_batch(1.)), batch_type(wreal, wimag / d)); |
848 | return select(d == real_batch(0.), winf, wres); |
849 | } |
850 | |
851 | // tanh |
852 | namespace detail |
853 | { |
854 | /* origin: boost/simd/arch/common/detail/generic/tanh_kernel.hpp */ |
855 | /* |
856 | * ==================================================== |
857 | * copyright 2016 NumScale SAS |
858 | * |
859 | * Distributed under the Boost Software License, Version 1.0. |
860 | * (See copy at http://boost.org/LICENSE_1_0.txt) |
861 | * ==================================================== |
862 | */ |
863 | template <class B> |
864 | struct tanh_kernel; |
865 | |
866 | template <class A> |
867 | struct tanh_kernel<batch<float, A>> |
868 | { |
869 | using batch_type = batch<float, A>; |
870 | static inline batch_type tanh(const batch_type& x) noexcept |
871 | { |
872 | batch_type sqrx = x * x; |
873 | return fma(detail::horner<batch_type, |
874 | 0xbeaaaa99, // -3.33332819422E-1F |
875 | 0x3e088393, // +1.33314422036E-1F |
876 | 0xbd5c1e2d, // -5.37397155531E-2F |
877 | 0x3ca9134e, // +2.06390887954E-2F |
878 | 0xbbbaf0ea // -5.70498872745E-3F |
879 | >(sqrx) |
880 | * sqrx, |
881 | x, x); |
882 | } |
883 | |
884 | static inline batch_type cotanh(const batch_type& x) noexcept |
885 | { |
886 | return batch_type(1.) / tanh(x); |
887 | } |
888 | }; |
889 | |
890 | template <class A> |
891 | struct tanh_kernel<batch<double, A>> |
892 | { |
893 | using batch_type = batch<double, A>; |
894 | static inline batch_type tanh(const batch_type& x) noexcept |
895 | { |
896 | batch_type sqrx = x * x; |
897 | return fma(sqrx * p(x: sqrx) / q(x: sqrx), x, x); |
898 | } |
899 | |
900 | static inline batch_type cotanh(const batch_type& x) noexcept |
901 | { |
902 | batch_type sqrx = x * x; |
903 | batch_type qval = q(x: sqrx); |
904 | return qval / (x * fma(p(x: sqrx), sqrx, qval)); |
905 | } |
906 | |
907 | static inline batch_type p(const batch_type& x) noexcept |
908 | { |
909 | return detail::horner<batch_type, |
910 | 0xc0993ac030580563, // -1.61468768441708447952E3 |
911 | 0xc058d26a0e26682d, // -9.92877231001918586564E1, |
912 | 0xbfeedc5baafd6f4b // -9.64399179425052238628E-1 |
913 | >(x); |
914 | } |
915 | |
916 | static inline batch_type q(const batch_type& x) noexcept |
917 | { |
918 | return detail::horner1<batch_type, |
919 | 0x40b2ec102442040c, // 4.84406305325125486048E3 |
920 | 0x40a176fa0e5535fa, // 2.23548839060100448583E3, |
921 | 0x405c33f28a581B86 // 1.12811678491632931402E2, |
922 | >(x); |
923 | } |
924 | }; |
925 | |
926 | } |
927 | /* origin: boost/simd/arch/common/simd/function/tanh.hpp */ |
928 | /* |
929 | * ==================================================== |
930 | * copyright 2016 NumScale SAS |
931 | * |
932 | * Distributed under the Boost Software License, Version 1.0. |
933 | * (See copy at http://boost.org/LICENSE_1_0.txt) |
934 | * ==================================================== |
935 | */ |
936 | template <class A, class T> |
937 | inline batch<T, A> tanh(batch<T, A> const& self, requires_arch<generic>) noexcept |
938 | { |
939 | using batch_type = batch<T, A>; |
940 | batch_type one(1.); |
941 | batch_type x = abs(self); |
942 | auto test = x < (batch_type(5.) / batch_type(8.)); |
943 | batch_type bts = bitofsign(self); |
944 | batch_type z = one; |
945 | if (any(test)) |
946 | { |
947 | z = detail::tanh_kernel<batch_type>::tanh(x); |
948 | if (all(test)) |
949 | return z ^ bts; |
950 | } |
951 | batch_type r = fma(batch_type(-2.), one / (one + exp(x + x)), one); |
952 | return select(test, z, r) ^ bts; |
953 | } |
954 | template <class A, class T> |
955 | inline batch<std::complex<T>, A> tanh(const batch<std::complex<T>, A>& z, requires_arch<generic>) noexcept |
956 | { |
957 | using real_batch = typename batch<std::complex<T>, A>::real_batch; |
958 | auto x = z.real(); |
959 | auto y = z.imag(); |
960 | real_batch two(2); |
961 | auto d = cosh(two * x) + cos(two * y); |
962 | return { sinh(two * x) / d, sin(two * y) / d }; |
963 | } |
964 | |
965 | } |
966 | |
967 | } |
968 | |
969 | #endif |
970 | |