2 * Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and *
3 * Martin Renou *
4 * Copyright (c) QuantStack *
5 * Copyright (c) Serge Guelton *
6 * *
7 * Distributed under the terms of the BSD 3-Clause License. *
8 * *
9 * The full license is in the file LICENSE, distributed with this software. *
10 ****************************************************************************/
15#include <complex>
17#include "../../math/xsimd_rem_pio2.hpp"
18#include "../../types/xsimd_generic_arch.hpp"
19#include "../../types/xsimd_utils.hpp"
20#include "../xsimd_constants.hpp"
22namespace xsimd
24 // Forward declaration. Should we put them in a separate file?
25 template <class T, class A>
26 inline batch<T, A> abs(batch<T, A> const& self) noexcept;
27 template <class T, class A>
28 inline batch<T, A> abs(batch<std::complex<T>, A> const& self) noexcept;
29 template <class T, class A>
30 inline bool any(batch_bool<T, A> const& self) noexcept;
31 template <class T, class A>
32 inline batch<T, A> atan2(batch<T, A> const& self, batch<T, A> const& other) noexcept;
33 template <class A, class T_out, class T_in>
34 inline batch<T_out, A> batch_cast(batch<T_in, A> const&, batch<T_out, A> const& out) noexcept;
35 template <class T, class A>
36 inline batch<T, A> bitofsign(batch<T, A> const& self) noexcept;
37 template <class B, class T, class A>
38 inline B bitwise_cast(batch<T, A> const& self) noexcept;
39 template <class T, class A>
40 inline batch<T, A> cos(batch<T, A> const& self) noexcept;
41 template <class T, class A>
42 inline batch<T, A> cosh(batch<T, A> const& self) noexcept;
43 template <class T, class A>
44 inline batch<T, A> exp(batch<T, A> const& self) noexcept;
45 template <class T, class A>
46 inline batch<T, A> fma(batch<T, A> const& x, batch<T, A> const& y, batch<T, A> const& z) noexcept;
47 template <class T, class A>
48 inline batch<T, A> fms(batch<T, A> const& x, batch<T, A> const& y, batch<T, A> const& z) noexcept;
49 template <class T, class A>
50 inline batch<T, A> frexp(const batch<T, A>& x, const batch<as_integer_t<T>, A>& e) noexcept;
51 template <class T, class A, uint64_t... Coefs>
52 inline batch<T, A> horner(const batch<T, A>& self) noexcept;
53 template <class T, class A>
54 inline batch<T, A> hypot(const batch<T, A>& self) noexcept;
55 template <class T, class A>
56 inline batch_bool<T, A> is_even(batch<T, A> const& self) noexcept;
57 template <class T, class A>
58 inline batch_bool<T, A> is_flint(batch<T, A> const& self) noexcept;
59 template <class T, class A>
60 inline batch_bool<T, A> is_odd(batch<T, A> const& self) noexcept;
61 template <class T, class A>
62 inline batch_bool<T, A> isinf(batch<T, A> const& self) noexcept;
63 template <class T, class A>
64 inline typename batch<T, A>::batch_bool_type isnan(batch<T, A> const& self) noexcept;
65 template <class T, class A>
66 inline batch<T, A> ldexp(const batch<T, A>& x, const batch<as_integer_t<T>, A>& e) noexcept;
67 template <class T, class A>
68 inline batch<T, A> log(batch<T, A> const& self) noexcept;
69 template <class T, class A>
70 inline batch<T, A> nearbyint(batch<T, A> const& self) noexcept;
71 template <class T, class A>
72 inline batch<as_integer_t<T>, A> nearbyint_as_int(const batch<T, A>& x) noexcept;
73 template <class T, class A>
74 inline T reduce_add(batch<T, A> const&) noexcept;
75 template <class T, class A>
76 inline batch<T, A> select(batch_bool<T, A> const&, batch<T, A> const&, batch<T, A> const&) noexcept;
77 template <class T, class A>
78 inline batch<std::complex<T>, A> select(batch_bool<T, A> const&, batch<std::complex<T>, A> const&, batch<std::complex<T>, A> const&) noexcept;
79 template <class T, class A>
80 inline batch<T, A> sign(batch<T, A> const& self) noexcept;
81 template <class T, class A>
82 inline batch<T, A> signnz(batch<T, A> const& self) noexcept;
83 template <class T, class A>
84 inline batch<T, A> sin(batch<T, A> const& self) noexcept;
85 template <class T, class A>
86 inline batch<T, A> sinh(batch<T, A> const& self) noexcept;
87 template <class T, class A>
88 inline std::pair<batch<T, A>, batch<T, A>> sincos(batch<T, A> const& self) noexcept;
89 template <class T, class A>
90 inline batch<T, A> sqrt(batch<T, A> const& self) noexcept;
91 template <class T, class A>
92 inline batch<T, A> tan(batch<T, A> const& self) noexcept;
93 template <class T, class A>
94 inline batch<as_float_t<T>, A> to_float(batch<T, A> const& self) noexcept;
95 template <class T, class A>
96 inline batch<as_integer_t<T>, A> to_int(batch<T, A> const& self) noexcept;
97 template <class T, class A>
98 inline batch<T, A> trunc(batch<T, A> const& self) noexcept;
100 namespace kernel
101 {
103 namespace detail
104 {
105 template <class F, class A, class T, class... Batches>
106 inline batch<T, A> apply(F&& func, batch<T, A> const& self, batch<T, A> const& other) noexcept
107 {
108 constexpr std::size_t size = batch<T, A>::size;
109 alignas(A::alignment()) T self_buffer[size];
110 alignas(A::alignment()) T other_buffer[size];
111 self.store_aligned(&self_buffer[0]);
112 other.store_aligned(&other_buffer[0]);
113 for (std::size_t i = 0; i < size; ++i)
114 {
115 self_buffer[i] = func(self_buffer[i], other_buffer[i]);
116 }
117 return batch<T, A>::load_aligned(self_buffer);
118 }
120 template <class U, class F, class A, class T>
121 inline batch<U, A> apply_transform(F&& func, batch<T, A> const& self) noexcept
122 {
123 static_assert(batch<T, A>::size == batch<U, A>::size,
124 "Source and destination sizes must match");
125 constexpr std::size_t src_size = batch<T, A>::size;
126 constexpr std::size_t dest_size = batch<U, A>::size;
127 alignas(A::alignment()) T self_buffer[src_size];
128 alignas(A::alignment()) U other_buffer[dest_size];
129 self.store_aligned(&self_buffer[0]);
130 for (std::size_t i = 0; i < src_size; ++i)
131 {
132 other_buffer[i] = func(self_buffer[i]);
133 }
134 return batch<U, A>::load_aligned(other_buffer);
135 }
136 }
138 namespace detail
139 {
140 // Generic conversion handling machinery. Each architecture must define
141 // conversion function when such conversions exits in the form of
142 // intrinsic. Then we use that information to automatically decide whether
143 // to use scalar or vector conversion when doing load / store / batch_cast
144 struct with_fast_conversion
145 {
146 };
147 struct with_slow_conversion
148 {
149 };
151 template <class A, class From, class To, class = void>
152 struct conversion_type_impl
153 {
154 using type = with_slow_conversion;
155 };
157 using xsimd::detail::void_t;
159 template <class A, class From, class To>
160 struct conversion_type_impl<A, From, To,
161 void_t<decltype(fast_cast(std::declval<const batch<From, A>&>(),
162 std::declval<const batch<To, A>&>(),
163 std::declval<const A&>()))>>
164 {
165 using type = with_fast_conversion;
166 };
168 template <class A, class From, class To>
169 using conversion_type = typename conversion_type_impl<A, From, To>::type;
170 }
172 namespace detail
173 {
174 /* origin: boost/simdfunction/horn.hpp*/
175 /*
176 * ====================================================
177 * copyright 2016 NumScale SAS
178 *
179 * Distributed under the Boost Software License, Version 1.0.
180 * (See copy at http://boost.org/LICENSE_1_0.txt)
181 * ====================================================
182 */
183 template <class B, uint64_t c>
184 inline B coef() noexcept
185 {
186 using value_type = typename B::value_type;
187 return B(bit_cast<value_type>(as_unsigned_integer_t<value_type>(c)));
188 }
189 template <class B>
190 inline B horner(const B&) noexcept
191 {
192 return B(typename B::value_type(0.));
193 }
195 template <class B, uint64_t c0>
196 inline B horner(const B&) noexcept
197 {
198 return coef<B, c0>();
199 }
201 template <class B, uint64_t c0, uint64_t c1, uint64_t... args>
202 inline B horner(const B& self) noexcept
203 {
204 return fma(self, horner<B, c1, args...>(self), coef<B, c0>());
205 }
207 /* origin: boost/simdfunction/horn1.hpp*/
208 /*
209 * ====================================================
210 * copyright 2016 NumScale SAS
211 *
212 * Distributed under the Boost Software License, Version 1.0.
213 * (See copy at http://boost.org/LICENSE_1_0.txt)
214 * ====================================================
215 */
// Base case of horner1 (no coefficient supplied): returns a B built from 1.
// The argument is unused.
template <class B>
inline B horner1(const B&) noexcept
{
    // Convert the literal to value_type before constructing B, exactly as
    // the horner base case does, so that B's constructor never receives a
    // raw double and no implicit double -> value_type conversion is needed.
    return B(typename B::value_type(1.));
}
222 template <class B, uint64_t c0>
223 inline B horner1(const B& x) noexcept
224 {
225 return x + detail::coef<B, c0>();
226 }
228 template <class B, uint64_t c0, uint64_t c1, uint64_t... args>
229 inline B horner1(const B& x) noexcept
230 {
231 return fma(x, horner1<B, c1, args...>(x), detail::coef<B, c0>());
232 }
233 }
235 }