1 | /*************************************************************************** |
2 | * Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and * |
3 | * Martin Renou * |
4 | * Copyright (c) QuantStack * |
5 | * Copyright (c) Serge Guelton * |
6 | * * |
7 | * Distributed under the terms of the BSD 3-Clause License. * |
8 | * * |
9 | * The full license is in the file LICENSE, distributed with this software. * |
10 | ****************************************************************************/ |
11 | |
12 | #ifndef XSIMD_GENERIC_DETAILS_HPP |
13 | #define XSIMD_GENERIC_DETAILS_HPP |
14 | |
15 | #include <complex> |
16 | |
17 | #include "../../math/xsimd_rem_pio2.hpp" |
18 | #include "../../types/xsimd_generic_arch.hpp" |
19 | #include "../../types/xsimd_utils.hpp" |
20 | #include "../xsimd_constants.hpp" |
21 | |
22 | namespace xsimd |
23 | { |
// Forward declarations of function templates in namespace xsimd that take
// or return batch / batch_bool values. Declaring them here lets code that
// appears before their definitions refer to them (for instance, the Horner
// helpers in kernel::detail call fma). Their definitions are not part of
// this section of the file.
// TODO: consider moving these declarations to a separate file.
template <class T, class A>
inline batch<T, A> abs(batch<T, A> const& self) noexcept;
template <class T, class A>
inline batch<T, A> abs(batch<std::complex<T>, A> const& self) noexcept;
template <class T, class A>
inline bool any(batch_bool<T, A> const& self) noexcept;
template <class T, class A>
inline batch<T, A> atan2(batch<T, A> const& self, batch<T, A> const& other) noexcept;
template <class A, class T_out, class T_in>
inline batch<T_out, A> batch_cast(batch<T_in, A> const&, batch<T_out, A> const& out) noexcept;
template <class T, class A>
inline batch<T, A> bitofsign(batch<T, A> const& self) noexcept;
template <class B, class T, class A>
inline B bitwise_cast(batch<T, A> const& self) noexcept;
template <class T, class A>
inline batch<T, A> cos(batch<T, A> const& self) noexcept;
template <class T, class A>
inline batch<T, A> cosh(batch<T, A> const& self) noexcept;
template <class T, class A>
inline batch<T, A> exp(batch<T, A> const& self) noexcept;
template <class T, class A>
inline batch<T, A> fma(batch<T, A> const& x, batch<T, A> const& y, batch<T, A> const& z) noexcept;
template <class T, class A>
inline batch<T, A> fms(batch<T, A> const& x, batch<T, A> const& y, batch<T, A> const& z) noexcept;
template <class T, class A>
inline batch<T, A> frexp(const batch<T, A>& x, const batch<as_integer_t<T>, A>& e) noexcept;
template <class T, class A, uint64_t... Coefs>
inline batch<T, A> horner(const batch<T, A>& self) noexcept;
template <class T, class A>
inline batch<T, A> hypot(const batch<T, A>& self) noexcept;
template <class T, class A>
inline batch_bool<T, A> is_even(batch<T, A> const& self) noexcept;
template <class T, class A>
inline batch_bool<T, A> is_flint(batch<T, A> const& self) noexcept;
template <class T, class A>
inline batch_bool<T, A> is_odd(batch<T, A> const& self) noexcept;
template <class T, class A>
inline batch_bool<T, A> isinf(batch<T, A> const& self) noexcept;
template <class T, class A>
inline typename batch<T, A>::batch_bool_type isnan(batch<T, A> const& self) noexcept;
template <class T, class A>
inline batch<T, A> ldexp(const batch<T, A>& x, const batch<as_integer_t<T>, A>& e) noexcept;
template <class T, class A>
inline batch<T, A> log(batch<T, A> const& self) noexcept;
template <class T, class A>
inline batch<T, A> nearbyint(batch<T, A> const& self) noexcept;
template <class T, class A>
inline batch<as_integer_t<T>, A> nearbyint_as_int(const batch<T, A>& x) noexcept;
template <class T, class A>
inline T reduce_add(batch<T, A> const&) noexcept;
template <class T, class A>
inline batch<T, A> select(batch_bool<T, A> const&, batch<T, A> const&, batch<T, A> const&) noexcept;
template <class T, class A>
inline batch<std::complex<T>, A> select(batch_bool<T, A> const&, batch<std::complex<T>, A> const&, batch<std::complex<T>, A> const&) noexcept;
template <class T, class A>
inline batch<T, A> sign(batch<T, A> const& self) noexcept;
template <class T, class A>
inline batch<T, A> signnz(batch<T, A> const& self) noexcept;
template <class T, class A>
inline batch<T, A> sin(batch<T, A> const& self) noexcept;
template <class T, class A>
inline batch<T, A> sinh(batch<T, A> const& self) noexcept;
template <class T, class A>
inline std::pair<batch<T, A>, batch<T, A>> sincos(batch<T, A> const& self) noexcept;
template <class T, class A>
inline batch<T, A> sqrt(batch<T, A> const& self) noexcept;
template <class T, class A>
inline batch<T, A> tan(batch<T, A> const& self) noexcept;
template <class T, class A>
inline batch<as_float_t<T>, A> to_float(batch<T, A> const& self) noexcept;
template <class T, class A>
inline batch<as_integer_t<T>, A> to_int(batch<T, A> const& self) noexcept;
template <class T, class A>
inline batch<T, A> trunc(batch<T, A> const& self) noexcept;
99 | |
100 | namespace kernel |
101 | { |
102 | |
103 | namespace detail |
104 | { |
105 | template <class F, class A, class T, class... Batches> |
106 | inline batch<T, A> apply(F&& func, batch<T, A> const& self, batch<T, A> const& other) noexcept |
107 | { |
108 | constexpr std::size_t size = batch<T, A>::size; |
109 | alignas(A::alignment()) T self_buffer[size]; |
110 | alignas(A::alignment()) T other_buffer[size]; |
111 | self.store_aligned(&self_buffer[0]); |
112 | other.store_aligned(&other_buffer[0]); |
113 | for (std::size_t i = 0; i < size; ++i) |
114 | { |
115 | self_buffer[i] = func(self_buffer[i], other_buffer[i]); |
116 | } |
117 | return batch<T, A>::load_aligned(self_buffer); |
118 | } |
119 | |
120 | template <class U, class F, class A, class T> |
121 | inline batch<U, A> apply_transform(F&& func, batch<T, A> const& self) noexcept |
122 | { |
123 | static_assert(batch<T, A>::size == batch<U, A>::size, |
124 | "Source and destination sizes must match" ); |
125 | constexpr std::size_t src_size = batch<T, A>::size; |
126 | constexpr std::size_t dest_size = batch<U, A>::size; |
127 | alignas(A::alignment()) T self_buffer[src_size]; |
128 | alignas(A::alignment()) U other_buffer[dest_size]; |
129 | self.store_aligned(&self_buffer[0]); |
130 | for (std::size_t i = 0; i < src_size; ++i) |
131 | { |
132 | other_buffer[i] = func(self_buffer[i]); |
133 | } |
134 | return batch<U, A>::load_aligned(other_buffer); |
135 | } |
136 | } |
137 | |
138 | namespace detail |
139 | { |
// Generic conversion handling machinery. Each architecture must define
// a conversion function when such a conversion exists in the form of an
// intrinsic. That information is then used to automatically decide whether
// to use scalar or vector conversion when doing load / store / batch_cast.
// Tag type: reported by conversion_type when a fast_cast overload is found.
struct with_fast_conversion {};

// Tag type: the default reported by conversion_type otherwise.
struct with_slow_conversion {};
150 | |
151 | template <class A, class From, class To, class = void> |
152 | struct conversion_type_impl |
153 | { |
154 | using type = with_slow_conversion; |
155 | }; |
156 | |
157 | using xsimd::detail::void_t; |
158 | |
159 | template <class A, class From, class To> |
160 | struct conversion_type_impl<A, From, To, |
161 | void_t<decltype(fast_cast(std::declval<const batch<From, A>&>(), |
162 | std::declval<const batch<To, A>&>(), |
163 | std::declval<const A&>()))>> |
164 | { |
165 | using type = with_fast_conversion; |
166 | }; |
167 | |
168 | template <class A, class From, class To> |
169 | using conversion_type = typename conversion_type_impl<A, From, To>::type; |
170 | } |
171 | |
172 | namespace detail |
173 | { |
174 | /* origin: boost/simdfunction/horn.hpp*/ |
175 | /* |
176 | * ==================================================== |
177 | * copyright 2016 NumScale SAS |
178 | * |
179 | * Distributed under the Boost Software License, Version 1.0. |
180 | * (See copy at http://boost.org/LICENSE_1_0.txt) |
181 | * ==================================================== |
182 | */ |
183 | template <class B, uint64_t c> |
184 | inline B coef() noexcept |
185 | { |
186 | using value_type = typename B::value_type; |
187 | return B(bit_cast<value_type>(as_unsigned_integer_t<value_type>(c))); |
188 | } |
// Horner evaluation with no coefficients: the empty polynomial.
// The argument is ignored; the result is B constructed from a zero of
// type B::value_type.
template <class B>
inline B horner(const B&) noexcept
{
    using scalar_type = typename B::value_type;
    return B(scalar_type(0.));
}
194 | |
195 | template <class B, uint64_t c0> |
196 | inline B horner(const B&) noexcept |
197 | { |
198 | return coef<B, c0>(); |
199 | } |
200 | |
201 | template <class B, uint64_t c0, uint64_t c1, uint64_t... args> |
202 | inline B horner(const B& self) noexcept |
203 | { |
204 | return fma(self, horner<B, c1, args...>(self), coef<B, c0>()); |
205 | } |
206 | |
207 | /* origin: boost/simdfunction/horn1.hpp*/ |
208 | /* |
209 | * ==================================================== |
210 | * copyright 2016 NumScale SAS |
211 | * |
212 | * Distributed under the Boost Software License, Version 1.0. |
213 | * (See copy at http://boost.org/LICENSE_1_0.txt) |
214 | * ==================================================== |
215 | */ |
// horner1 evaluation with no coefficients.
// The argument is ignored; the result is B constructed from a one of type
// B::value_type. The literal is converted to B::value_type explicitly, as
// the coefficient-free horner overload does for zero, so B's constructor
// receives its own scalar type rather than a double.
template <class B>
inline B horner1(const B&) noexcept
{
    return B(typename B::value_type(1.));
}
221 | |
222 | template <class B, uint64_t c0> |
223 | inline B horner1(const B& x) noexcept |
224 | { |
225 | return x + detail::coef<B, c0>(); |
226 | } |
227 | |
228 | template <class B, uint64_t c0, uint64_t c1, uint64_t... args> |
229 | inline B horner1(const B& x) noexcept |
230 | { |
231 | return fma(x, horner1<B, c1, args...>(x), detail::coef<B, c0>()); |
232 | } |
233 | } |
234 | |
235 | } |
236 | |
237 | } |
238 | |
239 | #endif |
240 | |