| 1 | /*************************************************************************** |
| 2 | * Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and * |
| 3 | * Martin Renou * |
| 4 | * Copyright (c) QuantStack * |
| 5 | * Copyright (c) Serge Guelton * |
| 6 | * * |
| 7 | * Distributed under the terms of the BSD 3-Clause License. * |
| 8 | * * |
| 9 | * The full license is in the file LICENSE, distributed with this software. * |
| 10 | ****************************************************************************/ |
| 11 | |
| 12 | #ifndef XSIMD_GENERIC_DETAILS_HPP |
| 13 | #define XSIMD_GENERIC_DETAILS_HPP |
| 14 | |
| 15 | #include <complex> |
| 16 | |
| 17 | #include "../../math/xsimd_rem_pio2.hpp" |
| 18 | #include "../../types/xsimd_generic_arch.hpp" |
| 19 | #include "../../types/xsimd_utils.hpp" |
| 20 | #include "../xsimd_constants.hpp" |
| 21 | |
| 22 | namespace xsimd |
| 23 | { |
// Forward declarations of batch-level operations, so that code placed before
// their definitions (e.g. the horner helpers in kernel::detail below, which
// call fma) can name them. The definitions are not part of this file.
// Should we put them in a separate file?
template <class T, class A>
inline batch<T, A> abs(batch<T, A> const& self) noexcept;
// Overload taking a batch of std::complex<T>; it returns a batch of T.
template <class T, class A>
inline batch<T, A> abs(batch<std::complex<T>, A> const& self) noexcept;
template <class T, class A>
inline bool any(batch_bool<T, A> const& self) noexcept;
template <class T, class A>
inline batch<T, A> atan2(batch<T, A> const& self, batch<T, A> const& other) noexcept;
// Note the template parameter order: the architecture A comes first here.
template <class A, class T_out, class T_in>
inline batch<T_out, A> batch_cast(batch<T_in, A> const&, batch<T_out, A> const& out) noexcept;
template <class T, class A>
inline batch<T, A> bitofsign(batch<T, A> const& self) noexcept;
// The destination type B must be given explicitly; it cannot be deduced.
template <class B, class T, class A>
inline B bitwise_cast(batch<T, A> const& self) noexcept;
template <class T, class A>
inline batch<T, A> cos(batch<T, A> const& self) noexcept;
template <class T, class A>
inline batch<T, A> cosh(batch<T, A> const& self) noexcept;
template <class T, class A>
inline batch<T, A> exp(batch<T, A> const& self) noexcept;
template <class T, class A>
inline batch<T, A> fma(batch<T, A> const& x, batch<T, A> const& y, batch<T, A> const& z) noexcept;
template <class T, class A>
inline batch<T, A> fms(batch<T, A> const& x, batch<T, A> const& y, batch<T, A> const& z) noexcept;
// NOTE(review): `e` is declared as a const reference here; confirm this
// matches the signature of the actual frexp definition.
template <class T, class A>
inline batch<T, A> frexp(const batch<T, A>& x, const batch<as_integer_t<T>, A>& e) noexcept;
// Polynomial coefficients are supplied as uint64_t template arguments.
template <class T, class A, uint64_t... Coefs>
inline batch<T, A> horner(const batch<T, A>& self) noexcept;
// NOTE(review): declared with a single operand; confirm this matches the
// signature of the actual hypot definition.
template <class T, class A>
inline batch<T, A> hypot(const batch<T, A>& self) noexcept;
template <class T, class A>
inline batch_bool<T, A> is_even(batch<T, A> const& self) noexcept;
template <class T, class A>
inline batch_bool<T, A> is_flint(batch<T, A> const& self) noexcept;
template <class T, class A>
inline batch_bool<T, A> is_odd(batch<T, A> const& self) noexcept;
template <class T, class A>
inline batch_bool<T, A> isinf(batch<T, A> const& self) noexcept;
template <class T, class A>
inline typename batch<T, A>::batch_bool_type isnan(batch<T, A> const& self) noexcept;
template <class T, class A>
inline batch<T, A> ldexp(const batch<T, A>& x, const batch<as_integer_t<T>, A>& e) noexcept;
template <class T, class A>
inline batch<T, A> log(batch<T, A> const& self) noexcept;
template <class T, class A>
inline batch<T, A> nearbyint(batch<T, A> const& self) noexcept;
template <class T, class A>
inline batch<as_integer_t<T>, A> nearbyint_as_int(const batch<T, A>& x) noexcept;
// Returns a single scalar T rather than a batch.
template <class T, class A>
inline T reduce_add(batch<T, A> const&) noexcept;
template <class T, class A>
inline batch<T, A> select(batch_bool<T, A> const&, batch<T, A> const&, batch<T, A> const&) noexcept;
// Complex overload: the condition is a batch_bool over the real type T.
template <class T, class A>
inline batch<std::complex<T>, A> select(batch_bool<T, A> const&, batch<std::complex<T>, A> const&, batch<std::complex<T>, A> const&) noexcept;
template <class T, class A>
inline batch<T, A> sign(batch<T, A> const& self) noexcept;
template <class T, class A>
inline batch<T, A> signnz(batch<T, A> const& self) noexcept;
template <class T, class A>
inline batch<T, A> sin(batch<T, A> const& self) noexcept;
template <class T, class A>
inline batch<T, A> sinh(batch<T, A> const& self) noexcept;
// Returns two batches as a std::pair.
template <class T, class A>
inline std::pair<batch<T, A>, batch<T, A>> sincos(batch<T, A> const& self) noexcept;
template <class T, class A>
inline batch<T, A> sqrt(batch<T, A> const& self) noexcept;
template <class T, class A>
inline batch<T, A> tan(batch<T, A> const& self) noexcept;
template <class T, class A>
inline batch<as_float_t<T>, A> to_float(batch<T, A> const& self) noexcept;
template <class T, class A>
inline batch<as_integer_t<T>, A> to_int(batch<T, A> const& self) noexcept;
template <class T, class A>
inline batch<T, A> trunc(batch<T, A> const& self) noexcept;
| 99 | |
| 100 | namespace kernel |
| 101 | { |
| 102 | |
| 103 | namespace detail |
| 104 | { |
| 105 | template <class F, class A, class T, class... Batches> |
| 106 | inline batch<T, A> apply(F&& func, batch<T, A> const& self, batch<T, A> const& other) noexcept |
| 107 | { |
| 108 | constexpr std::size_t size = batch<T, A>::size; |
| 109 | alignas(A::alignment()) T self_buffer[size]; |
| 110 | alignas(A::alignment()) T other_buffer[size]; |
| 111 | self.store_aligned(&self_buffer[0]); |
| 112 | other.store_aligned(&other_buffer[0]); |
| 113 | for (std::size_t i = 0; i < size; ++i) |
| 114 | { |
| 115 | self_buffer[i] = func(self_buffer[i], other_buffer[i]); |
| 116 | } |
| 117 | return batch<T, A>::load_aligned(self_buffer); |
| 118 | } |
| 119 | |
| 120 | template <class U, class F, class A, class T> |
| 121 | inline batch<U, A> apply_transform(F&& func, batch<T, A> const& self) noexcept |
| 122 | { |
| 123 | static_assert(batch<T, A>::size == batch<U, A>::size, |
| 124 | "Source and destination sizes must match" ); |
| 125 | constexpr std::size_t src_size = batch<T, A>::size; |
| 126 | constexpr std::size_t dest_size = batch<U, A>::size; |
| 127 | alignas(A::alignment()) T self_buffer[src_size]; |
| 128 | alignas(A::alignment()) U other_buffer[dest_size]; |
| 129 | self.store_aligned(&self_buffer[0]); |
| 130 | for (std::size_t i = 0; i < src_size; ++i) |
| 131 | { |
| 132 | other_buffer[i] = func(self_buffer[i]); |
| 133 | } |
| 134 | return batch<U, A>::load_aligned(other_buffer); |
| 135 | } |
| 136 | } |
| 137 | |
| 138 | namespace detail |
| 139 | { |
// Generic conversion handling machinery. Each architecture must define a
// conversion function when such a conversion exists in the form of an
// intrinsic. That information is then used to decide automatically whether
// to use scalar or vector conversion when doing load / store / batch_cast.

// Tag type: a vector (intrinsic-based) conversion is available.
struct with_fast_conversion {};

// Tag type: no vector conversion is available.
struct with_slow_conversion {};
| 150 | |
| 151 | template <class A, class From, class To, class = void> |
| 152 | struct conversion_type_impl |
| 153 | { |
| 154 | using type = with_slow_conversion; |
| 155 | }; |
| 156 | |
// Make xsimd::detail::void_t usable unqualified in this namespace.
using xsimd::detail::void_t;

// Partial specialization tagging the conversion as fast. Its last template
// argument is only well-formed when the unqualified call
//     fast_cast(const batch<From, A>&, const batch<To, A>&, const A&)
// compiles; otherwise this specialization is discarded and the primary
// template (slow conversion) is used instead.
// NOTE(review): presumably fast_cast overloads are supplied by the
// per-architecture kernels - they are not declared in this file.
template <class A, class From, class To>
struct conversion_type_impl<A, From, To,
                            void_t<decltype(fast_cast(std::declval<const batch<From, A>&>(),
                                                      std::declval<const batch<To, A>&>(),
                                                      std::declval<const A&>()))>>
{
    using type = with_fast_conversion;
};
| 167 | |
// Convenience alias: resolves to with_fast_conversion or with_slow_conversion
// for converting From to To on architecture A, as selected by
// conversion_type_impl.
template <class A, class From, class To>
using conversion_type = typename conversion_type_impl<A, From, To>::type;
| 170 | } |
| 171 | |
| 172 | namespace detail |
| 173 | { |
| 174 | /* origin: boost/simdfunction/horn.hpp*/ |
| 175 | /* |
| 176 | * ==================================================== |
| 177 | * copyright 2016 NumScale SAS |
| 178 | * |
| 179 | * Distributed under the Boost Software License, Version 1.0. |
| 180 | * (See copy at http://boost.org/LICENSE_1_0.txt) |
| 181 | * ==================================================== |
| 182 | */ |
| 183 | template <class B, uint64_t c> |
| 184 | inline B coef() noexcept |
| 185 | { |
| 186 | using value_type = typename B::value_type; |
| 187 | return B(bit_cast<value_type>(as_unsigned_integer_t<value_type>(c))); |
| 188 | } |
// horner with no coefficients: the empty polynomial, i.e. a batch built
// from the value 0 converted to B's value type. The argument is ignored.
template <class B>
inline B horner(const B&) noexcept
{
    using scalar_type = typename B::value_type;
    return B(scalar_type(0.));
}
| 194 | |
| 195 | template <class B, uint64_t c0> |
| 196 | inline B horner(const B&) noexcept |
| 197 | { |
| 198 | return coef<B, c0>(); |
| 199 | } |
| 200 | |
| 201 | template <class B, uint64_t c0, uint64_t c1, uint64_t... args> |
| 202 | inline B horner(const B& self) noexcept |
| 203 | { |
| 204 | return fma(self, horner<B, c1, args...>(self), coef<B, c0>()); |
| 205 | } |
| 206 | |
| 207 | /* origin: boost/simdfunction/horn1.hpp*/ |
| 208 | /* |
| 209 | * ==================================================== |
| 210 | * copyright 2016 NumScale SAS |
| 211 | * |
| 212 | * Distributed under the Boost Software License, Version 1.0. |
| 213 | * (See copy at http://boost.org/LICENSE_1_0.txt) |
| 214 | * ==================================================== |
| 215 | */ |
// horner1 with no coefficients: a batch built from the value 1. The argument
// is ignored.
//
// The constant is converted to B's value type explicitly before constructing
// the batch, mirroring the zero-coefficient horner overload. This avoids
// relying on an implicit double -> value_type conversion at the constructor
// call, which narrows for float batches and can be ambiguous for batch types
// that offer more than one scalar constructor.
template <class B>
inline B horner1(const B&) noexcept
{
    return B(typename B::value_type(1.));
}
| 221 | |
| 222 | template <class B, uint64_t c0> |
| 223 | inline B horner1(const B& x) noexcept |
| 224 | { |
| 225 | return x + detail::coef<B, c0>(); |
| 226 | } |
| 227 | |
| 228 | template <class B, uint64_t c0, uint64_t c1, uint64_t... args> |
| 229 | inline B horner1(const B& x) noexcept |
| 230 | { |
| 231 | return fma(x, horner1<B, c1, args...>(x), detail::coef<B, c0>()); |
| 232 | } |
| 233 | } |
| 234 | |
| 235 | } |
| 236 | |
| 237 | } |
| 238 | |
| 239 | #endif |
| 240 | |