| 1 | /*************************************************************************** |
| 2 | * Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and * |
| 3 | * Martin Renou * |
| 4 | * Copyright (c) QuantStack * |
| 5 | * Copyright (c) Serge Guelton * |
| 6 | * * |
| 7 | * Distributed under the terms of the BSD 3-Clause License. * |
| 8 | * * |
| 9 | * The full license is in the file LICENSE, distributed with this software. * |
| 10 | ****************************************************************************/ |
| 11 | |
| 12 | #ifndef XSIMD_ARCH_HPP |
| 13 | #define XSIMD_ARCH_HPP |
| 14 | |
| 15 | #include <initializer_list> |
| 16 | #include <type_traits> |
| 17 | #include <utility> |
| 18 | |
| 19 | #include "../types/xsimd_all_registers.hpp" |
| 20 | #include "./xsimd_config.hpp" |
| 21 | #include "./xsimd_cpuid.hpp" |
| 22 | |
| 23 | namespace xsimd |
| 24 | { |
| 25 | |
| 26 | namespace detail |
| 27 | { |
// Compile-time membership test: `contains<T, Tys...>::value` is true when at
// least one type of the pack Tys is exactly T (as decided by std::is_same).
template <class T, class... Tys>
struct contains;

// Base case: no candidate is left to compare against, so T was not found.
template <class T>
struct contains<T> : std::false_type
{
};

// Recursive case: T is found if it matches the head of the pack or if it is
// found among the remaining candidates.
template <class T, class Head, class... Tail>
struct contains<T, Head, Tail...>
    : std::integral_constant<bool,
                             std::is_same<Head, T>::value || contains<T, Tail...>::value>
{
};
| 43 | |
// Compile-time check that the architectures of a pack are listed in
// non-increasing order of `version()`: `is_sorted<Archs...>::value` is true
// when every architecture has a version greater than or equal to the version
// of the architecture that immediately follows it.
template <class... Archs>
struct is_sorted;

// An empty list is trivially sorted.
template <>
struct is_sorted<> : std::true_type
{
};

// So is a list holding a single architecture.
template <class Arch>
struct is_sorted<Arch> : std::true_type
{
};

// Two or more architectures: the first two must be ordered, and the list
// starting at the *second* one must be sorted as well. A1 is deliberately
// kept in the recursion: dropping it would only compare disjoint pairs
// (A0/A1, A2/A3, ...) and never A1 against A2.
template <class A0, class A1, class... Archs>
struct is_sorted<A0, A1, Archs...>
    : std::conditional<(A0::version() >= A1::version()), is_sorted<A1, Archs...>,
                       std::false_type>::type
{
};
| 63 | |
// Largest of one or more values, usable in constant expressions.

// Single value: it is its own maximum.
template <typename T>
inline constexpr T max_of(T only) noexcept
{
    return only;
}

// Two or more values: keep the greater of the first two (the second one when
// `first > second` is false) and compare it against the remaining values.
template <typename T, typename... Ts>
inline constexpr T max_of(T first, T second, Ts... rest) noexcept
{
    return (first > second) ? max_of(first, rest...) : max_of(second, rest...);
}
| 75 | |
| 76 | } // namespace detail |
| 77 | |
| 78 | // An arch_list is a list of architectures, sorted by version number. |
| 79 | template <class... Archs> |
| 80 | struct arch_list |
| 81 | { |
| 82 | #ifndef NDEBUG |
| 83 | static_assert(detail::is_sorted<Archs...>::value, |
| 84 | "architecture list must be sorted by version" ); |
| 85 | #endif |
| 86 | |
| 87 | template <class Arch> |
| 88 | using add = arch_list<Archs..., Arch>; |
| 89 | |
| 90 | template <class... OtherArchs> |
| 91 | using extend = arch_list<Archs..., OtherArchs...>; |
| 92 | |
| 93 | template <class Arch> |
| 94 | static constexpr bool contains() noexcept |
| 95 | { |
| 96 | return detail::contains<Arch, Archs...>::value; |
| 97 | } |
| 98 | |
| 99 | template <class F> |
| 100 | static void for_each(F&& f) noexcept |
| 101 | { |
| 102 | (void)std::initializer_list<bool> { (f(Archs {}), true)... }; |
| 103 | } |
| 104 | |
| 105 | static constexpr std::size_t alignment() noexcept |
| 106 | { |
| 107 | // all alignments are a power of two |
| 108 | return detail::max_of(Archs::alignment()..., static_cast<size_t>(0)); |
| 109 | } |
| 110 | }; |
| 111 | |
// Placeholder architecture exposing the same static queries as a real one,
// with every answer in the negative: not supported, not available, version
// and alignment 0. detail::best yields this type for an empty arch_list.
struct unavailable
{
    // Human-readable name of the placeholder.
    static constexpr char const* name() noexcept
    {
        return "<none>";
    }

    static constexpr unsigned version() noexcept
    {
        return 0;
    }

    static constexpr std::size_t alignment() noexcept
    {
        return 0;
    }

    static constexpr bool requires_alignment() noexcept
    {
        return false;
    }

    static constexpr bool supported() noexcept
    {
        return false;
    }

    static constexpr bool available() noexcept
    {
        return false;
    }
};
| 121 | |
| 122 | namespace detail |
| 123 | { |
| 124 | // Pick the best architecture in arch_list L, which is the last |
| 125 | // because architectures are sorted by version. |
| 126 | template <class L> |
| 127 | struct best; |
| 128 | |
| 129 | template <> |
| 130 | struct best<arch_list<>> |
| 131 | { |
| 132 | using type = unavailable; |
| 133 | }; |
| 134 | |
| 135 | template <class Arch, class... Archs> |
| 136 | struct best<arch_list<Arch, Archs...>> |
| 137 | { |
| 138 | using type = Arch; |
| 139 | }; |
| 140 | |
| 141 | // Filter archlists Archs, picking only supported archs and adding |
| 142 | // them to L. |
| 143 | template <class L, class... Archs> |
| 144 | struct supported_helper; |
| 145 | |
| 146 | template <class L> |
| 147 | struct supported_helper<L, arch_list<>> |
| 148 | { |
| 149 | using type = L; |
| 150 | }; |
| 151 | |
| 152 | template <class L, class Arch, class... Archs> |
| 153 | struct supported_helper<L, arch_list<Arch, Archs...>> |
| 154 | : supported_helper< |
| 155 | typename std::conditional<Arch::supported(), |
| 156 | typename L::template add<Arch>, L>::type, |
| 157 | arch_list<Archs...>> |
| 158 | { |
| 159 | }; |
| 160 | |
| 161 | template <class... Archs> |
| 162 | struct supported : supported_helper<arch_list<>, Archs...> |
| 163 | { |
| 164 | }; |
| 165 | |
| 166 | // Joins all arch_list Archs in a single arch_list. |
| 167 | template <class... Archs> |
| 168 | struct join; |
| 169 | |
| 170 | template <class Arch> |
| 171 | struct join<Arch> |
| 172 | { |
| 173 | using type = Arch; |
| 174 | }; |
| 175 | |
| 176 | template <class Arch, class... Archs, class... Args> |
| 177 | struct join<Arch, arch_list<Archs...>, Args...> |
| 178 | : join<typename Arch::template extend<Archs...>, Args...> |
| 179 | { |
| 180 | }; |
| 181 | } // namespace detail |
| 182 | |
| 183 | struct unsupported |
| 184 | { |
| 185 | }; |
| 186 | using all_x86_architectures = arch_list<avx512bw, avx512dq, avx512cd, avx512f, fma3<avx2>, avx2, fma3<avx>, avx, fma4, fma3<sse4_2>, sse4_2, sse4_1, /*sse4a,*/ ssse3, sse3, sse2>; |
| 187 | using all_sve_architectures = arch_list<detail::sve<512>, detail::sve<256>, detail::sve<128>>; |
| 188 | using all_arm_architectures = typename detail::join<all_sve_architectures, arch_list<neon64, neon>>::type; |
| 189 | using all_architectures = typename detail::join<all_arm_architectures, all_x86_architectures>::type; |
| 190 | |
| 191 | using supported_architectures = typename detail::supported<all_architectures>::type; |
| 192 | |
| 193 | using x86_arch = typename detail::best<typename detail::supported<all_x86_architectures>::type>::type; |
| 194 | using arm_arch = typename detail::best<typename detail::supported<all_arm_architectures>::type>::type; |
| 195 | // using default_arch = typename detail::best<typename detail::supported<arch_list</*arm_arch,*/ x86_arch>>::type>::type; |
| 196 | using default_arch = typename std::conditional<std::is_same<x86_arch, unavailable>::value, |
| 197 | arm_arch, |
| 198 | x86_arch>::type; |
| 199 | |
| 200 | namespace detail |
| 201 | { |
| 202 | template <class F, class ArchList> |
| 203 | class dispatcher |
| 204 | { |
| 205 | |
| 206 | const unsigned best_arch; |
| 207 | F functor; |
| 208 | |
| 209 | template <class Arch, class... Tys> |
| 210 | auto walk_archs(arch_list<Arch>, Tys&&... args) noexcept -> decltype(functor(Arch {}, std::forward<Tys>(args)...)) |
| 211 | { |
| 212 | assert(Arch::available() && "At least one arch must be supported during dispatch" ); |
| 213 | return functor(Arch {}, std::forward<Tys>(args)...); |
| 214 | } |
| 215 | |
| 216 | template <class Arch, class ArchNext, class... Archs, class... Tys> |
| 217 | auto walk_archs(arch_list<Arch, ArchNext, Archs...>, Tys&&... args) noexcept -> decltype(functor(Arch {}, std::forward<Tys>(args)...)) |
| 218 | { |
| 219 | if (Arch::version() <= best_arch) |
| 220 | return functor(Arch {}, std::forward<Tys>(args)...); |
| 221 | else |
| 222 | return walk_archs(arch_list<ArchNext, Archs...> {}, std::forward<Tys>(args)...); |
| 223 | } |
| 224 | |
| 225 | public: |
| 226 | dispatcher(F f) noexcept |
| 227 | : best_arch(available_architectures().best) |
| 228 | , functor(f) |
| 229 | { |
| 230 | } |
| 231 | |
| 232 | template <class... Tys> |
| 233 | auto operator()(Tys&&... args) noexcept -> decltype(functor(default_arch {}, std::forward<Tys>(args)...)) |
| 234 | { |
| 235 | return walk_archs(ArchList {}, std::forward<Tys>(args)...); |
| 236 | } |
| 237 | }; |
| 238 | } |
| 239 | |
| 240 | // Generic function dispatch, à la ifunc |
| 241 | template <class ArchList = supported_architectures, class F> |
| 242 | inline detail::dispatcher<F, ArchList> dispatch(F&& f) noexcept |
| 243 | { |
| 244 | return { std::forward<F>(f) }; |
| 245 | } |
| 246 | |
| 247 | } // namespace xsimd |
| 248 | |
| 249 | #endif |
| 250 | |