1 | /*************************************************************************** |
2 | * Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and * |
3 | * Martin Renou * |
4 | * Copyright (c) QuantStack * |
5 | * Copyright (c) Serge Guelton * |
6 | * * |
7 | * Distributed under the terms of the BSD 3-Clause License. * |
8 | * * |
9 | * The full license is in the file LICENSE, distributed with this software. * |
10 | ****************************************************************************/ |
11 | |
12 | #ifndef XSIMD_ARCH_HPP |
13 | #define XSIMD_ARCH_HPP |
14 | |
#include <cassert>
#include <cstddef>
#include <initializer_list>
#include <type_traits>
#include <utility>

#include "../types/xsimd_all_registers.hpp"
#include "./xsimd_config.hpp"
#include "./xsimd_cpuid.hpp"
22 | |
23 | namespace xsimd |
24 | { |
25 | |
26 | namespace detail |
27 | { |
// Compile-time membership test: `contains<T, Tys...>::value` is true when
// at least one type of the pack Tys is exactly T (cv-qualifiers included).
template <class T, class... Tys>
struct contains;

// Nothing left to compare against: T was not found.
template <class T>
struct contains<T> : std::false_type
{
};

// T is found if it matches the head of the pack or occurs in its tail.
template <class T, class Head, class... Tail>
struct contains<T, Head, Tail...>
    : std::integral_constant<bool,
                             std::is_same<Head, T>::value || contains<T, Tail...>::value>
{
};
43 | |
// Compile-time check that the architectures of the pack are ordered by
// non-increasing `version()`, i.e. that every entry has a version greater
// than or equal to the one that follows it.
template <class... Archs>
struct is_sorted;

// An empty list is trivially sorted.
template <>
struct is_sorted<> : std::true_type
{
};

// So is a list holding a single architecture.
template <class Arch>
struct is_sorted<Arch> : std::true_type
{
};

// Compare the first two entries, then recurse on the list *starting at A1*
// (not after it), so that the pairs (A1, A2), (A2, A3), ... are checked as
// well. Dropping A1 from the recursion would only validate every other pair.
template <class A0, class A1, class... Archs>
struct is_sorted<A0, A1, Archs...>
    : std::conditional<(A0::version() >= A1::version()), is_sorted<A1, Archs...>,
                       std::false_type>::type
{
};
63 | |
// Maximum of one or more values of the same type T, usable in constant
// expressions. Base case: a single value is its own maximum.
template <typename T>
inline constexpr T max_of(T only) noexcept
{
    return only;
}

// Recursive case: keep whichever of the two leading values compares greater
// (the second one on ties) and fold it with the remaining values.
template <typename T, typename... Ts>
inline constexpr T max_of(T lhs, T rhs, Ts... rest) noexcept
{
    return (lhs > rhs) ? max_of(lhs, rest...) : max_of(rhs, rest...);
}
75 | |
76 | } // namespace detail |
77 | |
78 | // An arch_list is a list of architectures, sorted by version number. |
79 | template <class... Archs> |
80 | struct arch_list |
81 | { |
82 | #ifndef NDEBUG |
83 | static_assert(detail::is_sorted<Archs...>::value, |
84 | "architecture list must be sorted by version" ); |
85 | #endif |
86 | |
87 | template <class Arch> |
88 | using add = arch_list<Archs..., Arch>; |
89 | |
90 | template <class... OtherArchs> |
91 | using extend = arch_list<Archs..., OtherArchs...>; |
92 | |
93 | template <class Arch> |
94 | static constexpr bool contains() noexcept |
95 | { |
96 | return detail::contains<Arch, Archs...>::value; |
97 | } |
98 | |
99 | template <class F> |
100 | static void for_each(F&& f) noexcept |
101 | { |
102 | (void)std::initializer_list<bool> { (f(Archs {}), true)... }; |
103 | } |
104 | |
105 | static constexpr std::size_t alignment() noexcept |
106 | { |
107 | // all alignments are a power of two |
108 | return detail::max_of(Archs::alignment()..., static_cast<size_t>(0)); |
109 | } |
110 | }; |
111 | |
// Placeholder architecture exposing the same static queries as the other
// architecture types in this file, each answering with a "nothing here"
// value. detail::best yields it for an empty arch_list.
struct unavailable
{
    // Human-readable name of the placeholder.
    static constexpr char const* name() noexcept
    {
        return "<none>";
    }

    // Version 0, so that it never compares above another version number.
    static constexpr unsigned version() noexcept
    {
        return 0;
    }

    static constexpr std::size_t alignment() noexcept
    {
        return 0;
    }

    static constexpr bool requires_alignment() noexcept
    {
        return false;
    }

    static constexpr bool supported() noexcept
    {
        return false;
    }

    static constexpr bool available() noexcept
    {
        return false;
    }
};
121 | |
122 | namespace detail |
123 | { |
124 | // Pick the best architecture in arch_list L, which is the last |
125 | // because architectures are sorted by version. |
126 | template <class L> |
127 | struct best; |
128 | |
129 | template <> |
130 | struct best<arch_list<>> |
131 | { |
132 | using type = unavailable; |
133 | }; |
134 | |
135 | template <class Arch, class... Archs> |
136 | struct best<arch_list<Arch, Archs...>> |
137 | { |
138 | using type = Arch; |
139 | }; |
140 | |
141 | // Filter archlists Archs, picking only supported archs and adding |
142 | // them to L. |
143 | template <class L, class... Archs> |
144 | struct supported_helper; |
145 | |
146 | template <class L> |
147 | struct supported_helper<L, arch_list<>> |
148 | { |
149 | using type = L; |
150 | }; |
151 | |
152 | template <class L, class Arch, class... Archs> |
153 | struct supported_helper<L, arch_list<Arch, Archs...>> |
154 | : supported_helper< |
155 | typename std::conditional<Arch::supported(), |
156 | typename L::template add<Arch>, L>::type, |
157 | arch_list<Archs...>> |
158 | { |
159 | }; |
160 | |
161 | template <class... Archs> |
162 | struct supported : supported_helper<arch_list<>, Archs...> |
163 | { |
164 | }; |
165 | |
166 | // Joins all arch_list Archs in a single arch_list. |
167 | template <class... Archs> |
168 | struct join; |
169 | |
170 | template <class Arch> |
171 | struct join<Arch> |
172 | { |
173 | using type = Arch; |
174 | }; |
175 | |
176 | template <class Arch, class... Archs, class... Args> |
177 | struct join<Arch, arch_list<Archs...>, Args...> |
178 | : join<typename Arch::template extend<Archs...>, Args...> |
179 | { |
180 | }; |
181 | } // namespace detail |
182 | |
183 | struct unsupported |
184 | { |
185 | }; |
186 | using all_x86_architectures = arch_list<avx512bw, avx512dq, avx512cd, avx512f, fma3<avx2>, avx2, fma3<avx>, avx, fma4, fma3<sse4_2>, sse4_2, sse4_1, /*sse4a,*/ ssse3, sse3, sse2>; |
187 | using all_sve_architectures = arch_list<detail::sve<512>, detail::sve<256>, detail::sve<128>>; |
188 | using all_arm_architectures = typename detail::join<all_sve_architectures, arch_list<neon64, neon>>::type; |
189 | using all_architectures = typename detail::join<all_arm_architectures, all_x86_architectures>::type; |
190 | |
191 | using supported_architectures = typename detail::supported<all_architectures>::type; |
192 | |
193 | using x86_arch = typename detail::best<typename detail::supported<all_x86_architectures>::type>::type; |
194 | using arm_arch = typename detail::best<typename detail::supported<all_arm_architectures>::type>::type; |
195 | // using default_arch = typename detail::best<typename detail::supported<arch_list</*arm_arch,*/ x86_arch>>::type>::type; |
196 | using default_arch = typename std::conditional<std::is_same<x86_arch, unavailable>::value, |
197 | arm_arch, |
198 | x86_arch>::type; |
199 | |
200 | namespace detail |
201 | { |
202 | template <class F, class ArchList> |
203 | class dispatcher |
204 | { |
205 | |
206 | const unsigned best_arch; |
207 | F functor; |
208 | |
209 | template <class Arch, class... Tys> |
210 | auto walk_archs(arch_list<Arch>, Tys&&... args) noexcept -> decltype(functor(Arch {}, std::forward<Tys>(args)...)) |
211 | { |
212 | assert(Arch::available() && "At least one arch must be supported during dispatch" ); |
213 | return functor(Arch {}, std::forward<Tys>(args)...); |
214 | } |
215 | |
216 | template <class Arch, class ArchNext, class... Archs, class... Tys> |
217 | auto walk_archs(arch_list<Arch, ArchNext, Archs...>, Tys&&... args) noexcept -> decltype(functor(Arch {}, std::forward<Tys>(args)...)) |
218 | { |
219 | if (Arch::version() <= best_arch) |
220 | return functor(Arch {}, std::forward<Tys>(args)...); |
221 | else |
222 | return walk_archs(arch_list<ArchNext, Archs...> {}, std::forward<Tys>(args)...); |
223 | } |
224 | |
225 | public: |
226 | dispatcher(F f) noexcept |
227 | : best_arch(available_architectures().best) |
228 | , functor(f) |
229 | { |
230 | } |
231 | |
232 | template <class... Tys> |
233 | auto operator()(Tys&&... args) noexcept -> decltype(functor(default_arch {}, std::forward<Tys>(args)...)) |
234 | { |
235 | return walk_archs(ArchList {}, std::forward<Tys>(args)...); |
236 | } |
237 | }; |
238 | } |
239 | |
240 | // Generic function dispatch, à la ifunc |
241 | template <class ArchList = supported_architectures, class F> |
242 | inline detail::dispatcher<F, ArchList> dispatch(F&& f) noexcept |
243 | { |
244 | return { std::forward<F>(f) }; |
245 | } |
246 | |
247 | } // namespace xsimd |
248 | |
249 | #endif |
250 | |