1/***************************************************************************
2 * Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and *
3 * Martin Renou *
4 * Copyright (c) QuantStack *
5 * Copyright (c) Serge Guelton *
6 * *
7 * Distributed under the terms of the BSD 3-Clause License. *
8 * *
9 * The full license is in the file LICENSE, distributed with this software. *
10 ****************************************************************************/
11
12#ifndef XSIMD_ARCH_HPP
13#define XSIMD_ARCH_HPP
14
15#include <initializer_list>
16#include <type_traits>
17#include <utility>
18
19#include "../types/xsimd_all_registers.hpp"
20#include "./xsimd_config.hpp"
21#include "./xsimd_cpuid.hpp"
22
23namespace xsimd
24{
25
26 namespace detail
27 {
// Type trait telling whether T is one of the types of the pack Tys.
// contains<T, Tys...>::value is true when at least one element of Tys is
// exactly T (as decided by std::is_same), and false for an empty pack.
template <class T, class... Tys>
struct contains;

// Recursive case: T is the head of the pack, or it is found in the tail.
template <class T, class Head, class... Tail>
struct contains<T, Head, Tail...>
    : std::integral_constant<bool,
                             std::is_same<Head, T>::value || contains<T, Tail...>::value>
{
};

// Terminal case: no candidate left to compare against.
template <class T>
struct contains<T> : std::false_type
{
};
43
// Compile-time check that a pack of architectures is ordered by
// non-increasing version(): each element's version must be greater than
// or equal to the version of the element that follows it.
template <class... Archs>
struct is_sorted;

// An empty list is trivially sorted.
template <>
struct is_sorted<> : std::true_type
{
};

// A single-element list is trivially sorted.
template <class Arch>
struct is_sorted<Arch> : std::true_type
{
};

// Compare the first two elements, then continue with the list that starts
// at the *second* element so that every adjacent pair is examined.
// Continuing with Archs... alone would drop A1 and leave the pairs
// (A1, A2), (A3, A4), ... unchecked.
template <class A0, class A1, class... Archs>
struct is_sorted<A0, A1, Archs...>
    : std::conditional<(A0::version() >= A1::version()), is_sorted<A1, Archs...>,
                       std::false_type>::type
{
};
63
// max_of(v0, v1, ...): largest of one or more values that share the same
// type T, compared with operator>. Usable in constant expressions.

// One value: it is its own maximum.
template <typename T>
inline constexpr T max_of(T value) noexcept
{
    return value;
}

// Two or more values: keep the greater of the first two (the second one on
// ties) and compare it against the remaining values.
template <typename T, typename... Ts>
inline constexpr T max_of(T head0, T head1, Ts... tail) noexcept
{
    return (head0 > head1) ? max_of(head0, tail...)
                           : max_of(head1, tail...);
}
75
76 } // namespace detail
77
78 // An arch_list is a list of architectures, sorted by version number.
79 template <class... Archs>
80 struct arch_list
81 {
82#ifndef NDEBUG
83 static_assert(detail::is_sorted<Archs...>::value,
84 "architecture list must be sorted by version");
85#endif
86
87 template <class Arch>
88 using add = arch_list<Archs..., Arch>;
89
90 template <class... OtherArchs>
91 using extend = arch_list<Archs..., OtherArchs...>;
92
93 template <class Arch>
94 static constexpr bool contains() noexcept
95 {
96 return detail::contains<Arch, Archs...>::value;
97 }
98
99 template <class F>
100 static void for_each(F&& f) noexcept
101 {
102 (void)std::initializer_list<bool> { (f(Archs {}), true)... };
103 }
104
105 static constexpr std::size_t alignment() noexcept
106 {
107 // all alignments are a power of two
108 return detail::max_of(Archs::alignment()..., static_cast<size_t>(0));
109 }
110 };
111
// Placeholder architecture. It provides the static query functions that
// this header calls on architecture types, each returning a neutral value:
// not supported, not available, version 0, alignment 0, no alignment
// requirement, and the name "<none>".
struct unavailable
{
    static constexpr char const* name() noexcept
    {
        return "<none>";
    }

    static constexpr unsigned version() noexcept
    {
        return 0;
    }

    static constexpr bool supported() noexcept
    {
        return false;
    }

    static constexpr bool available() noexcept
    {
        return false;
    }

    static constexpr std::size_t alignment() noexcept
    {
        return 0;
    }

    static constexpr bool requires_alignment() noexcept
    {
        return false;
    }
};
121
122 namespace detail
123 {
124 // Pick the best architecture in arch_list L, which is the last
125 // because architectures are sorted by version.
126 template <class L>
127 struct best;
128
129 template <>
130 struct best<arch_list<>>
131 {
132 using type = unavailable;
133 };
134
135 template <class Arch, class... Archs>
136 struct best<arch_list<Arch, Archs...>>
137 {
138 using type = Arch;
139 };
140
141 // Filter archlists Archs, picking only supported archs and adding
142 // them to L.
143 template <class L, class... Archs>
144 struct supported_helper;
145
146 template <class L>
147 struct supported_helper<L, arch_list<>>
148 {
149 using type = L;
150 };
151
152 template <class L, class Arch, class... Archs>
153 struct supported_helper<L, arch_list<Arch, Archs...>>
154 : supported_helper<
155 typename std::conditional<Arch::supported(),
156 typename L::template add<Arch>, L>::type,
157 arch_list<Archs...>>
158 {
159 };
160
161 template <class... Archs>
162 struct supported : supported_helper<arch_list<>, Archs...>
163 {
164 };
165
166 // Joins all arch_list Archs in a single arch_list.
167 template <class... Archs>
168 struct join;
169
170 template <class Arch>
171 struct join<Arch>
172 {
173 using type = Arch;
174 };
175
176 template <class Arch, class... Archs, class... Args>
177 struct join<Arch, arch_list<Archs...>, Args...>
178 : join<typename Arch::template extend<Archs...>, Args...>
179 {
180 };
181 } // namespace detail
182
// Empty tag type.
// NOTE(review): nothing in this header refers to it; presumably it is
// consumed elsewhere in the library -- confirm before relying on it.
struct unsupported {};
186 using all_x86_architectures = arch_list<avx512bw, avx512dq, avx512cd, avx512f, fma3<avx2>, avx2, fma3<avx>, avx, fma4, fma3<sse4_2>, sse4_2, sse4_1, /*sse4a,*/ ssse3, sse3, sse2>;
187 using all_sve_architectures = arch_list<detail::sve<512>, detail::sve<256>, detail::sve<128>>;
188 using all_arm_architectures = typename detail::join<all_sve_architectures, arch_list<neon64, neon>>::type;
189 using all_architectures = typename detail::join<all_arm_architectures, all_x86_architectures>::type;
190
191 using supported_architectures = typename detail::supported<all_architectures>::type;
192
193 using x86_arch = typename detail::best<typename detail::supported<all_x86_architectures>::type>::type;
194 using arm_arch = typename detail::best<typename detail::supported<all_arm_architectures>::type>::type;
195 // using default_arch = typename detail::best<typename detail::supported<arch_list</*arm_arch,*/ x86_arch>>::type>::type;
196 using default_arch = typename std::conditional<std::is_same<x86_arch, unavailable>::value,
197 arm_arch,
198 x86_arch>::type;
199
200 namespace detail
201 {
202 template <class F, class ArchList>
203 class dispatcher
204 {
205
206 const unsigned best_arch;
207 F functor;
208
209 template <class Arch, class... Tys>
210 auto walk_archs(arch_list<Arch>, Tys&&... args) noexcept -> decltype(functor(Arch {}, std::forward<Tys>(args)...))
211 {
212 assert(Arch::available() && "At least one arch must be supported during dispatch");
213 return functor(Arch {}, std::forward<Tys>(args)...);
214 }
215
216 template <class Arch, class ArchNext, class... Archs, class... Tys>
217 auto walk_archs(arch_list<Arch, ArchNext, Archs...>, Tys&&... args) noexcept -> decltype(functor(Arch {}, std::forward<Tys>(args)...))
218 {
219 if (Arch::version() <= best_arch)
220 return functor(Arch {}, std::forward<Tys>(args)...);
221 else
222 return walk_archs(arch_list<ArchNext, Archs...> {}, std::forward<Tys>(args)...);
223 }
224
225 public:
226 dispatcher(F f) noexcept
227 : best_arch(available_architectures().best)
228 , functor(f)
229 {
230 }
231
232 template <class... Tys>
233 auto operator()(Tys&&... args) noexcept -> decltype(functor(default_arch {}, std::forward<Tys>(args)...))
234 {
235 return walk_archs(ArchList {}, std::forward<Tys>(args)...);
236 }
237 };
238 }
239
240 // Generic function dispatch, à la ifunc
241 template <class ArchList = supported_architectures, class F>
242 inline detail::dispatcher<F, ArchList> dispatch(F&& f) noexcept
243 {
244 return { std::forward<F>(f) };
245 }
246
247} // namespace xsimd
248
249#endif
250