1 | /*************************************************************************** |
2 | * Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and * |
3 | * Martin Renou * |
4 | * Copyright (c) QuantStack * |
5 | * Copyright (c) Serge Guelton * |
6 | * * |
7 | * Distributed under the terms of the BSD 3-Clause License. * |
8 | * * |
9 | * The full license is in the file LICENSE, distributed with this software. * |
10 | ****************************************************************************/ |
11 | |
12 | #ifndef XSIMD_CPUID_HPP |
13 | #define XSIMD_CPUID_HPP |
14 | |
15 | #include <algorithm> |
16 | #include <cstring> |
17 | |
18 | #if defined(__linux__) && (defined(__ARM_NEON) || defined(_M_ARM)) |
19 | #include <asm/hwcap.h> |
20 | #include <sys/auxv.h> |
21 | #endif |
22 | |
23 | #if defined(_MSC_VER) |
24 | // Contains the definition of __cpuidex |
25 | #include <intrin.h> |
26 | #endif |
27 | |
28 | #include "../types/xsimd_all_registers.hpp" |
29 | |
30 | namespace xsimd |
31 | { |
32 | namespace detail |
33 | { |
34 | struct supported_arch |
35 | { |
36 | unsigned sse2 : 1; |
37 | unsigned sse3 : 1; |
38 | unsigned ssse3 : 1; |
39 | unsigned sse4_1 : 1; |
40 | unsigned sse4_2 : 1; |
41 | unsigned sse4a : 1; |
42 | unsigned fma3_sse : 1; |
43 | unsigned fma4 : 1; |
44 | unsigned xop : 1; |
45 | unsigned avx : 1; |
46 | unsigned fma3_avx : 1; |
47 | unsigned avx2 : 1; |
48 | unsigned fma3_avx2 : 1; |
49 | unsigned avx512f : 1; |
50 | unsigned avx512cd : 1; |
51 | unsigned avx512dq : 1; |
52 | unsigned avx512bw : 1; |
53 | unsigned neon : 1; |
54 | unsigned neon64 : 1; |
55 | |
56 | // version number of the best arch available |
57 | unsigned best; |
58 | |
59 | supported_arch() noexcept |
60 | { |
61 | memset(s: this, c: 0, n: sizeof(supported_arch)); |
62 | |
63 | #if defined(__aarch64__) || defined(_M_ARM64) |
64 | neon = 1; |
65 | neon64 = 1; |
66 | best = neon64::version(); |
67 | #elif defined(__ARM_NEON) || defined(_M_ARM) |
68 | #if defined(__linux__) |
69 | neon = bool(getauxval(AT_HWCAP) & HWCAP_NEON); |
70 | #else |
71 | // that's very conservative :-/ |
72 | neon = 0; |
73 | #endif |
74 | neon64 = 0; |
75 | best = neon::version() * neon; |
76 | |
77 | #elif defined(__x86_64__) || defined(__i386__) || defined(_M_AMD64) || defined(_M_IX86) |
78 | auto get_cpuid = [](int reg[4], int func_id) noexcept |
79 | { |
80 | |
81 | #if defined(_MSC_VER) |
82 | __cpuidex(reg, func_id, 0); |
83 | |
84 | #elif defined(__INTEL_COMPILER) |
85 | __cpuid(reg, func_id); |
86 | |
87 | #elif defined(__GNUC__) || defined(__clang__) |
88 | |
89 | #if defined(__i386__) && defined(__PIC__) |
90 | // %ebx may be the PIC register |
91 | __asm__("xchg{l}\t{%%}ebx, %1\n\t" |
92 | "cpuid\n\t" |
93 | "xchg{l}\t{%%}ebx, %1\n\t" |
94 | : "=a" (reg[0]), "=r" (reg[1]), "=c" (reg[2]), |
95 | "=d" (reg[3]) |
96 | : "a" (func_id), "c" (0)); |
97 | |
98 | #else |
99 | __asm__("cpuid\n\t" |
100 | : "=a" (reg[0]), "=b" (reg[1]), "=c" (reg[2]), |
101 | "=d" (reg[3]) |
102 | : "a" (func_id), "c" (0)); |
103 | #endif |
104 | |
105 | #else |
106 | #error "Unsupported configuration" |
107 | #endif |
108 | }; |
109 | |
110 | int regs[4]; |
111 | |
112 | get_cpuid(regs, 0x1); |
113 | |
114 | sse2 = regs[3] >> 26 & 1; |
115 | best = std::max(best, sse2::version() * sse2); |
116 | |
117 | sse3 = regs[2] >> 0 & 1; |
118 | best = std::max(best, sse3::version() * sse3); |
119 | |
120 | ssse3 = regs[2] >> 9 & 1; |
121 | best = std::max(best, ssse3::version() * ssse3); |
122 | |
123 | sse4_1 = regs[2] >> 19 & 1; |
124 | best = std::max(best, sse4_1::version() * sse4_1); |
125 | |
126 | sse4_2 = regs[2] >> 20 & 1; |
127 | best = std::max(best, sse4_2::version() * sse4_2); |
128 | |
129 | fma3_sse = regs[2] >> 12 & 1; |
130 | if (sse4_2) |
131 | best = std::max(best, fma3<xsimd::sse4_2>::version() * fma3_sse); |
132 | |
133 | get_cpuid(regs, 0x80000001); |
134 | fma4 = regs[2] >> 16 & 1; |
135 | best = std::max(best, fma4::version() * fma4); |
136 | |
137 | // sse4a = regs[2] >> 6 & 1; |
138 | // best = std::max(best, XSIMD_X86_AMD_SSE4A_VERSION * sse4a); |
139 | |
140 | // xop = regs[2] >> 11 & 1; |
141 | // best = std::max(best, XSIMD_X86_AMD_XOP_VERSION * xop); |
142 | |
143 | avx = regs[2] >> 28 & 1; |
144 | best = std::max(best, avx::version() * avx); |
145 | |
146 | fma3_avx = avx && fma3_sse; |
147 | best = std::max(best, fma3<xsimd::avx>::version() * fma3_avx); |
148 | |
149 | get_cpuid(regs, 0x7); |
150 | avx2 = regs[1] >> 5 & 1; |
151 | best = std::max(best, avx2::version() * avx2); |
152 | |
153 | fma3_avx2 = avx2 && fma3_sse; |
154 | best = std::max(best, fma3<xsimd::avx2>::version() * fma3_avx2); |
155 | |
156 | avx512f = regs[1] >> 16 & 1; |
157 | best = std::max(best, avx512f::version() * avx512f); |
158 | |
159 | avx512cd = regs[1] >> 28 & 1; |
160 | best = std::max(best, avx512cd::version() * avx512cd * avx512f); |
161 | |
162 | avx512dq = regs[1] >> 17 & 1; |
163 | best = std::max(best, avx512dq::version() * avx512dq * avx512cd * avx512f); |
164 | |
165 | avx512bw = regs[1] >> 30 & 1; |
166 | best = std::max(best, avx512bw::version() * avx512bw * avx512dq * avx512cd * avx512f); |
167 | |
168 | #endif |
169 | } |
170 | }; |
171 | } |
172 | |
173 | inline detail::supported_arch available_architectures() noexcept |
174 | { |
175 | static detail::supported_arch supported; |
176 | return supported; |
177 | } |
178 | } |
179 | |
180 | #endif |
181 | |