1 | #pragma once |
2 | |
3 | #include <Core/Types.h> |
4 | |
5 | #if defined(__x86_64__) || defined(__i386__) |
6 | #include <cpuid.h> |
7 | #endif |
8 | |
9 | #include <cstring> |
10 | |
11 | |
12 | namespace DB |
13 | { |
14 | namespace Cpu |
15 | { |
16 | |
17 | #if (defined(__x86_64__) || defined(__i386__)) |
18 | /// Our version is independent of -mxsave option, because we do dynamic dispatch. |
19 | inline UInt64 our_xgetbv(UInt32 xcr) noexcept |
20 | { |
21 | UInt32 eax; |
22 | UInt32 edx; |
23 | __asm__ volatile( |
24 | "xgetbv" |
25 | : "=a" (eax), "=d" (edx) |
26 | : "c" (xcr)); |
27 | return (static_cast<UInt64>(edx) << 32) | eax; |
28 | } |
29 | #endif |
30 | |
31 | inline bool cpuid(UInt32 op, UInt32 sub_op, UInt32 * res) noexcept |
32 | { |
33 | #if defined(__x86_64__) || defined(__i386__) |
34 | __cpuid_count(op, sub_op, res[0], res[1], res[2], res[3]); |
35 | return true; |
36 | #else |
37 | (void)op; |
38 | (void)sub_op; |
39 | |
40 | memset(res, 0, 4 * sizeof(*res)); |
41 | |
42 | return false; |
43 | #endif |
44 | } |
45 | |
46 | inline bool cpuid(UInt32 op, UInt32 * res) noexcept |
47 | { |
48 | #if defined(__x86_64__) || defined(__i386__) |
49 | __cpuid(op, res[0], res[1], res[2], res[3]); |
50 | return true; |
51 | #else |
52 | (void)op; |
53 | |
54 | memset(res, 0, 4 * sizeof(*res)); |
55 | |
56 | return false; |
57 | #endif |
58 | } |
59 | |
/// X-macro listing every CPU feature that has a have<NAME>() probe in this file.
/// Expands to OP(NAME) for each feature, in the order written below.
#define CPU_ID_ENUMERATE(OP) \
    OP(SSE) OP(SSE2) OP(SSE3) OP(SSSE3) OP(SSE41) OP(SSE42) \
    OP(F16C) OP(POPCNT) OP(BMI1) OP(BMI2) OP(PCLMUL) OP(AES) \
    OP(AVX) OP(FMA) OP(AVX2) \
    OP(AVX512F) OP(AVX512DQ) OP(AVX512IFMA) OP(AVX512PF) OP(AVX512ER) \
    OP(AVX512CD) OP(AVX512BW) OP(AVX512VL) OP(AVX512VBMI) \
    OP(PREFETCHWT1) OP(SHA) OP(ADX) OP(RDRAND) OP(RDSEED) \
    OP(PCOMMIT) OP(RDTSCP) OP(CLFLUSHOPT) OP(CLWB) \
    OP(XSAVE) OP(OSXSAVE)
96 | |
97 | union CpuInfo |
98 | { |
99 | UInt32 info[4]; |
100 | |
101 | struct |
102 | { |
103 | UInt32 eax; |
104 | UInt32 ebx; |
105 | UInt32 ecx; |
106 | UInt32 edx; |
107 | }; |
108 | |
109 | inline CpuInfo(UInt32 op) noexcept { cpuid(op, info); } |
110 | |
111 | inline CpuInfo(UInt32 op, UInt32 sub_op) noexcept { cpuid(op, sub_op, info); } |
112 | }; |
113 | |
114 | #define DEF_NAME(X) inline bool have##X() noexcept; |
115 | CPU_ID_ENUMERATE(DEF_NAME) |
116 | #undef DEF_NAME |
117 | |
118 | bool haveRDTSCP() noexcept |
119 | { |
120 | return (CpuInfo(0x80000001).edx >> 27) & 1u; |
121 | } |
122 | |
123 | bool haveSSE() noexcept |
124 | { |
125 | return (CpuInfo(0x1).edx >> 25) & 1u; |
126 | } |
127 | |
128 | bool haveSSE2() noexcept |
129 | { |
130 | return (CpuInfo(0x1).edx >> 26) & 1u; |
131 | } |
132 | |
133 | bool haveSSE3() noexcept |
134 | { |
135 | return CpuInfo(0x1).ecx & 1u; |
136 | } |
137 | |
138 | bool havePCLMUL() noexcept |
139 | { |
140 | return (CpuInfo(0x1).ecx >> 1) & 1u; |
141 | } |
142 | |
143 | bool haveSSSE3() noexcept |
144 | { |
145 | return (CpuInfo(0x1).ecx >> 9) & 1u; |
146 | } |
147 | |
148 | bool haveSSE41() noexcept |
149 | { |
150 | return (CpuInfo(0x1).ecx >> 19) & 1u; |
151 | } |
152 | |
153 | bool haveSSE42() noexcept |
154 | { |
155 | return (CpuInfo(0x1).ecx >> 20) & 1u; |
156 | } |
157 | |
158 | bool haveF16C() noexcept |
159 | { |
160 | return (CpuInfo(0x1).ecx >> 29) & 1u; |
161 | } |
162 | |
163 | bool havePOPCNT() noexcept |
164 | { |
165 | return (CpuInfo(0x1).ecx >> 23) & 1u; |
166 | } |
167 | |
168 | bool haveAES() noexcept |
169 | { |
170 | return (CpuInfo(0x1).ecx >> 25) & 1u; |
171 | } |
172 | |
173 | bool haveXSAVE() noexcept |
174 | { |
175 | return (CpuInfo(0x1).ecx >> 26) & 1u; |
176 | } |
177 | |
178 | bool haveOSXSAVE() noexcept |
179 | { |
180 | return (CpuInfo(0x1).ecx >> 27) & 1u; |
181 | } |
182 | |
183 | bool haveAVX() noexcept |
184 | { |
185 | #if defined(__x86_64__) || defined(__i386__) |
186 | // http://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-optimization-manual.pdf |
187 | // https://bugs.chromium.org/p/chromium/issues/detail?id=375968 |
188 | return haveOSXSAVE() // implies haveXSAVE() |
189 | && (our_xgetbv(0) & 6u) == 6u // XMM state and YMM state are enabled by OS |
190 | && ((CpuInfo(0x1).ecx >> 28) & 1u); // AVX bit |
191 | #else |
192 | return false; |
193 | #endif |
194 | } |
195 | |
196 | bool haveFMA() noexcept |
197 | { |
198 | return haveAVX() && ((CpuInfo(0x1).ecx >> 12) & 1u); |
199 | } |
200 | |
201 | bool haveAVX2() noexcept |
202 | { |
203 | return haveAVX() && ((CpuInfo(0x7, 0).ebx >> 5) & 1u); |
204 | } |
205 | |
206 | bool haveBMI1() noexcept |
207 | { |
208 | return (CpuInfo(0x7, 0).ebx >> 3) & 1u; |
209 | } |
210 | |
211 | bool haveBMI2() noexcept |
212 | { |
213 | return (CpuInfo(0x7, 0).ebx >> 8) & 1u; |
214 | } |
215 | |
216 | bool haveAVX512F() noexcept |
217 | { |
218 | #if defined(__x86_64__) || defined(__i386__) |
219 | // https://software.intel.com/en-us/articles/how-to-detect-knl-instruction-support |
220 | return haveOSXSAVE() // implies haveXSAVE() |
221 | && (our_xgetbv(0) & 6u) == 6u // XMM state and YMM state are enabled by OS |
222 | && ((our_xgetbv(0) >> 5) & 7u) == 7u // ZMM state is enabled by OS |
223 | && CpuInfo(0x0).eax >= 0x7 // leaf 7 is present |
224 | && ((CpuInfo(0x7).ebx >> 16) & 1u); // AVX512F bit |
225 | #else |
226 | return false; |
227 | #endif |
228 | } |
229 | |
230 | bool haveAVX512DQ() noexcept |
231 | { |
232 | return haveAVX512F() && ((CpuInfo(0x7, 0).ebx >> 17) & 1u); |
233 | } |
234 | |
235 | bool haveRDSEED() noexcept |
236 | { |
237 | return CpuInfo(0x0).eax >= 0x7 && ((CpuInfo(0x7, 0).ebx >> 18) & 1u); |
238 | } |
239 | |
240 | bool haveADX() noexcept |
241 | { |
242 | return CpuInfo(0x0).eax >= 0x7 && ((CpuInfo(0x7, 0).ebx >> 19) & 1u); |
243 | } |
244 | |
245 | bool haveAVX512IFMA() noexcept |
246 | { |
247 | return haveAVX512F() && ((CpuInfo(0x7, 0).ebx >> 21) & 1u); |
248 | } |
249 | |
250 | bool havePCOMMIT() noexcept |
251 | { |
252 | return CpuInfo(0x0).eax >= 0x7 && ((CpuInfo(0x7, 0).ebx >> 22) & 1u); |
253 | } |
254 | |
255 | bool haveCLFLUSHOPT() noexcept |
256 | { |
257 | return CpuInfo(0x0).eax >= 0x7 && ((CpuInfo(0x7, 0).ebx >> 23) & 1u); |
258 | } |
259 | |
260 | bool haveCLWB() noexcept |
261 | { |
262 | return CpuInfo(0x0).eax >= 0x7 && ((CpuInfo(0x7, 0).ebx >> 24) & 1u); |
263 | } |
264 | |
265 | bool haveAVX512PF() noexcept |
266 | { |
267 | return haveAVX512F() && ((CpuInfo(0x7, 0).ebx >> 26) & 1u); |
268 | } |
269 | |
270 | bool haveAVX512ER() noexcept |
271 | { |
272 | return haveAVX512F() && ((CpuInfo(0x7, 0).ebx >> 27) & 1u); |
273 | } |
274 | |
275 | bool haveAVX512CD() noexcept |
276 | { |
277 | return haveAVX512F() && ((CpuInfo(0x7, 0).ebx >> 28) & 1u); |
278 | } |
279 | |
280 | bool haveSHA() noexcept |
281 | { |
282 | return CpuInfo(0x0).eax >= 0x7 && ((CpuInfo(0x7, 0).ebx >> 29) & 1u); |
283 | } |
284 | |
285 | bool haveAVX512BW() noexcept |
286 | { |
287 | return haveAVX512F() && ((CpuInfo(0x7, 0).ebx >> 30) & 1u); |
288 | } |
289 | |
290 | bool haveAVX512VL() noexcept |
291 | { |
292 | return haveAVX512F() && ((CpuInfo(0x7, 0).ebx >> 31) & 1u); |
293 | } |
294 | |
295 | bool havePREFETCHWT1() noexcept |
296 | { |
297 | return CpuInfo(0x0).eax >= 0x7 && ((CpuInfo(0x7, 0).ecx >> 0) & 1u); |
298 | } |
299 | |
300 | bool haveAVX512VBMI() noexcept |
301 | { |
302 | return haveAVX512F() && ((CpuInfo(0x7, 0).ecx >> 1) & 1u); |
303 | } |
304 | |
305 | bool haveRDRAND() noexcept |
306 | { |
307 | return CpuInfo(0x0).eax >= 0x7 && ((CpuInfo(0x1).ecx >> 30) & 1u); |
308 | } |
309 | |
310 | struct CpuFlagsCache |
311 | { |
312 | #define DEF_NAME(X) static inline bool have_##X = have##X(); |
313 | CPU_ID_ENUMERATE(DEF_NAME) |
314 | #undef DEF_NAME |
315 | }; |
316 | |
317 | } |
318 | } |
319 | |
320 | |