1#pragma once
2
3#include <Core/Types.h>
4
5#if defined(__x86_64__) || defined(__i386__)
6#include <cpuid.h>
7#endif
8
9#include <cstring>
10
11
12namespace DB
13{
14namespace Cpu
15{
16
17#if (defined(__x86_64__) || defined(__i386__))
18/// Our version is independent of -mxsave option, because we do dynamic dispatch.
19inline UInt64 our_xgetbv(UInt32 xcr) noexcept
20{
21 UInt32 eax;
22 UInt32 edx;
23 __asm__ volatile(
24 "xgetbv"
25 : "=a"(eax), "=d"(edx)
26 : "c"(xcr));
27 return (static_cast<UInt64>(edx) << 32) | eax;
28}
29#endif
30
31inline bool cpuid(UInt32 op, UInt32 sub_op, UInt32 * res) noexcept
32{
33#if defined(__x86_64__) || defined(__i386__)
34 __cpuid_count(op, sub_op, res[0], res[1], res[2], res[3]);
35 return true;
36#else
37 (void)op;
38 (void)sub_op;
39
40 memset(res, 0, 4 * sizeof(*res));
41
42 return false;
43#endif
44}
45
46inline bool cpuid(UInt32 op, UInt32 * res) noexcept
47{
48#if defined(__x86_64__) || defined(__i386__)
49 __cpuid(op, res[0], res[1], res[2], res[3]);
50 return true;
51#else
52 (void)op;
53
54 memset(res, 0, 4 * sizeof(*res));
55
56 return false;
57#endif
58}
59
/// X-macro listing every CPU feature handled in this header.
/// OP(NAME) is expanded once per feature, in the order given here; it is used to
/// generate the `haveNAME()` declarations and the cached `have_NAME` flags.
#define CPU_ID_ENUMERATE(OP) \
    OP(SSE)                  \
    OP(SSE2)                 \
    OP(SSE3)                 \
    OP(SSSE3)                \
    OP(SSE41)                \
    OP(SSE42)                \
    OP(F16C)                 \
    OP(POPCNT)               \
    OP(BMI1)                 \
    OP(BMI2)                 \
    OP(PCLMUL)               \
    OP(AES)                  \
    OP(AVX)                  \
    OP(FMA)                  \
    OP(AVX2)                 \
    OP(AVX512F)              \
    OP(AVX512DQ)             \
    OP(AVX512IFMA)           \
    OP(AVX512PF)             \
    OP(AVX512ER)             \
    OP(AVX512CD)             \
    OP(AVX512BW)             \
    OP(AVX512VL)             \
    OP(AVX512VBMI)           \
    OP(PREFETCHWT1)          \
    OP(SHA)                  \
    OP(ADX)                  \
    OP(RDRAND)               \
    OP(RDSEED)               \
    OP(PCOMMIT)              \
    OP(RDTSCP)               \
    OP(CLFLUSHOPT)           \
    OP(CLWB)                 \
    OP(XSAVE)                \
    OP(OSXSAVE)
96
97union CpuInfo
98{
99 UInt32 info[4];
100
101 struct
102 {
103 UInt32 eax;
104 UInt32 ebx;
105 UInt32 ecx;
106 UInt32 edx;
107 };
108
109 inline CpuInfo(UInt32 op) noexcept { cpuid(op, info); }
110
111 inline CpuInfo(UInt32 op, UInt32 sub_op) noexcept { cpuid(op, sub_op, info); }
112};
113
114#define DEF_NAME(X) inline bool have##X() noexcept;
115 CPU_ID_ENUMERATE(DEF_NAME)
116#undef DEF_NAME
117
118bool haveRDTSCP() noexcept
119{
120 return (CpuInfo(0x80000001).edx >> 27) & 1u;
121}
122
123bool haveSSE() noexcept
124{
125 return (CpuInfo(0x1).edx >> 25) & 1u;
126}
127
128bool haveSSE2() noexcept
129{
130 return (CpuInfo(0x1).edx >> 26) & 1u;
131}
132
133bool haveSSE3() noexcept
134{
135 return CpuInfo(0x1).ecx & 1u;
136}
137
138bool havePCLMUL() noexcept
139{
140 return (CpuInfo(0x1).ecx >> 1) & 1u;
141}
142
143bool haveSSSE3() noexcept
144{
145 return (CpuInfo(0x1).ecx >> 9) & 1u;
146}
147
148bool haveSSE41() noexcept
149{
150 return (CpuInfo(0x1).ecx >> 19) & 1u;
151}
152
153bool haveSSE42() noexcept
154{
155 return (CpuInfo(0x1).ecx >> 20) & 1u;
156}
157
158bool haveF16C() noexcept
159{
160 return (CpuInfo(0x1).ecx >> 29) & 1u;
161}
162
163bool havePOPCNT() noexcept
164{
165 return (CpuInfo(0x1).ecx >> 23) & 1u;
166}
167
168bool haveAES() noexcept
169{
170 return (CpuInfo(0x1).ecx >> 25) & 1u;
171}
172
173bool haveXSAVE() noexcept
174{
175 return (CpuInfo(0x1).ecx >> 26) & 1u;
176}
177
178bool haveOSXSAVE() noexcept
179{
180 return (CpuInfo(0x1).ecx >> 27) & 1u;
181}
182
183bool haveAVX() noexcept
184{
185#if defined(__x86_64__) || defined(__i386__)
186 // http://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-optimization-manual.pdf
187 // https://bugs.chromium.org/p/chromium/issues/detail?id=375968
188 return haveOSXSAVE() // implies haveXSAVE()
189 && (our_xgetbv(0) & 6u) == 6u // XMM state and YMM state are enabled by OS
190 && ((CpuInfo(0x1).ecx >> 28) & 1u); // AVX bit
191#else
192 return false;
193#endif
194}
195
196bool haveFMA() noexcept
197{
198 return haveAVX() && ((CpuInfo(0x1).ecx >> 12) & 1u);
199}
200
201bool haveAVX2() noexcept
202{
203 return haveAVX() && ((CpuInfo(0x7, 0).ebx >> 5) & 1u);
204}
205
206bool haveBMI1() noexcept
207{
208 return (CpuInfo(0x7, 0).ebx >> 3) & 1u;
209}
210
211bool haveBMI2() noexcept
212{
213 return (CpuInfo(0x7, 0).ebx >> 8) & 1u;
214}
215
216bool haveAVX512F() noexcept
217{
218#if defined(__x86_64__) || defined(__i386__)
219 // https://software.intel.com/en-us/articles/how-to-detect-knl-instruction-support
220 return haveOSXSAVE() // implies haveXSAVE()
221 && (our_xgetbv(0) & 6u) == 6u // XMM state and YMM state are enabled by OS
222 && ((our_xgetbv(0) >> 5) & 7u) == 7u // ZMM state is enabled by OS
223 && CpuInfo(0x0).eax >= 0x7 // leaf 7 is present
224 && ((CpuInfo(0x7).ebx >> 16) & 1u); // AVX512F bit
225#else
226 return false;
227#endif
228}
229
230bool haveAVX512DQ() noexcept
231{
232 return haveAVX512F() && ((CpuInfo(0x7, 0).ebx >> 17) & 1u);
233}
234
235bool haveRDSEED() noexcept
236{
237 return CpuInfo(0x0).eax >= 0x7 && ((CpuInfo(0x7, 0).ebx >> 18) & 1u);
238}
239
240bool haveADX() noexcept
241{
242 return CpuInfo(0x0).eax >= 0x7 && ((CpuInfo(0x7, 0).ebx >> 19) & 1u);
243}
244
245bool haveAVX512IFMA() noexcept
246{
247 return haveAVX512F() && ((CpuInfo(0x7, 0).ebx >> 21) & 1u);
248}
249
250bool havePCOMMIT() noexcept
251{
252 return CpuInfo(0x0).eax >= 0x7 && ((CpuInfo(0x7, 0).ebx >> 22) & 1u);
253}
254
255bool haveCLFLUSHOPT() noexcept
256{
257 return CpuInfo(0x0).eax >= 0x7 && ((CpuInfo(0x7, 0).ebx >> 23) & 1u);
258}
259
260bool haveCLWB() noexcept
261{
262 return CpuInfo(0x0).eax >= 0x7 && ((CpuInfo(0x7, 0).ebx >> 24) & 1u);
263}
264
265bool haveAVX512PF() noexcept
266{
267 return haveAVX512F() && ((CpuInfo(0x7, 0).ebx >> 26) & 1u);
268}
269
270bool haveAVX512ER() noexcept
271{
272 return haveAVX512F() && ((CpuInfo(0x7, 0).ebx >> 27) & 1u);
273}
274
275bool haveAVX512CD() noexcept
276{
277 return haveAVX512F() && ((CpuInfo(0x7, 0).ebx >> 28) & 1u);
278}
279
280bool haveSHA() noexcept
281{
282 return CpuInfo(0x0).eax >= 0x7 && ((CpuInfo(0x7, 0).ebx >> 29) & 1u);
283}
284
285bool haveAVX512BW() noexcept
286{
287 return haveAVX512F() && ((CpuInfo(0x7, 0).ebx >> 30) & 1u);
288}
289
290bool haveAVX512VL() noexcept
291{
292 return haveAVX512F() && ((CpuInfo(0x7, 0).ebx >> 31) & 1u);
293}
294
295bool havePREFETCHWT1() noexcept
296{
297 return CpuInfo(0x0).eax >= 0x7 && ((CpuInfo(0x7, 0).ecx >> 0) & 1u);
298}
299
300bool haveAVX512VBMI() noexcept
301{
302 return haveAVX512F() && ((CpuInfo(0x7, 0).ecx >> 1) & 1u);
303}
304
305bool haveRDRAND() noexcept
306{
307 return CpuInfo(0x0).eax >= 0x7 && ((CpuInfo(0x1).ecx >> 30) & 1u);
308}
309
310struct CpuFlagsCache
311{
312#define DEF_NAME(X) static inline bool have_##X = have##X();
313 CPU_ID_ENUMERATE(DEF_NAME)
314#undef DEF_NAME
315};
316
317}
318}
319
320