| 1 | #pragma once |
| 2 | |
| 3 | #include <Core/Types.h> |
| 4 | |
| 5 | #if defined(__x86_64__) || defined(__i386__) |
| 6 | #include <cpuid.h> |
| 7 | #endif |
| 8 | |
| 9 | #include <cstring> |
| 10 | |
| 11 | |
| 12 | namespace DB |
| 13 | { |
| 14 | namespace Cpu |
| 15 | { |
| 16 | |
| 17 | #if (defined(__x86_64__) || defined(__i386__)) |
| 18 | /// Our version is independent of -mxsave option, because we do dynamic dispatch. |
| 19 | inline UInt64 our_xgetbv(UInt32 xcr) noexcept |
| 20 | { |
| 21 | UInt32 eax; |
| 22 | UInt32 edx; |
| 23 | __asm__ volatile( |
| 24 | "xgetbv" |
| 25 | : "=a" (eax), "=d" (edx) |
| 26 | : "c" (xcr)); |
| 27 | return (static_cast<UInt64>(edx) << 32) | eax; |
| 28 | } |
| 29 | #endif |
| 30 | |
| 31 | inline bool cpuid(UInt32 op, UInt32 sub_op, UInt32 * res) noexcept |
| 32 | { |
| 33 | #if defined(__x86_64__) || defined(__i386__) |
| 34 | __cpuid_count(op, sub_op, res[0], res[1], res[2], res[3]); |
| 35 | return true; |
| 36 | #else |
| 37 | (void)op; |
| 38 | (void)sub_op; |
| 39 | |
| 40 | memset(res, 0, 4 * sizeof(*res)); |
| 41 | |
| 42 | return false; |
| 43 | #endif |
| 44 | } |
| 45 | |
| 46 | inline bool cpuid(UInt32 op, UInt32 * res) noexcept |
| 47 | { |
| 48 | #if defined(__x86_64__) || defined(__i386__) |
| 49 | __cpuid(op, res[0], res[1], res[2], res[3]); |
| 50 | return true; |
| 51 | #else |
| 52 | (void)op; |
| 53 | |
| 54 | memset(res, 0, 4 * sizeof(*res)); |
| 55 | |
| 56 | return false; |
| 57 | #endif |
| 58 | } |
| 59 | |
/// X-macro listing the name of every CPU feature handled in this header.
/// `OP` is applied once to each name, in the order written here. Further down it is
/// expanded with short-lived helper macros to declare the have<NAME>() functions and
/// to define the cached flags in CpuFlagsCache.
#define CPU_ID_ENUMERATE(OP) \
    OP(SSE) \
    OP(SSE2) \
    OP(SSE3) \
    OP(SSSE3) \
    OP(SSE41) \
    OP(SSE42) \
    OP(F16C) \
    OP(POPCNT) \
    OP(BMI1) \
    OP(BMI2) \
    OP(PCLMUL) \
    OP(AES) \
    OP(AVX) \
    OP(FMA) \
    OP(AVX2) \
    OP(AVX512F) \
    OP(AVX512DQ) \
    OP(AVX512IFMA) \
    OP(AVX512PF) \
    OP(AVX512ER) \
    OP(AVX512CD) \
    OP(AVX512BW) \
    OP(AVX512VL) \
    OP(AVX512VBMI) \
    OP(PREFETCHWT1) \
    OP(SHA) \
    OP(ADX) \
    OP(RDRAND) \
    OP(RDSEED) \
    OP(PCOMMIT) \
    OP(RDTSCP) \
    OP(CLFLUSHOPT) \
    OP(CLWB) \
    OP(XSAVE) \
    OP(OSXSAVE)
| 96 | |
| 97 | union CpuInfo |
| 98 | { |
| 99 | UInt32 info[4]; |
| 100 | |
| 101 | struct |
| 102 | { |
| 103 | UInt32 eax; |
| 104 | UInt32 ebx; |
| 105 | UInt32 ecx; |
| 106 | UInt32 edx; |
| 107 | }; |
| 108 | |
| 109 | inline CpuInfo(UInt32 op) noexcept { cpuid(op, info); } |
| 110 | |
| 111 | inline CpuInfo(UInt32 op, UInt32 sub_op) noexcept { cpuid(op, sub_op, info); } |
| 112 | }; |
| 113 | |
| 114 | #define DEF_NAME(X) inline bool have##X() noexcept; |
| 115 | CPU_ID_ENUMERATE(DEF_NAME) |
| 116 | #undef DEF_NAME |
| 117 | |
| 118 | bool haveRDTSCP() noexcept |
| 119 | { |
| 120 | return (CpuInfo(0x80000001).edx >> 27) & 1u; |
| 121 | } |
| 122 | |
| 123 | bool haveSSE() noexcept |
| 124 | { |
| 125 | return (CpuInfo(0x1).edx >> 25) & 1u; |
| 126 | } |
| 127 | |
| 128 | bool haveSSE2() noexcept |
| 129 | { |
| 130 | return (CpuInfo(0x1).edx >> 26) & 1u; |
| 131 | } |
| 132 | |
| 133 | bool haveSSE3() noexcept |
| 134 | { |
| 135 | return CpuInfo(0x1).ecx & 1u; |
| 136 | } |
| 137 | |
| 138 | bool havePCLMUL() noexcept |
| 139 | { |
| 140 | return (CpuInfo(0x1).ecx >> 1) & 1u; |
| 141 | } |
| 142 | |
| 143 | bool haveSSSE3() noexcept |
| 144 | { |
| 145 | return (CpuInfo(0x1).ecx >> 9) & 1u; |
| 146 | } |
| 147 | |
| 148 | bool haveSSE41() noexcept |
| 149 | { |
| 150 | return (CpuInfo(0x1).ecx >> 19) & 1u; |
| 151 | } |
| 152 | |
| 153 | bool haveSSE42() noexcept |
| 154 | { |
| 155 | return (CpuInfo(0x1).ecx >> 20) & 1u; |
| 156 | } |
| 157 | |
| 158 | bool haveF16C() noexcept |
| 159 | { |
| 160 | return (CpuInfo(0x1).ecx >> 29) & 1u; |
| 161 | } |
| 162 | |
| 163 | bool havePOPCNT() noexcept |
| 164 | { |
| 165 | return (CpuInfo(0x1).ecx >> 23) & 1u; |
| 166 | } |
| 167 | |
| 168 | bool haveAES() noexcept |
| 169 | { |
| 170 | return (CpuInfo(0x1).ecx >> 25) & 1u; |
| 171 | } |
| 172 | |
| 173 | bool haveXSAVE() noexcept |
| 174 | { |
| 175 | return (CpuInfo(0x1).ecx >> 26) & 1u; |
| 176 | } |
| 177 | |
| 178 | bool haveOSXSAVE() noexcept |
| 179 | { |
| 180 | return (CpuInfo(0x1).ecx >> 27) & 1u; |
| 181 | } |
| 182 | |
| 183 | bool haveAVX() noexcept |
| 184 | { |
| 185 | #if defined(__x86_64__) || defined(__i386__) |
| 186 | // http://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-optimization-manual.pdf |
| 187 | // https://bugs.chromium.org/p/chromium/issues/detail?id=375968 |
| 188 | return haveOSXSAVE() // implies haveXSAVE() |
| 189 | && (our_xgetbv(0) & 6u) == 6u // XMM state and YMM state are enabled by OS |
| 190 | && ((CpuInfo(0x1).ecx >> 28) & 1u); // AVX bit |
| 191 | #else |
| 192 | return false; |
| 193 | #endif |
| 194 | } |
| 195 | |
| 196 | bool haveFMA() noexcept |
| 197 | { |
| 198 | return haveAVX() && ((CpuInfo(0x1).ecx >> 12) & 1u); |
| 199 | } |
| 200 | |
| 201 | bool haveAVX2() noexcept |
| 202 | { |
| 203 | return haveAVX() && ((CpuInfo(0x7, 0).ebx >> 5) & 1u); |
| 204 | } |
| 205 | |
| 206 | bool haveBMI1() noexcept |
| 207 | { |
| 208 | return (CpuInfo(0x7, 0).ebx >> 3) & 1u; |
| 209 | } |
| 210 | |
| 211 | bool haveBMI2() noexcept |
| 212 | { |
| 213 | return (CpuInfo(0x7, 0).ebx >> 8) & 1u; |
| 214 | } |
| 215 | |
| 216 | bool haveAVX512F() noexcept |
| 217 | { |
| 218 | #if defined(__x86_64__) || defined(__i386__) |
| 219 | // https://software.intel.com/en-us/articles/how-to-detect-knl-instruction-support |
| 220 | return haveOSXSAVE() // implies haveXSAVE() |
| 221 | && (our_xgetbv(0) & 6u) == 6u // XMM state and YMM state are enabled by OS |
| 222 | && ((our_xgetbv(0) >> 5) & 7u) == 7u // ZMM state is enabled by OS |
| 223 | && CpuInfo(0x0).eax >= 0x7 // leaf 7 is present |
| 224 | && ((CpuInfo(0x7).ebx >> 16) & 1u); // AVX512F bit |
| 225 | #else |
| 226 | return false; |
| 227 | #endif |
| 228 | } |
| 229 | |
| 230 | bool haveAVX512DQ() noexcept |
| 231 | { |
| 232 | return haveAVX512F() && ((CpuInfo(0x7, 0).ebx >> 17) & 1u); |
| 233 | } |
| 234 | |
| 235 | bool haveRDSEED() noexcept |
| 236 | { |
| 237 | return CpuInfo(0x0).eax >= 0x7 && ((CpuInfo(0x7, 0).ebx >> 18) & 1u); |
| 238 | } |
| 239 | |
| 240 | bool haveADX() noexcept |
| 241 | { |
| 242 | return CpuInfo(0x0).eax >= 0x7 && ((CpuInfo(0x7, 0).ebx >> 19) & 1u); |
| 243 | } |
| 244 | |
| 245 | bool haveAVX512IFMA() noexcept |
| 246 | { |
| 247 | return haveAVX512F() && ((CpuInfo(0x7, 0).ebx >> 21) & 1u); |
| 248 | } |
| 249 | |
| 250 | bool havePCOMMIT() noexcept |
| 251 | { |
| 252 | return CpuInfo(0x0).eax >= 0x7 && ((CpuInfo(0x7, 0).ebx >> 22) & 1u); |
| 253 | } |
| 254 | |
| 255 | bool haveCLFLUSHOPT() noexcept |
| 256 | { |
| 257 | return CpuInfo(0x0).eax >= 0x7 && ((CpuInfo(0x7, 0).ebx >> 23) & 1u); |
| 258 | } |
| 259 | |
| 260 | bool haveCLWB() noexcept |
| 261 | { |
| 262 | return CpuInfo(0x0).eax >= 0x7 && ((CpuInfo(0x7, 0).ebx >> 24) & 1u); |
| 263 | } |
| 264 | |
| 265 | bool haveAVX512PF() noexcept |
| 266 | { |
| 267 | return haveAVX512F() && ((CpuInfo(0x7, 0).ebx >> 26) & 1u); |
| 268 | } |
| 269 | |
| 270 | bool haveAVX512ER() noexcept |
| 271 | { |
| 272 | return haveAVX512F() && ((CpuInfo(0x7, 0).ebx >> 27) & 1u); |
| 273 | } |
| 274 | |
| 275 | bool haveAVX512CD() noexcept |
| 276 | { |
| 277 | return haveAVX512F() && ((CpuInfo(0x7, 0).ebx >> 28) & 1u); |
| 278 | } |
| 279 | |
| 280 | bool haveSHA() noexcept |
| 281 | { |
| 282 | return CpuInfo(0x0).eax >= 0x7 && ((CpuInfo(0x7, 0).ebx >> 29) & 1u); |
| 283 | } |
| 284 | |
| 285 | bool haveAVX512BW() noexcept |
| 286 | { |
| 287 | return haveAVX512F() && ((CpuInfo(0x7, 0).ebx >> 30) & 1u); |
| 288 | } |
| 289 | |
| 290 | bool haveAVX512VL() noexcept |
| 291 | { |
| 292 | return haveAVX512F() && ((CpuInfo(0x7, 0).ebx >> 31) & 1u); |
| 293 | } |
| 294 | |
| 295 | bool havePREFETCHWT1() noexcept |
| 296 | { |
| 297 | return CpuInfo(0x0).eax >= 0x7 && ((CpuInfo(0x7, 0).ecx >> 0) & 1u); |
| 298 | } |
| 299 | |
| 300 | bool haveAVX512VBMI() noexcept |
| 301 | { |
| 302 | return haveAVX512F() && ((CpuInfo(0x7, 0).ecx >> 1) & 1u); |
| 303 | } |
| 304 | |
| 305 | bool haveRDRAND() noexcept |
| 306 | { |
| 307 | return CpuInfo(0x0).eax >= 0x7 && ((CpuInfo(0x1).ecx >> 30) & 1u); |
| 308 | } |
| 309 | |
| 310 | struct CpuFlagsCache |
| 311 | { |
| 312 | #define DEF_NAME(X) static inline bool have_##X = have##X(); |
| 313 | CPU_ID_ENUMERATE(DEF_NAME) |
| 314 | #undef DEF_NAME |
| 315 | }; |
| 316 | |
| 317 | } |
| 318 | } |
| 319 | |
| 320 | |