| 1 | /**************************************************************************** | 
|---|
| 2 | ** | 
|---|
| 3 | ** Copyright (C) 2016 The Qt Company Ltd. | 
|---|
| 4 | ** Copyright (C) 2019 Intel Corporation. | 
|---|
| 5 | ** Contact: https://www.qt.io/licensing/ | 
|---|
| 6 | ** | 
|---|
| 7 | ** This file is part of the QtCore module of the Qt Toolkit. | 
|---|
| 8 | ** | 
|---|
| 9 | ** $QT_BEGIN_LICENSE:LGPL$ | 
|---|
| 10 | ** Commercial License Usage | 
|---|
| 11 | ** Licensees holding valid commercial Qt licenses may use this file in | 
|---|
| 12 | ** accordance with the commercial license agreement provided with the | 
|---|
| 13 | ** Software or, alternatively, in accordance with the terms contained in | 
|---|
| 14 | ** a written agreement between you and The Qt Company. For licensing terms | 
|---|
| 15 | ** and conditions see https://www.qt.io/terms-conditions. For further | 
|---|
| 16 | ** information use the contact form at https://www.qt.io/contact-us. | 
|---|
| 17 | ** | 
|---|
| 18 | ** GNU Lesser General Public License Usage | 
|---|
| 19 | ** Alternatively, this file may be used under the terms of the GNU Lesser | 
|---|
| 20 | ** General Public License version 3 as published by the Free Software | 
|---|
| 21 | ** Foundation and appearing in the file LICENSE.LGPL3 included in the | 
|---|
| 22 | ** packaging of this file. Please review the following information to | 
|---|
| 23 | ** ensure the GNU Lesser General Public License version 3 requirements | 
|---|
| 24 | ** will be met: https://www.gnu.org/licenses/lgpl-3.0.html. | 
|---|
| 25 | ** | 
|---|
| 26 | ** GNU General Public License Usage | 
|---|
| 27 | ** Alternatively, this file may be used under the terms of the GNU | 
|---|
| 28 | ** General Public License version 2.0 or (at your option) the GNU General | 
|---|
| 29 | ** Public license version 3 or any later version approved by the KDE Free | 
|---|
| 30 | ** Qt Foundation. The licenses are as published by the Free Software | 
|---|
| 31 | ** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3 | 
|---|
| 32 | ** included in the packaging of this file. Please review the following | 
|---|
| 33 | ** information to ensure the GNU General Public License requirements will | 
|---|
| 34 | ** be met: https://www.gnu.org/licenses/gpl-2.0.html and | 
|---|
| 35 | ** https://www.gnu.org/licenses/gpl-3.0.html. | 
|---|
| 36 | ** | 
|---|
| 37 | ** $QT_END_LICENSE$ | 
|---|
| 38 | ** | 
|---|
| 39 | ****************************************************************************/ | 
|---|
| 40 |  | 
|---|
| 41 | // we need ICC to define the prototype for _rdseed64_step | 
|---|
| 42 | #define __INTEL_COMPILER_USE_INTRINSIC_PROTOTYPES | 
|---|
| 43 |  | 
|---|
| 44 | #include "qsimd_p.h" | 
|---|
| 45 | #include "qalgorithms.h" | 
|---|
| 46 | #include <QByteArray> | 
|---|
| 47 | #include <stdio.h> | 
|---|
| 48 |  | 
|---|
| 49 | #ifdef Q_OS_LINUX | 
|---|
| 50 | #  include "../testlib/3rdparty/valgrind_p.h" | 
|---|
| 51 | #endif | 
|---|
| 52 |  | 
|---|
| 53 | #if defined(Q_OS_WIN) | 
|---|
| 54 | #  if !defined(Q_CC_GNU) | 
|---|
| 55 | #    include <intrin.h> | 
|---|
| 56 | #  endif | 
|---|
| 57 | #elif defined(Q_OS_LINUX) && (defined(Q_PROCESSOR_ARM) || defined(Q_PROCESSOR_MIPS_32)) | 
|---|
| 58 | #include "private/qcore_unix_p.h" | 
|---|
| 59 |  | 
|---|
| 60 | // the kernel header definitions for HWCAP_* | 
|---|
| 61 | // (the ones we need/may need anyway) | 
|---|
| 62 |  | 
|---|
| 63 | // copied from <asm/hwcap.h> (ARM) | 
|---|
| 64 | #define HWCAP_CRUNCH    1024 | 
|---|
| 65 | #define HWCAP_THUMBEE   2048 | 
|---|
| 66 | #define HWCAP_NEON      4096 | 
|---|
| 67 | #define HWCAP_VFPv3     8192 | 
|---|
| 68 | #define HWCAP_VFPv3D16  16384 | 
|---|
| 69 |  | 
|---|
| 70 | // copied from <asm/hwcap.h> (ARM): | 
|---|
| 71 | #define HWCAP2_CRC32 (1 << 4) | 
|---|
| 72 |  | 
|---|
| 73 | // copied from <asm/hwcap.h> (Aarch64) | 
|---|
| 74 | #define HWCAP_CRC32             (1 << 7) | 
|---|
| 75 |  | 
|---|
| 76 | // copied from <linux/auxvec.h> | 
|---|
| 77 | #define AT_HWCAP  16    /* arch dependent hints at CPU capabilities */ | 
|---|
| 78 | #define AT_HWCAP2 26    /* extension of AT_HWCAP */ | 
|---|
| 79 |  | 
|---|
| 80 | #elif defined(Q_CC_GHS) | 
|---|
| 81 | #include <INTEGRITY_types.h> | 
|---|
| 82 | #endif | 
|---|
| 83 |  | 
|---|
| 84 | QT_BEGIN_NAMESPACE | 
|---|
| 85 |  | 
|---|
| 86 | /* | 
|---|
| 87 | * Use kdesdk/scripts/generate_string_table.pl to update the table below. Note | 
|---|
| 88 | * we remove the terminating -1 that the script adds. | 
|---|
| 89 | */ | 
|---|
| 90 |  | 
|---|
| 91 | // begin generated | 
|---|
#if defined(Q_PROCESSOR_ARM)
/* Data:
 neon
 crc32
 */
// Feature names packed into one array: every name carries a leading space and
// is NUL-terminated, so "features_string + features_indices[i]" is a C string.
static const char features_string[] =
    " neon\0"
    " crc32\0"
    "\0";
// Byte offset of each name inside features_string.
static const int features_indices[] = { 0, 6 };
#elif defined(Q_PROCESSOR_MIPS)
/* Data:
 dsp
 dspr2
 */
// Same layout as above: space-prefixed, NUL-terminated names.
static const char features_string[] =
    " dsp\0"
    " dspr2\0"
    "\0";

// Byte offset of each name inside features_string.
static const int features_indices[] = {
    0,    5
};
#elif defined(Q_PROCESSOR_X86)
#  include "qsimd_x86.cpp"                  // generated by util/x86simdgen
#else
// No named features for this architecture: both tables are empty.
static const char features_string[] = "";
static const int features_indices[] = { };
#endif
|---|
| 121 | // end generated | 
|---|
| 122 |  | 
|---|
| 123 | #if defined (Q_OS_NACL) | 
|---|
// Variant selected when Q_OS_NACL is defined: performs no detection and
// always reports an empty feature set.
static inline uint detectProcessorFeatures()
{
    return 0;
}
|---|
| 128 | #elif defined(Q_PROCESSOR_ARM) | 
|---|
| 129 | static inline quint64 detectProcessorFeatures() | 
|---|
| 130 | { | 
|---|
| 131 | quint64 features = 0; | 
|---|
| 132 |  | 
|---|
| 133 | #if defined(Q_OS_LINUX) | 
|---|
| 134 | #  if defined(Q_PROCESSOR_ARM_V8) && defined(Q_PROCESSOR_ARM_64) | 
|---|
| 135 | features |= Q_UINT64_C(1) << CpuFeatureNEON; // NEON is always available on ARMv8 64bit. | 
|---|
| 136 | #  endif | 
|---|
| 137 | int auxv = qt_safe_open( "/proc/self/auxv", O_RDONLY); | 
|---|
| 138 | if (auxv != -1) { | 
|---|
| 139 | unsigned long vector[64]; | 
|---|
| 140 | int nread; | 
|---|
| 141 | while (features == 0) { | 
|---|
| 142 | nread = qt_safe_read(auxv, (char *)vector, sizeof vector); | 
|---|
| 143 | if (nread <= 0) { | 
|---|
| 144 | // EOF or error | 
|---|
| 145 | break; | 
|---|
| 146 | } | 
|---|
| 147 |  | 
|---|
| 148 | int max = nread / (sizeof vector[0]); | 
|---|
| 149 | for (int i = 0; i < max; i += 2) { | 
|---|
| 150 | if (vector[i] == AT_HWCAP) { | 
|---|
| 151 | #  if defined(Q_PROCESSOR_ARM_V8) && defined(Q_PROCESSOR_ARM_64) | 
|---|
| 152 | // For Aarch64: | 
|---|
| 153 | if (vector[i+1] & HWCAP_CRC32) | 
|---|
| 154 | features |= Q_UINT64_C(1) << CpuFeatureCRC32; | 
|---|
| 155 | #  endif | 
|---|
| 156 | // Aarch32, or ARMv7 or before: | 
|---|
| 157 | if (vector[i+1] & HWCAP_NEON) | 
|---|
| 158 | features |= Q_UINT64_C(1) << CpuFeatureNEON; | 
|---|
| 159 | } | 
|---|
| 160 | #  if defined(Q_PROCESSOR_ARM_32) | 
|---|
| 161 | // For Aarch32: | 
|---|
| 162 | if (vector[i] == AT_HWCAP2) { | 
|---|
| 163 | if (vector[i+1] & HWCAP2_CRC32) | 
|---|
| 164 | features |= Q_UINT64_C(1) << CpuFeatureCRC32; | 
|---|
| 165 | } | 
|---|
| 166 | #  endif | 
|---|
| 167 | } | 
|---|
| 168 | } | 
|---|
| 169 |  | 
|---|
| 170 | qt_safe_close(auxv); | 
|---|
| 171 | return features; | 
|---|
| 172 | } | 
|---|
| 173 | // fall back if /proc/self/auxv wasn't found | 
|---|
| 174 | #endif | 
|---|
| 175 |  | 
|---|
| 176 | #if defined(__ARM_NEON__) | 
|---|
| 177 | features |= Q_UINT64_C(1) << CpuFeatureNEON; | 
|---|
| 178 | #endif | 
|---|
| 179 | #if defined(__ARM_FEATURE_CRC32) | 
|---|
| 180 | features |= Q_UINT64_C(1) << CpuFeatureCRC32; | 
|---|
| 181 | #endif | 
|---|
| 182 |  | 
|---|
| 183 | return features; | 
|---|
| 184 | } | 
|---|
| 185 |  | 
|---|
| 186 | #elif defined(Q_PROCESSOR_X86) | 
|---|
| 187 |  | 
|---|
| 188 | #ifdef Q_PROCESSOR_X86_32 | 
|---|
| 189 | # define PICreg "%%ebx" | 
|---|
| 190 | #else | 
|---|
| 191 | # define PICreg "%%rbx" | 
|---|
| 192 | #endif | 
|---|
| 193 |  | 
|---|
| 194 | static bool checkRdrndWorks() noexcept; | 
|---|
| 195 |  | 
|---|
/*
 * Returns the value CPUID leaf 0 reports in EAX (the highest basic leaf the
 * processor supports), or 0 when the CPUID instruction is unavailable or the
 * compiler offers no way of issuing it.
 */
static int maxBasicCpuidSupported()
{
#if defined(Q_CC_EMSCRIPTEN)
    return 6; // All features supported by Emscripten
#elif defined(Q_CC_GNU)
    qregisterint tmp1;

# if Q_PROCESSOR_X86 < 5
    // check if the CPUID instruction is supported: try to toggle bit
    // 0x00200000 of the flags register and see whether the change sticks
    long cpuid_supported;
    asm ("pushf\n"
         "pop %0\n"
         "mov %0, %1\n"
         "xor $0x00200000, %0\n"
         "push %0\n"
         "popf\n"
         "pushf\n"
         "pop %0\n"
         "xor %1, %0\n" // %eax is now 0 if CPUID is not supported
         : "=a" (cpuid_supported), "=r" (tmp1)
         );
    if (!cpuid_supported)
        return 0;
# endif

    int result;
    // The register named by PICreg is swapped into a scratch register around
    // CPUID so its previous value is restored afterwards. Note the space
    // between the string literals and the macro: without it C++11 parses
    // "..."PICreg as a user-defined literal.
    asm ("xchg " PICreg ", %1\n"
         "cpuid\n"
         "xchg " PICreg ", %1\n"
         : "=&a" (result), "=&r" (tmp1)
         : "0" (0)
         : "ecx", "edx");
    return result;
#elif defined(Q_OS_WIN)
    // Use the __cpuid function; if the CPUID instruction isn't supported, it will return 0
    int info[4];
    __cpuid(info, 0);
    return info[0];
#elif defined(Q_CC_GHS)
    unsigned int info[4];
    __CPUID(0, info);
    return info[0];
#else
    return 0;
#endif
}
|---|
| 242 |  | 
|---|
| 243 | static void cpuidFeatures01(uint &ecx, uint &edx) | 
|---|
| 244 | { | 
|---|
| 245 | #if defined(Q_CC_GNU) && !defined(Q_CC_EMSCRIPTEN) | 
|---|
| 246 | qregisterint tmp1; | 
|---|
| 247 | asm ( "xchg "PICreg ", %2\n" | 
|---|
| 248 | "cpuid\n" | 
|---|
| 249 | "xchg "PICreg ", %2\n" | 
|---|
| 250 | : "=&c"(ecx), "=&d"(edx), "=&r"(tmp1) | 
|---|
| 251 | : "a"(1)); | 
|---|
| 252 | #elif defined(Q_OS_WIN) | 
|---|
| 253 | int info[4]; | 
|---|
| 254 | __cpuid(info, 1); | 
|---|
| 255 | ecx = info[2]; | 
|---|
| 256 | edx = info[3]; | 
|---|
| 257 | #elif defined(Q_CC_GHS) | 
|---|
| 258 | unsigned int info[4]; | 
|---|
| 259 | __CPUID(1, info); | 
|---|
| 260 | ecx = info[2]; | 
|---|
| 261 | edx = info[3]; | 
|---|
| 262 | #else | 
|---|
| 263 | Q_UNUSED(ecx); | 
|---|
| 264 | Q_UNUSED(edx); | 
|---|
| 265 | #endif | 
|---|
| 266 | } | 
|---|
| 267 |  | 
|---|
| 268 | #ifdef Q_OS_WIN | 
|---|
| 269 | inline void __cpuidex(int info[4], int, __int64) { memset(info, 0, 4*sizeof(int));} | 
|---|
| 270 | #endif | 
|---|
| 271 |  | 
|---|
| 272 | static void cpuidFeatures07_00(uint &ebx, uint &ecx, uint &edx) | 
|---|
| 273 | { | 
|---|
| 274 | #if defined(Q_CC_GNU) && !defined(Q_CC_EMSCRIPTEN) | 
|---|
| 275 | qregisteruint rbx; // in case it's 64-bit | 
|---|
| 276 | qregisteruint rcx = 0; | 
|---|
| 277 | qregisteruint rdx = 0; | 
|---|
| 278 | asm ( "xchg "PICreg ", %0\n" | 
|---|
| 279 | "cpuid\n" | 
|---|
| 280 | "xchg "PICreg ", %0\n" | 
|---|
| 281 | : "=&r"(rbx), "+&c"(rcx), "+&d"(rdx) | 
|---|
| 282 | : "a"(7)); | 
|---|
| 283 | ebx = rbx; | 
|---|
| 284 | ecx = rcx; | 
|---|
| 285 | edx = rdx; | 
|---|
| 286 | #elif defined(Q_OS_WIN) | 
|---|
| 287 | int info[4]; | 
|---|
| 288 | __cpuidex(info, 7, 0); | 
|---|
| 289 | ebx = info[1]; | 
|---|
| 290 | ecx = info[2]; | 
|---|
| 291 | edx = info[3]; | 
|---|
| 292 | #elif defined(Q_CC_GHS) | 
|---|
| 293 | unsigned int info[4]; | 
|---|
| 294 | __CPUIDEX(7, 0, info); | 
|---|
| 295 | ebx = info[1]; | 
|---|
| 296 | ecx = info[2]; | 
|---|
| 297 | edx = info[3]; | 
|---|
| 298 | #else | 
|---|
| 299 | Q_UNUSED(ebx); | 
|---|
| 300 | Q_UNUSED(ecx); | 
|---|
| 301 | Q_UNUSED(edx); | 
|---|
| 302 | #endif | 
|---|
| 303 | } | 
|---|
| 304 |  | 
|---|
#if defined(Q_OS_WIN) && !(defined(Q_CC_GNU) || defined(Q_CC_GHS))
// fallback overload in case this intrinsic does not exist: unsigned __int64 _xgetbv(unsigned int);
// It always reports 0.
inline quint64 _xgetbv(__int64) { return 0; }
#endif
/*
 * Reads the extended control register selected by \a in using XGETBV and
 * returns the low half of the result in \a eax and the high half in \a edx.
 * When no mechanism is available for the compiler in use, the outputs are
 * left untouched.
 */
static void xgetbv(uint in, uint &eax, uint &edx)
{
#if (defined(Q_CC_GNU) && !defined(Q_CC_EMSCRIPTEN)) || defined(Q_CC_GHS)
    // The instruction is emitted as raw bytes; the register selector is
    // passed in ECX and the result comes back in EAX/EDX.
    asm (".byte 0x0F, 0x01, 0xD0" // xgetbv instruction
        : "=a" (eax), "=d" (edx)
        : "c" (in));
#elif defined(Q_OS_WIN)
    // split the 64-bit intrinsic result into its two 32-bit halves
    quint64 result = _xgetbv(in);
    eax = result;
    edx = result >> 32;
#else
    Q_UNUSED(in);
    Q_UNUSED(eax);
    Q_UNUSED(edx);
#endif
}
|---|
| 325 |  | 
|---|
| 326 | // Flags from the XCR0 state register | 
|---|
// Bit masks for the XCR0 state register, plus the combinations that must all
// be set for a given register file to be usable.
enum XCR0Flags {
    // individual state components
    X87             = 0x01,
    XMM0_15         = 0x02,
    YMM0_15Hi128    = 0x04,
    BNDRegs         = 0x08,
    BNDCSR          = 0x10,
    OpMask          = 0x20,
    ZMM0_15Hi256    = 0x40,
    ZMM16_31        = 0x80,

    // combined requirements
    SSEState        = XMM0_15,
    AVXState        = SSEState | YMM0_15Hi128,
    AVX512State     = AVXState | OpMask | ZMM0_15Hi256 | ZMM16_31
};
|---|
| 341 |  | 
|---|
| 342 | static quint64 adjustedXcr0(quint64 xcr0) | 
|---|
| 343 | { | 
|---|
| 344 | /* | 
|---|
| 345 | * Some OSes hide their capability of context-switching the AVX512 state in | 
|---|
| 346 | * the XCR0 register. They do that so the first time we execute an | 
|---|
| 347 | * instruction that may access the AVX512 state (requiring the EVEX prefix) | 
|---|
| 348 | * they allocate the necessary context switch space. | 
|---|
| 349 | * | 
|---|
| 350 | * This behavior is deprecated with the XFD (Extended Feature Disable) | 
|---|
| 351 | * register, but we can't change existing OSes. | 
|---|
| 352 | */ | 
|---|
| 353 | #ifdef Q_OS_DARWIN | 
|---|
| 354 | // from <machine/cpu_capabilities.h> in xnu | 
|---|
| 355 | // <https://github.com/apple/darwin-xnu/blob/xnu-4903.221.2/osfmk/i386/cpu_capabilities.h> | 
|---|
| 356 | constexpr quint64 kHasAVX512F = Q_UINT64_C(0x0000004000000000); | 
|---|
| 357 | constexpr quintptr commpage = sizeof(void *) > 4 ? Q_UINT64_C(0x00007fffffe00000) : 0xffff0000; | 
|---|
| 358 | constexpr quintptr cpu_capabilities64 = commpage + 0x10; | 
|---|
| 359 | quint64 capab = *reinterpret_cast<quint64 *>(cpu_capabilities64); | 
|---|
| 360 | if (capab & kHasAVX512F) | 
|---|
| 361 | xcr0 |= AVX512State; | 
|---|
| 362 | #endif | 
|---|
| 363 |  | 
|---|
| 364 | return xcr0; | 
|---|
| 365 | } | 
|---|
| 366 |  | 
|---|
| 367 | static quint64 detectProcessorFeatures() | 
|---|
| 368 | { | 
|---|
| 369 | static const quint64 AllAVX2 = CpuFeatureAVX2 | AllAVX512; | 
|---|
| 370 | static const quint64 AllAVX = CpuFeatureAVX | AllAVX2; | 
|---|
| 371 |  | 
|---|
| 372 | quint64 features = 0; | 
|---|
| 373 | int cpuidLevel = maxBasicCpuidSupported(); | 
|---|
| 374 | #if Q_PROCESSOR_X86 < 5 | 
|---|
| 375 | if (cpuidLevel < 1) | 
|---|
| 376 | return 0; | 
|---|
| 377 | #else | 
|---|
| 378 | Q_ASSERT(cpuidLevel >= 1); | 
|---|
| 379 | #endif | 
|---|
| 380 |  | 
|---|
| 381 | uint results[X86CpuidMaxLeaf] = {}; | 
|---|
| 382 | cpuidFeatures01(results[Leaf1ECX], results[Leaf1EDX]); | 
|---|
| 383 | if (cpuidLevel >= 7) | 
|---|
| 384 | cpuidFeatures07_00(results[Leaf7_0EBX], results[Leaf7_0ECX], results[Leaf7_0EDX]); | 
|---|
| 385 |  | 
|---|
| 386 | // populate our feature list | 
|---|
| 387 | for (uint i = 0; i < sizeof(x86_locators) / sizeof(x86_locators[0]); ++i) { | 
|---|
| 388 | uint word = x86_locators[i] / 32; | 
|---|
| 389 | uint bit = 1U << (x86_locators[i] % 32); | 
|---|
| 390 | quint64 feature = Q_UINT64_C(1) << (i + 1); | 
|---|
| 391 | if (results[word] & bit) | 
|---|
| 392 | features |= feature; | 
|---|
| 393 | } | 
|---|
| 394 |  | 
|---|
| 395 | // now check the AVX state | 
|---|
| 396 | quint64 xcr0 = 0; | 
|---|
| 397 | if (results[Leaf1ECX] & (1u << 27)) { | 
|---|
| 398 | // XGETBV enabled | 
|---|
| 399 | uint xgetbvA = 0, xgetbvD = 0; | 
|---|
| 400 | xgetbv(0, xgetbvA, xgetbvD); | 
|---|
| 401 |  | 
|---|
| 402 | xcr0 = xgetbvA; | 
|---|
| 403 | if (sizeof(XCR0Flags) > sizeof(xgetbvA)) | 
|---|
| 404 | xcr0 |= quint64(xgetbvD) << 32; | 
|---|
| 405 | xcr0 = adjustedXcr0(xcr0); | 
|---|
| 406 | } | 
|---|
| 407 |  | 
|---|
| 408 | if ((xcr0 & AVXState) != AVXState) { | 
|---|
| 409 | // support for YMM registers is disabled, disable all AVX | 
|---|
| 410 | features &= ~AllAVX; | 
|---|
| 411 | } else if ((xcr0 & AVX512State) != AVX512State) { | 
|---|
| 412 | // support for ZMM registers or mask registers is disabled, disable all AVX512 | 
|---|
| 413 | features &= ~AllAVX512; | 
|---|
| 414 | } | 
|---|
| 415 |  | 
|---|
| 416 | if (features & CpuFeatureRDRND && !checkRdrndWorks()) | 
|---|
| 417 | features &= ~(CpuFeatureRDRND | CpuFeatureRDSEED); | 
|---|
| 418 |  | 
|---|
| 419 | return features; | 
|---|
| 420 | } | 
|---|
| 421 |  | 
|---|
| 422 | #elif defined(Q_PROCESSOR_MIPS_32) | 
|---|
| 423 |  | 
|---|
| 424 | #if defined(Q_OS_LINUX) | 
|---|
| 425 | // | 
|---|
| 426 | // Do not use QByteArray: it could use SIMD instructions itself at | 
|---|
| 427 | // some point, thus creating a recursive dependency. Instead, use a | 
|---|
| 428 | // QSimpleBuffer, which has the bare minimum needed to use memory | 
|---|
| 429 | // dynamically and read lines from /proc/cpuinfo of arbitrary sizes. | 
|---|
| 430 | // | 
|---|
| 431 | struct QSimpleBuffer | 
|---|
| 432 | { | 
|---|
| 433 | static const int chunk_size = 256; | 
|---|
| 434 | char *data; | 
|---|
| 435 | unsigned alloc; | 
|---|
| 436 | unsigned size; | 
|---|
| 437 |  | 
|---|
| 438 | QSimpleBuffer() : data(nullptr), alloc(0), size(0) { } | 
|---|
| 439 | ~QSimpleBuffer() { ::free(data); } | 
|---|
| 440 |  | 
|---|
| 441 | void resize(unsigned newsize) | 
|---|
| 442 | { | 
|---|
| 443 | if (newsize > alloc) { | 
|---|
| 444 | unsigned newalloc = chunk_size * ((newsize / chunk_size) + 1); | 
|---|
| 445 | if (newalloc < newsize) | 
|---|
| 446 | newalloc = newsize; | 
|---|
| 447 | if (newalloc != alloc) { | 
|---|
| 448 | data = static_cast<char *>(::realloc(data, newalloc)); | 
|---|
| 449 | alloc = newalloc; | 
|---|
| 450 | } | 
|---|
| 451 | } | 
|---|
| 452 | size = newsize; | 
|---|
| 453 | } | 
|---|
| 454 | void append(const QSimpleBuffer &other, unsigned appendsize) | 
|---|
| 455 | { | 
|---|
| 456 | unsigned oldsize = size; | 
|---|
| 457 | resize(oldsize + appendsize); | 
|---|
| 458 | ::memcpy(data + oldsize, other.data, appendsize); | 
|---|
| 459 | } | 
|---|
| 460 | void popleft(unsigned amount) | 
|---|
| 461 | { | 
|---|
| 462 | if (amount >= size) | 
|---|
| 463 | return resize(0); | 
|---|
| 464 | size -= amount; | 
|---|
| 465 | ::memmove(data, data + amount, size); | 
|---|
| 466 | } | 
|---|
| 467 | char *cString() | 
|---|
| 468 | { | 
|---|
| 469 | if (!alloc) | 
|---|
| 470 | resize(1); | 
|---|
| 471 | return (data[size] = '\0', data); | 
|---|
| 472 | } | 
|---|
| 473 | }; | 
|---|
| 474 |  | 
|---|
| 475 | // | 
|---|
| 476 | // Uses a scratch "buffer" (which must be used for all reads done in the | 
|---|
| 477 | // same file descriptor) to read chunks of data from a file, to read | 
|---|
| 478 | // one line at a time. Lines include the trailing newline character ('\n'). | 
|---|
| 479 | // On EOF, line.size is zero. | 
|---|
| 480 | // | 
|---|
| 481 | static void bufReadLine(int fd, QSimpleBuffer &line, QSimpleBuffer &buffer) | 
|---|
| 482 | { | 
|---|
| 483 | for (;;) { | 
|---|
| 484 | char *newline = static_cast<char *>(::memchr(buffer.data, '\n', buffer.size)); | 
|---|
| 485 | if (newline) { | 
|---|
| 486 | unsigned piece_size = newline - buffer.data + 1; | 
|---|
| 487 | line.append(buffer, piece_size); | 
|---|
| 488 | buffer.popleft(piece_size); | 
|---|
| 489 | line.resize(line.size - 1); | 
|---|
| 490 | return; | 
|---|
| 491 | } | 
|---|
| 492 | if (buffer.size + QSimpleBuffer::chunk_size > buffer.alloc) { | 
|---|
| 493 | int oldsize = buffer.size; | 
|---|
| 494 | buffer.resize(buffer.size + QSimpleBuffer::chunk_size); | 
|---|
| 495 | buffer.size = oldsize; | 
|---|
| 496 | } | 
|---|
| 497 | ssize_t read_bytes = | 
|---|
| 498 | ::qt_safe_read(fd, buffer.data + buffer.size, QSimpleBuffer::chunk_size); | 
|---|
| 499 | if (read_bytes > 0) | 
|---|
| 500 | buffer.size += read_bytes; | 
|---|
| 501 | else | 
|---|
| 502 | return; | 
|---|
| 503 | } | 
|---|
| 504 | } | 
|---|
| 505 |  | 
|---|
| 506 | // | 
|---|
| 507 | // Checks if any line with a given prefix from /proc/cpuinfo contains | 
|---|
| 508 | // a certain string, surrounded by spaces. | 
|---|
| 509 | // | 
|---|
| 510 | static bool procCpuinfoContains(const char *prefix, const char *string) | 
|---|
| 511 | { | 
|---|
| 512 | int cpuinfo_fd = ::qt_safe_open( "/proc/cpuinfo", O_RDONLY); | 
|---|
| 513 | if (cpuinfo_fd == -1) | 
|---|
| 514 | return false; | 
|---|
| 515 |  | 
|---|
| 516 | unsigned string_len = ::strlen(string); | 
|---|
| 517 | unsigned prefix_len = ::strlen(prefix); | 
|---|
| 518 | QSimpleBuffer line, buffer; | 
|---|
| 519 | bool present = false; | 
|---|
| 520 | do { | 
|---|
| 521 | line.resize(0); | 
|---|
| 522 | bufReadLine(cpuinfo_fd, line, buffer); | 
|---|
| 523 | char *colon = static_cast<char *>(::memchr(line.data, ':', line.size)); | 
|---|
| 524 | if (colon && line.size > prefix_len + string_len) { | 
|---|
| 525 | if (!::strncmp(prefix, line.data, prefix_len)) { | 
|---|
| 526 | // prefix matches, next character must be ':' or space | 
|---|
| 527 | if (line.data[prefix_len] == ':' || ::isspace(line.data[prefix_len])) { | 
|---|
| 528 | // Does it contain the string? | 
|---|
| 529 | char *found = ::strstr(line.cString(), string); | 
|---|
| 530 | if (found && ::isspace(found[-1]) && | 
|---|
| 531 | (::isspace(found[string_len]) || found[string_len] == '\0')) { | 
|---|
| 532 | present = true; | 
|---|
| 533 | break; | 
|---|
| 534 | } | 
|---|
| 535 | } | 
|---|
| 536 | } | 
|---|
| 537 | } | 
|---|
| 538 | } while (line.size); | 
|---|
| 539 |  | 
|---|
| 540 | ::qt_safe_close(cpuinfo_fd); | 
|---|
| 541 | return present; | 
|---|
| 542 | } | 
|---|
| 543 | #endif | 
|---|
| 544 |  | 
|---|
/*
 * MIPS feature detection: reports the DSP and DSPr2 features, using the
 * compiler's predefined macros where they are defined and /proc/cpuinfo on
 * Linux otherwise. Returns the detected features as a bit mask.
 */
static inline quint64 detectProcessorFeatures()
{
    // NOTE: MIPS 74K cores are the only ones supporting DSPr2.
    quint64 flags = 0;

#if defined __mips_dsp
    // __mips_dsp is defined: DSP is taken as present without a runtime check
    flags |= Q_UINT64_C(1) << CpuFeatureDSP;
#  if defined __mips_dsp_rev && __mips_dsp_rev >= 2
    flags |= Q_UINT64_C(1) << CpuFeatureDSPR2;
#  elif defined(Q_OS_LINUX)
    // DSP revision not known at compile time: look for a 74K core model
    if (procCpuinfoContains("cpu model", "MIPS 74Kc") || procCpuinfoContains("cpu model", "MIPS 74Kf"))
        flags |= Q_UINT64_C(1) << CpuFeatureDSPR2;
#  endif
#elif defined(Q_OS_LINUX)
    // nothing known at compile time: rely on /proc/cpuinfo for both features;
    // DSPr2 is only looked for when DSP itself is listed
    if (procCpuinfoContains("ASEs implemented", "dsp")) {
        flags |= Q_UINT64_C(1) << CpuFeatureDSP;
        if (procCpuinfoContains("cpu model", "MIPS 74Kc") || procCpuinfoContains("cpu model", "MIPS 74Kf"))
            flags |= Q_UINT64_C(1) << CpuFeatureDSPR2;
    }
#endif

    return flags;
}
|---|
| 568 |  | 
|---|
| 569 | #else | 
|---|
// Variant for architectures without a dedicated detection routine in this
// file: always reports an empty feature set.
static inline uint detectProcessorFeatures()
{
    return 0;
}
|---|
| 574 | #endif | 
|---|
| 575 |  | 
|---|
// number of entries in features_indices, i.e. of named features
static const int features_count = (sizeof features_indices) / (sizeof features_indices[0]);

// record what CPU features were enabled by default in this Qt build
static const quint64 minFeature = qCompilerCpuFeatures;

// Storage for the detected feature mask, written by qDetectCpuFeatures(): a
// single 64-bit atomic where available, otherwise two unsigned atomics
// (element 0 receives the mask itself, element 1 the mask shifted right by 32).
#ifdef Q_ATOMIC_INT64_IS_SUPPORTED
Q_CORE_EXPORT QBasicAtomicInteger<quint64> qt_cpu_features[1] = { Q_BASIC_ATOMIC_INITIALIZER(0) };
#else
Q_CORE_EXPORT QBasicAtomicInteger<unsigned> qt_cpu_features[2] = { Q_BASIC_ATOMIC_INITIALIZER(0), Q_BASIC_ATOMIC_INITIALIZER(0) };
#endif
|---|
| 586 |  | 
|---|
| 587 | quint64 qDetectCpuFeatures() | 
|---|
| 588 | { | 
|---|
| 589 | quint64 f = detectProcessorFeatures(); | 
|---|
| 590 | QByteArray disable = qgetenv( "QT_NO_CPU_FEATURE"); | 
|---|
| 591 | if (!disable.isEmpty()) { | 
|---|
| 592 | disable.prepend(' '); | 
|---|
| 593 | for (int i = 0; i < features_count; ++i) { | 
|---|
| 594 | if (disable.contains(features_string + features_indices[i])) | 
|---|
| 595 | f &= ~(Q_UINT64_C(1) << i); | 
|---|
| 596 | } | 
|---|
| 597 | } | 
|---|
| 598 |  | 
|---|
| 599 | #ifdef RUNNING_ON_VALGRIND | 
|---|
| 600 | bool runningOnValgrind = RUNNING_ON_VALGRIND; | 
|---|
| 601 | #else | 
|---|
| 602 | bool runningOnValgrind = false; | 
|---|
| 603 | #endif | 
|---|
| 604 | if (Q_UNLIKELY(!runningOnValgrind && minFeature != 0 && (f & minFeature) != minFeature)) { | 
|---|
| 605 | quint64 missing = minFeature & ~f; | 
|---|
| 606 | fprintf(stderr, "Incompatible processor. This Qt build requires the following features:\n   "); | 
|---|
| 607 | for (int i = 0; i < features_count; ++i) { | 
|---|
| 608 | if (missing & (Q_UINT64_C(1) << i)) | 
|---|
| 609 | fprintf(stderr, "%s", features_string + features_indices[i]); | 
|---|
| 610 | } | 
|---|
| 611 | fprintf(stderr, "\n"); | 
|---|
| 612 | fflush(stderr); | 
|---|
| 613 | qFatal( "Aborted. Incompatible processor: missing feature 0x%llx -%s.", missing, | 
|---|
| 614 | features_string + features_indices[qCountTrailingZeroBits(missing)]); | 
|---|
| 615 | } | 
|---|
| 616 |  | 
|---|
| 617 | qt_cpu_features[0].storeRelaxed(f | quint32(QSimdInitialized)); | 
|---|
| 618 | #ifndef Q_ATOMIC_INT64_IS_SUPPORTED | 
|---|
| 619 | qt_cpu_features[1].storeRelaxed(f >> 32); | 
|---|
| 620 | #endif | 
|---|
| 621 | return f; | 
|---|
| 622 | } | 
|---|
| 623 |  | 
|---|
| 624 | void qDumpCPUFeatures() | 
|---|
| 625 | { | 
|---|
| 626 | quint64 features = qCpuFeatures() & ~quint64(QSimdInitialized); | 
|---|
| 627 | printf( "Processor features: "); | 
|---|
| 628 | for (int i = 0; i < features_count; ++i) { | 
|---|
| 629 | if (features & (Q_UINT64_C(1) << i)) | 
|---|
| 630 | printf( "%s%s", features_string + features_indices[i], | 
|---|
| 631 | minFeature & (Q_UINT64_C(1) << i) ? "[required]": ""); | 
|---|
| 632 | } | 
|---|
| 633 | if ((features = (qCompilerCpuFeatures & ~features))) { | 
|---|
| 634 | printf( "\n!!!!!!!!!!!!!!!!!!!!\n!!! Missing required features:"); | 
|---|
| 635 | for (int i = 0; i < features_count; ++i) { | 
|---|
| 636 | if (features & (Q_UINT64_C(1) << i)) | 
|---|
| 637 | printf( "%s", features_string + features_indices[i]); | 
|---|
| 638 | } | 
|---|
| 639 | printf( "\n!!! Applications will likely crash with \"Invalid Instruction\"\n!!!!!!!!!!!!!!!!!!!!"); | 
|---|
| 640 | } | 
|---|
| 641 | puts( ""); | 
|---|
| 642 | } | 
|---|
| 643 |  | 
|---|
| 644 | #if defined(Q_PROCESSOR_X86) && QT_COMPILER_SUPPORTS_HERE(RDRND) | 
|---|
| 645 |  | 
|---|
| 646 | #  ifdef Q_PROCESSOR_X86_64 | 
|---|
| 647 | #    define _rdrandXX_step _rdrand64_step | 
|---|
| 648 | #    define _rdseedXX_step _rdseed64_step | 
|---|
| 649 | #  else | 
|---|
| 650 | #    define _rdrandXX_step _rdrand32_step | 
|---|
| 651 | #    define _rdseedXX_step _rdseed32_step | 
|---|
| 652 | #  endif | 
|---|
| 653 |  | 
|---|
| 654 | // The parameter to _rdrand64_step & _rdseed64_step is unsigned long long for | 
|---|
| 655 | // Clang and GCC but unsigned __int64 for MSVC and ICC, which is unsigned long | 
|---|
| 656 | // long on Windows, but unsigned long on Linux. | 
|---|
| 657 | namespace { | 
|---|
| 658 | template <typename F> struct ; | 
|---|
| 659 | template <typename T> struct <int (T *)> { using  = T; }; | 
|---|
| 660 | using randuint = ExtractParameter<decltype(_rdrandXX_step)>::Type; | 
|---|
| 661 | } | 
|---|
| 662 |  | 
|---|
| 663 | #  if QT_COMPILER_SUPPORTS_HERE(RDSEED) | 
|---|
| 664 | static QT_FUNCTION_TARGET(RDSEED) unsigned *qt_random_rdseed(unsigned *ptr, unsigned *end) noexcept | 
|---|
| 665 | { | 
|---|
| 666 | // Unlike for the RDRAND code below, the Intel whitepaper describing the | 
|---|
| 667 | // use of the RDSEED instruction indicates we should not retry in a loop. | 
|---|
| 668 | // If the independent bit generator used by RDSEED is out of entropy, it | 
|---|
| 669 | // may take time to replenish. | 
|---|
| 670 | // https://software.intel.com/en-us/articles/intel-digital-random-number-generator-drng-software-implementation-guide | 
|---|
| 671 | while (ptr + sizeof(randuint) / sizeof(*ptr) <= end) { | 
|---|
| 672 | if (_rdseedXX_step(reinterpret_cast<randuint *>(ptr)) == 0) | 
|---|
| 673 | goto out; | 
|---|
| 674 | ptr += sizeof(randuint) / sizeof(*ptr); | 
|---|
| 675 | } | 
|---|
| 676 |  | 
|---|
| 677 | if (sizeof(*ptr) != sizeof(randuint) && ptr != end) { | 
|---|
| 678 | if (_rdseed32_step(ptr) == 0) | 
|---|
| 679 | goto out; | 
|---|
| 680 | ++ptr; | 
|---|
| 681 | } | 
|---|
| 682 |  | 
|---|
| 683 | out: | 
|---|
| 684 | return ptr; | 
|---|
| 685 | } | 
|---|
| 686 | #  else | 
|---|
// Stand-in used when the compiler cannot generate RDSEED code here: writes
// nothing and returns \a ptr unchanged, i.e. reports zero elements filled.
static unsigned *qt_random_rdseed(unsigned *ptr, unsigned *)
{
    return ptr;
}
|---|
| 691 | #  endif | 
|---|
| 692 |  | 
|---|
| 693 | static QT_FUNCTION_TARGET(RDRND) unsigned *qt_random_rdrnd(unsigned *ptr, unsigned *end) noexcept | 
|---|
| 694 | { | 
|---|
| 695 | int retries = 10; | 
|---|
| 696 | while (ptr + sizeof(randuint)/sizeof(*ptr) <= end) { | 
|---|
| 697 | if (_rdrandXX_step(reinterpret_cast<randuint *>(ptr))) | 
|---|
| 698 | ptr += sizeof(randuint)/sizeof(*ptr); | 
|---|
| 699 | else if (--retries == 0) | 
|---|
| 700 | goto out; | 
|---|
| 701 | } | 
|---|
| 702 |  | 
|---|
| 703 | while (sizeof(*ptr) != sizeof(randuint) && ptr != end) { | 
|---|
| 704 | bool ok = _rdrand32_step(ptr); | 
|---|
| 705 | if (!ok && --retries) | 
|---|
| 706 | continue; | 
|---|
| 707 | if (ok) | 
|---|
| 708 | ++ptr; | 
|---|
| 709 | break; | 
|---|
| 710 | } | 
|---|
| 711 |  | 
|---|
| 712 | out: | 
|---|
| 713 | return ptr; | 
|---|
| 714 | } | 
|---|
| 715 |  | 
|---|
| 716 | static QT_FUNCTION_TARGET(RDRND) Q_DECL_COLD_FUNCTION bool checkRdrndWorks() noexcept | 
|---|
| 717 | { | 
|---|
| 718 | /* | 
|---|
| 719 | * Some AMD CPUs (e.g. AMD A4-6250J and AMD Ryzen 3000-series) have a | 
|---|
| 720 | * failing random generation instruction, which always returns | 
|---|
| 721 | * 0xffffffff, even when generation was "successful". | 
|---|
| 722 | * | 
|---|
| 723 | * This code checks if hardware random generator generates four consecutive | 
|---|
| 724 | * equal numbers. If it does, then we probably have a failing one and | 
|---|
| 725 | * should disable it completely. | 
|---|
| 726 | * | 
|---|
| 727 | * https://bugreports.qt.io/browse/QTBUG-69423 | 
|---|
| 728 | */ | 
|---|
| 729 | constexpr qsizetype TestBufferSize = 4; | 
|---|
| 730 | unsigned testBuffer[TestBufferSize] = {}; | 
|---|
| 731 |  | 
|---|
| 732 | unsigned *end = qt_random_rdrnd(testBuffer, testBuffer + TestBufferSize); | 
|---|
| 733 | if (end < testBuffer + 3) { | 
|---|
| 734 | // Random generation didn't produce enough data for us to make a | 
|---|
| 735 | // determination whether it's working or not. Assume it isn't, but | 
|---|
| 736 | // don't print a warning. | 
|---|
| 737 | return false; | 
|---|
| 738 | } | 
|---|
| 739 |  | 
|---|
| 740 | // Check the results for equality | 
|---|
| 741 | if (testBuffer[0] == testBuffer[1] | 
|---|
| 742 | && testBuffer[0] == testBuffer[2] | 
|---|
| 743 | && (end < testBuffer + TestBufferSize || testBuffer[0] == testBuffer[3])) { | 
|---|
| 744 | fprintf(stderr, "WARNING: CPU random generator seem to be failing, " | 
|---|
| 745 | "disabling hardware random number generation\n" | 
|---|
| 746 | "WARNING: RDRND generated:"); | 
|---|
| 747 | for (unsigned *ptr = testBuffer; ptr < end; ++ptr) | 
|---|
| 748 | fprintf(stderr, " 0x%x", *ptr); | 
|---|
| 749 | fprintf(stderr, "\n"); | 
|---|
| 750 | return false; | 
|---|
| 751 | } | 
|---|
| 752 |  | 
|---|
| 753 | // We're good | 
|---|
| 754 | return true; | 
|---|
| 755 | } | 
|---|
| 756 |  | 
|---|
| 757 | QT_FUNCTION_TARGET(RDRND) qsizetype qRandomCpu(void *buffer, qsizetype count) noexcept | 
|---|
| 758 | { | 
|---|
| 759 | unsigned *ptr = reinterpret_cast<unsigned *>(buffer); | 
|---|
| 760 | unsigned *end = ptr + count; | 
|---|
| 761 |  | 
|---|
| 762 | if (qCpuHasFeature(RDSEED)) | 
|---|
| 763 | ptr = qt_random_rdseed(ptr, end); | 
|---|
| 764 |  | 
|---|
| 765 | // fill the buffer with RDRND if RDSEED didn't | 
|---|
| 766 | ptr = qt_random_rdrnd(ptr, end); | 
|---|
| 767 | return ptr - reinterpret_cast<unsigned *>(buffer); | 
|---|
| 768 | } | 
|---|
| 769 | #elif defined(Q_PROCESSOR_X86) && !defined(Q_OS_NACL) && !defined(Q_PROCESSOR_ARM) | 
|---|
// Stand-in used when the RDRND code above is not compiled: always reports the
// generator as not working.
static bool checkRdrndWorks() noexcept { return false; }
|---|
| 771 | #endif // Q_PROCESSOR_X86 && RDRND | 
|---|
| 772 |  | 
|---|
| 773 | QT_END_NAMESPACE | 
|---|
| 774 |  | 
|---|