1/****************************************************************************
2**
3** Copyright (C) 2016 The Qt Company Ltd.
4** Copyright (C) 2019 Intel Corporation.
5** Contact: https://www.qt.io/licensing/
6**
7** This file is part of the QtCore module of the Qt Toolkit.
8**
9** $QT_BEGIN_LICENSE:LGPL$
10** Commercial License Usage
11** Licensees holding valid commercial Qt licenses may use this file in
12** accordance with the commercial license agreement provided with the
13** Software or, alternatively, in accordance with the terms contained in
14** a written agreement between you and The Qt Company. For licensing terms
15** and conditions see https://www.qt.io/terms-conditions. For further
16** information use the contact form at https://www.qt.io/contact-us.
17**
18** GNU Lesser General Public License Usage
19** Alternatively, this file may be used under the terms of the GNU Lesser
20** General Public License version 3 as published by the Free Software
21** Foundation and appearing in the file LICENSE.LGPL3 included in the
22** packaging of this file. Please review the following information to
23** ensure the GNU Lesser General Public License version 3 requirements
24** will be met: https://www.gnu.org/licenses/lgpl-3.0.html.
25**
26** GNU General Public License Usage
27** Alternatively, this file may be used under the terms of the GNU
28** General Public License version 2.0 or (at your option) the GNU General
29** Public license version 3 or any later version approved by the KDE Free
30** Qt Foundation. The licenses are as published by the Free Software
31** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3
32** included in the packaging of this file. Please review the following
33** information to ensure the GNU General Public License requirements will
34** be met: https://www.gnu.org/licenses/gpl-2.0.html and
35** https://www.gnu.org/licenses/gpl-3.0.html.
36**
37** $QT_END_LICENSE$
38**
39****************************************************************************/
40
41// we need ICC to define the prototype for _rdseed64_step
42#define __INTEL_COMPILER_USE_INTRINSIC_PROTOTYPES
43
44#include "qsimd_p.h"
45#include "qalgorithms.h"
46#include <QByteArray>
47#include <stdio.h>
48
49#ifdef Q_OS_LINUX
50# include "../testlib/3rdparty/valgrind_p.h"
51#endif
52
53#if defined(Q_OS_WIN)
54# if !defined(Q_CC_GNU)
55# include <intrin.h>
56# endif
57#elif defined(Q_OS_LINUX) && (defined(Q_PROCESSOR_ARM) || defined(Q_PROCESSOR_MIPS_32))
58#include "private/qcore_unix_p.h"
59
// the kernel header definitions for HWCAP_*
// (the ones we need/may need anyway)
// They are spelled out here as plain macros, with the header each group was
// copied from named above it.

// copied from <asm/hwcap.h> (ARM)
// Bit masks; this file tests HWCAP_NEON against the value of the AT_HWCAP entry.
#define HWCAP_CRUNCH 1024
#define HWCAP_THUMBEE 2048
#define HWCAP_NEON 4096
#define HWCAP_VFPv3 8192
#define HWCAP_VFPv3D16 16384

// copied from <asm/hwcap.h> (ARM):
// Bit mask tested against the value of the AT_HWCAP2 entry on 32-bit ARM.
#define HWCAP2_CRC32 (1 << 4)

// copied from <asm/hwcap.h> (Aarch64)
// Bit mask tested against the value of the AT_HWCAP entry on 64-bit ARM.
#define HWCAP_CRC32 (1 << 7)

// copied from <linux/auxvec.h>
// Entry types looked for while scanning /proc/self/auxv.
#define AT_HWCAP 16 /* arch dependent hints at CPU capabilities */
#define AT_HWCAP2 26 /* extension of AT_HWCAP */
79
80#elif defined(Q_CC_GHS)
81#include <INTEGRITY_types.h>
82#endif
83
84QT_BEGIN_NAMESPACE
85
86/*
87 * Use kdesdk/scripts/generate_string_table.pl to update the table below. Note
88 * we remove the terminating -1 that the script adds.
89 */
90
91// begin generated
// Layout shared by all variants below: features_string packs the feature
// names back to back, each one starting with a space and ending with a NUL
// byte; features_indices[i] is the byte offset of the i-th name inside
// features_string.
#if defined(Q_PROCESSOR_ARM)
/* Data:
 neon
 crc32
 */
static const char features_string[] =
    " neon\0"
    " crc32\0"
    "\0";
// " neon\0" occupies 6 bytes, hence the second name starts at offset 6
static const int features_indices[] = { 0, 6 };
#elif defined(Q_PROCESSOR_MIPS)
/* Data:
 dsp
 dspr2
*/
static const char features_string[] =
    " dsp\0"
    " dspr2\0"
    "\0";

// " dsp\0" occupies 5 bytes, hence the second name starts at offset 5
static const int features_indices[] = {
    0, 5
};
#elif defined(Q_PROCESSOR_X86)
// The included file is expected to provide features_string and
// features_indices for x86 (and the x86_locators table used further down).
# include "qsimd_x86.cpp" // generated by util/x86simdgen
#else
// No known features on other architectures: empty name table.
static const char features_string[] = "";
static const int features_indices[] = { };
#endif
121// end generated
122
123#if defined (Q_OS_NACL)
124static inline uint detectProcessorFeatures()
125{
126 return 0;
127}
128#elif defined(Q_PROCESSOR_ARM)
129static inline quint64 detectProcessorFeatures()
130{
131 quint64 features = 0;
132
133#if defined(Q_OS_LINUX)
134# if defined(Q_PROCESSOR_ARM_V8) && defined(Q_PROCESSOR_ARM_64)
135 features |= Q_UINT64_C(1) << CpuFeatureNEON; // NEON is always available on ARMv8 64bit.
136# endif
137 int auxv = qt_safe_open("/proc/self/auxv", O_RDONLY);
138 if (auxv != -1) {
139 unsigned long vector[64];
140 int nread;
141 while (features == 0) {
142 nread = qt_safe_read(auxv, (char *)vector, sizeof vector);
143 if (nread <= 0) {
144 // EOF or error
145 break;
146 }
147
148 int max = nread / (sizeof vector[0]);
149 for (int i = 0; i < max; i += 2) {
150 if (vector[i] == AT_HWCAP) {
151# if defined(Q_PROCESSOR_ARM_V8) && defined(Q_PROCESSOR_ARM_64)
152 // For Aarch64:
153 if (vector[i+1] & HWCAP_CRC32)
154 features |= Q_UINT64_C(1) << CpuFeatureCRC32;
155# endif
156 // Aarch32, or ARMv7 or before:
157 if (vector[i+1] & HWCAP_NEON)
158 features |= Q_UINT64_C(1) << CpuFeatureNEON;
159 }
160# if defined(Q_PROCESSOR_ARM_32)
161 // For Aarch32:
162 if (vector[i] == AT_HWCAP2) {
163 if (vector[i+1] & HWCAP2_CRC32)
164 features |= Q_UINT64_C(1) << CpuFeatureCRC32;
165 }
166# endif
167 }
168 }
169
170 qt_safe_close(auxv);
171 return features;
172 }
173 // fall back if /proc/self/auxv wasn't found
174#endif
175
176#if defined(__ARM_NEON__)
177 features |= Q_UINT64_C(1) << CpuFeatureNEON;
178#endif
179#if defined(__ARM_FEATURE_CRC32)
180 features |= Q_UINT64_C(1) << CpuFeatureCRC32;
181#endif
182
183 return features;
184}
185
186#elif defined(Q_PROCESSOR_X86)
187
// Name of the register that the CPUID helpers below exchange with a scratch
// register before and after each "cpuid" instruction: ebx on 32-bit builds,
// rbx otherwise. The name is written with a doubled '%' because it is pasted
// into asm templates that also use %N operand references.
#ifdef Q_PROCESSOR_X86_32
# define PICreg "%%ebx"
#else
# define PICreg "%%rbx"
#endif

// Forward declaration; the definition is at the end of this file, after the
// RDRND helpers it relies on.
static bool checkRdrndWorks() noexcept;
195
/*
 * Returns what the CPUID instruction leaves in EAX when queried with leaf 0.
 * The x86 detectProcessorFeatures() in this file compares that value against
 * the leaf numbers it wants to query. Returns 0 when CPUID is found not to be
 * supported, or when no implementation exists for the compiler in use.
 */
static int maxBasicCpuidSupported()
{
#if defined(Q_CC_EMSCRIPTEN)
    return 6; // All features supported by Emscripten
#elif defined(Q_CC_GNU)
    qregisterint tmp1;

# if Q_PROCESSOR_X86 < 5
    // check if the CPUID instruction is supported
    // The asm saves the flags register, tries to flip bit 0x00200000 in it,
    // reads the flags back and XORs them with the saved copy: a zero result
    // means the bit could not be changed.
    long cpuid_supported;
    asm ("pushf\n"
         "pop %0\n"
         "mov %0, %1\n"
         "xor $0x00200000, %0\n"
         "push %0\n"
         "popf\n"
         "pushf\n"
         "pop %0\n"
         "xor %1, %0\n" // %eax is now 0 if CPUID is not supported
         : "=a" (cpuid_supported), "=r" (tmp1)
         );
    if (!cpuid_supported)
        return 0;
# endif

    // EAX is loaded with 0 (the "0" input constraint ties it to the first
    // output). PICreg is swapped into tmp1 before "cpuid" and swapped back
    // afterwards, so it holds its original value when the statement ends;
    // ecx and edx are declared as clobbered.
    int result;
    asm ("xchg " PICreg", %1\n"
         "cpuid\n"
         "xchg " PICreg", %1\n"
         : "=&a" (result), "=&r" (tmp1)
         : "0" (0)
         : "ecx", "edx");
    return result;
#elif defined(Q_OS_WIN)
    // Use the __cpuid function; if the CPUID instruction isn't supported, it will return 0
    int info[4];
    __cpuid(info, 0);
    return info[0];
#elif defined(Q_CC_GHS)
    unsigned int info[4];
    __CPUID(0, info);
    return info[0];
#else
    return 0;
#endif
}
242
/*
 * Runs CPUID with EAX = 1 and stores the resulting ECX and EDX register
 * values in "ecx" and "edx". When no implementation exists for the compiler
 * in use, both outputs are left untouched.
 */
static void cpuidFeatures01(uint &ecx, uint &edx)
{
#if defined(Q_CC_GNU) && !defined(Q_CC_EMSCRIPTEN)
    // PICreg is exchanged with tmp1 around "cpuid" so that it holds its
    // original value again when the asm statement ends.
    qregisterint tmp1;
    asm ("xchg " PICreg", %2\n"
         "cpuid\n"
         "xchg " PICreg", %2\n"
         : "=&c" (ecx), "=&d" (edx), "=&r" (tmp1)
         : "a" (1));
#elif defined(Q_OS_WIN)
    // info[] is indexed as { eax, ebx, ecx, edx } by the code below
    int info[4];
    __cpuid(info, 1);
    ecx = info[2];
    edx = info[3];
#elif defined(Q_CC_GHS)
    unsigned int info[4];
    __CPUID(1, info);
    ecx = info[2];
    edx = info[3];
#else
    Q_UNUSED(ecx);
    Q_UNUSED(edx);
#endif
}
267
#ifdef Q_OS_WIN
// Overload of __cpuidex whose last parameter is __int64; it ignores its
// arguments and zero-fills info[].
// NOTE(review): presumably a fallback that overload resolution only picks
// when the compiler does not provide the real intrinsic (same pattern as the
// _xgetbv overload further down) - confirm.
inline void __cpuidex(int info[4], int, __int64) { memset(info, 0, 4*sizeof(int));}
#endif
271
/*
 * Runs CPUID with EAX = 7 and ECX = 0 and stores the resulting EBX, ECX and
 * EDX register values in "ebx", "ecx" and "edx". When no implementation
 * exists for the compiler in use, the outputs are left untouched.
 */
static void cpuidFeatures07_00(uint &ebx, uint &ecx, uint &edx)
{
#if defined(Q_CC_GNU) && !defined(Q_CC_EMSCRIPTEN)
    qregisteruint rbx; // in case it's 64-bit
    // rcx is an in/out operand: its initial 0 is the ECX input of the query
    qregisteruint rcx = 0;
    qregisteruint rdx = 0;
    // After the second xchg, the local "rbx" holds what cpuid wrote to PICreg
    // and PICreg itself holds its original value again.
    asm ("xchg " PICreg", %0\n"
         "cpuid\n"
         "xchg " PICreg", %0\n"
         : "=&r" (rbx), "+&c" (rcx), "+&d" (rdx)
         : "a" (7));
    ebx = rbx;
    ecx = rcx;
    edx = rdx;
#elif defined(Q_OS_WIN)
    // info[] is indexed as { eax, ebx, ecx, edx } by the code below
    int info[4];
    __cpuidex(info, 7, 0);
    ebx = info[1];
    ecx = info[2];
    edx = info[3];
#elif defined(Q_CC_GHS)
    unsigned int info[4];
    __CPUIDEX(7, 0, info);
    ebx = info[1];
    ecx = info[2];
    edx = info[3];
#else
    Q_UNUSED(ebx);
    Q_UNUSED(ecx);
    Q_UNUSED(edx);
#endif
}
304
#if defined(Q_OS_WIN) && !(defined(Q_CC_GNU) || defined(Q_CC_GHS))
// fallback overload in case this intrinsic does not exist: unsigned __int64 _xgetbv(unsigned int);
// This overload takes __int64 instead of unsigned int and always reports 0.
inline quint64 _xgetbv(__int64) { return 0; }
#endif
/*
 * Executes the XGETBV instruction with ECX = "in" and stores the resulting
 * EAX and EDX register values in "eax" and "edx". The caller in this file
 * passes 0 and treats the result as the XCR0 register. When no
 * implementation exists for the compiler in use, the outputs are left
 * untouched.
 */
static void xgetbv(uint in, uint &eax, uint &edx)
{
#if (defined(Q_CC_GNU) && !defined(Q_CC_EMSCRIPTEN)) || defined(Q_CC_GHS)
    // the instruction is emitted as raw bytes instead of by mnemonic
    asm (".byte 0x0F, 0x01, 0xD0" // xgetbv instruction
         : "=a" (eax), "=d" (edx)
         : "c" (in));
#elif defined(Q_OS_WIN)
    // split the 64-bit result into its low (eax) and high (edx) halves
    quint64 result = _xgetbv(in);
    eax = result;
    edx = result >> 32;
#else
    Q_UNUSED(in);
    Q_UNUSED(eax);
    Q_UNUSED(edx);
#endif
}
325
// Bit masks for the XCR0 state register, one bit per register-state
// component, followed by the combinations this file tests for.
enum XCR0Flags {
    // individual state components, bits 0 through 7
    X87 = 0x01,
    XMM0_15 = 0x02,
    YMM0_15Hi128 = 0x04,
    BNDRegs = 0x08,
    BNDCSR = 0x10,
    OpMask = 0x20,
    ZMM0_15Hi256 = 0x40,
    ZMM16_31 = 0x80,

    // state that has to be enabled for each instruction set family
    SSEState = XMM0_15,
    AVXState = SSEState | YMM0_15Hi128,
    AVX512State = AVXState | OpMask | ZMM0_15Hi256 | ZMM16_31
};
341
342static quint64 adjustedXcr0(quint64 xcr0)
343{
344 /*
345 * Some OSes hide their capability of context-switching the AVX512 state in
346 * the XCR0 register. They do that so the first time we execute an
347 * instruction that may access the AVX512 state (requiring the EVEX prefix)
348 * they allocate the necessary context switch space.
349 *
350 * This behavior is deprecated with the XFD (Extended Feature Disable)
351 * register, but we can't change existing OSes.
352 */
353#ifdef Q_OS_DARWIN
354 // from <machine/cpu_capabilities.h> in xnu
355 // <https://github.com/apple/darwin-xnu/blob/xnu-4903.221.2/osfmk/i386/cpu_capabilities.h>
356 constexpr quint64 kHasAVX512F = Q_UINT64_C(0x0000004000000000);
357 constexpr quintptr commpage = sizeof(void *) > 4 ? Q_UINT64_C(0x00007fffffe00000) : 0xffff0000;
358 constexpr quintptr cpu_capabilities64 = commpage + 0x10;
359 quint64 capab = *reinterpret_cast<quint64 *>(cpu_capabilities64);
360 if (capab & kHasAVX512F)
361 xcr0 |= AVX512State;
362#endif
363
364 return xcr0;
365}
366
367static quint64 detectProcessorFeatures()
368{
369 static const quint64 AllAVX2 = CpuFeatureAVX2 | AllAVX512;
370 static const quint64 AllAVX = CpuFeatureAVX | AllAVX2;
371
372 quint64 features = 0;
373 int cpuidLevel = maxBasicCpuidSupported();
374#if Q_PROCESSOR_X86 < 5
375 if (cpuidLevel < 1)
376 return 0;
377#else
378 Q_ASSERT(cpuidLevel >= 1);
379#endif
380
381 uint results[X86CpuidMaxLeaf] = {};
382 cpuidFeatures01(results[Leaf1ECX], results[Leaf1EDX]);
383 if (cpuidLevel >= 7)
384 cpuidFeatures07_00(results[Leaf7_0EBX], results[Leaf7_0ECX], results[Leaf7_0EDX]);
385
386 // populate our feature list
387 for (uint i = 0; i < sizeof(x86_locators) / sizeof(x86_locators[0]); ++i) {
388 uint word = x86_locators[i] / 32;
389 uint bit = 1U << (x86_locators[i] % 32);
390 quint64 feature = Q_UINT64_C(1) << (i + 1);
391 if (results[word] & bit)
392 features |= feature;
393 }
394
395 // now check the AVX state
396 quint64 xcr0 = 0;
397 if (results[Leaf1ECX] & (1u << 27)) {
398 // XGETBV enabled
399 uint xgetbvA = 0, xgetbvD = 0;
400 xgetbv(0, xgetbvA, xgetbvD);
401
402 xcr0 = xgetbvA;
403 if (sizeof(XCR0Flags) > sizeof(xgetbvA))
404 xcr0 |= quint64(xgetbvD) << 32;
405 xcr0 = adjustedXcr0(xcr0);
406 }
407
408 if ((xcr0 & AVXState) != AVXState) {
409 // support for YMM registers is disabled, disable all AVX
410 features &= ~AllAVX;
411 } else if ((xcr0 & AVX512State) != AVX512State) {
412 // support for ZMM registers or mask registers is disabled, disable all AVX512
413 features &= ~AllAVX512;
414 }
415
416 if (features & CpuFeatureRDRND && !checkRdrndWorks())
417 features &= ~(CpuFeatureRDRND | CpuFeatureRDSEED);
418
419 return features;
420}
421
422#elif defined(Q_PROCESSOR_MIPS_32)
423
424#if defined(Q_OS_LINUX)
425//
426// Do not use QByteArray: it could use SIMD instructions itself at
427// some point, thus creating a recursive dependency. Instead, use a
428// QSimpleBuffer, which has the bare minimum needed to use memory
429// dynamically and read lines from /proc/cpuinfo of arbitrary sizes.
430//
//
// Minimal growable byte buffer.
//
//  data  - heap storage (nullptr until the first allocation)
//  alloc - number of bytes allocated at "data"
//  size  - number of bytes in use; callers may read and adjust it directly
//
// Storage only ever grows, in multiples of chunk_size. Running out of memory
// is treated as fatal: the process is aborted instead of continuing with a
// null buffer.
//
struct QSimpleBuffer
{
    static const int chunk_size = 256;
    char *data;
    unsigned alloc;
    unsigned size;

    QSimpleBuffer() : data(nullptr), alloc(0), size(0) { }
    ~QSimpleBuffer() { ::free(data); }

    // Sets the number of bytes in use, growing the storage when needed.
    // Bytes added by growing are uninitialized.
    void resize(unsigned newsize)
    {
        if (newsize > alloc)
            grow(newsize);
        size = newsize;
    }

    // Appends the first "appendsize" bytes of "other" to this buffer.
    void append(const QSimpleBuffer &other, unsigned appendsize)
    {
        if (!appendsize)
            return; // nothing to copy; also keeps null pointers away from memcpy()
        unsigned oldsize = size;
        resize(oldsize + appendsize);
        ::memcpy(data + oldsize, other.data, appendsize);
    }

    // Removes the first "amount" bytes, moving the remainder to the front.
    void popleft(unsigned amount)
    {
        if (amount >= size) {
            resize(0);
            return;
        }
        size -= amount;
        ::memmove(data, data + amount, size);
    }

    // Returns the contents as a NUL-terminated string. The terminator is
    // stored right after the bytes in use and is not counted in "size".
    char *cString()
    {
        // Make sure there is room for the terminator: "size" may equal
        // "alloc" (or both may still be zero), in which case data[size] is
        // not part of the allocation yet.
        if (size >= alloc)
            grow(size + 1);
        data[size] = '\0';
        return data;
    }

private:
    // Enlarges the storage to hold at least "minimum" bytes, rounded up to
    // the next multiple of chunk_size. Leaves "size" alone.
    void grow(unsigned minimum)
    {
        unsigned newalloc = chunk_size * ((minimum / chunk_size) + 1);
        if (newalloc < minimum) // the rounding overflowed
            newalloc = minimum;
        // keep the old pointer until realloc() is known to have succeeded
        void *grown = ::realloc(data, newalloc);
        if (!grown)
            ::abort();
        data = static_cast<char *>(grown);
        alloc = newalloc;
    }
};
474
475//
476// Uses a scratch "buffer" (which must be used for all reads done in the
477// same file descriptor) to read chunks of data from a file, to read
478// one line at a time. Lines include the trailing newline character ('\n').
479// On EOF, line.size is zero.
480//
481static void bufReadLine(int fd, QSimpleBuffer &line, QSimpleBuffer &buffer)
482{
483 for (;;) {
484 char *newline = static_cast<char *>(::memchr(buffer.data, '\n', buffer.size));
485 if (newline) {
486 unsigned piece_size = newline - buffer.data + 1;
487 line.append(buffer, piece_size);
488 buffer.popleft(piece_size);
489 line.resize(line.size - 1);
490 return;
491 }
492 if (buffer.size + QSimpleBuffer::chunk_size > buffer.alloc) {
493 int oldsize = buffer.size;
494 buffer.resize(buffer.size + QSimpleBuffer::chunk_size);
495 buffer.size = oldsize;
496 }
497 ssize_t read_bytes =
498 ::qt_safe_read(fd, buffer.data + buffer.size, QSimpleBuffer::chunk_size);
499 if (read_bytes > 0)
500 buffer.size += read_bytes;
501 else
502 return;
503 }
504}
505
506//
507// Checks if any line with a given prefix from /proc/cpuinfo contains
508// a certain string, surrounded by spaces.
509//
510static bool procCpuinfoContains(const char *prefix, const char *string)
511{
512 int cpuinfo_fd = ::qt_safe_open("/proc/cpuinfo", O_RDONLY);
513 if (cpuinfo_fd == -1)
514 return false;
515
516 unsigned string_len = ::strlen(string);
517 unsigned prefix_len = ::strlen(prefix);
518 QSimpleBuffer line, buffer;
519 bool present = false;
520 do {
521 line.resize(0);
522 bufReadLine(cpuinfo_fd, line, buffer);
523 char *colon = static_cast<char *>(::memchr(line.data, ':', line.size));
524 if (colon && line.size > prefix_len + string_len) {
525 if (!::strncmp(prefix, line.data, prefix_len)) {
526 // prefix matches, next character must be ':' or space
527 if (line.data[prefix_len] == ':' || ::isspace(line.data[prefix_len])) {
528 // Does it contain the string?
529 char *found = ::strstr(line.cString(), string);
530 if (found && ::isspace(found[-1]) &&
531 (::isspace(found[string_len]) || found[string_len] == '\0')) {
532 present = true;
533 break;
534 }
535 }
536 }
537 }
538 } while (line.size);
539
540 ::qt_safe_close(cpuinfo_fd);
541 return present;
542}
543#endif
544
545static inline quint64 detectProcessorFeatures()
546{
547 // NOTE: MIPS 74K cores are the only ones supporting DSPr2.
548 quint64 flags = 0;
549
550#if defined __mips_dsp
551 flags |= Q_UINT64_C(1) << CpuFeatureDSP;
552# if defined __mips_dsp_rev && __mips_dsp_rev >= 2
553 flags |= Q_UINT64_C(1) << CpuFeatureDSPR2;
554# elif defined(Q_OS_LINUX)
555 if (procCpuinfoContains("cpu model", "MIPS 74Kc") || procCpuinfoContains("cpu model", "MIPS 74Kf"))
556 flags |= Q_UINT64_C(1) << CpuFeatureDSPR2;
557# endif
558#elif defined(Q_OS_LINUX)
559 if (procCpuinfoContains("ASEs implemented", "dsp")) {
560 flags |= Q_UINT64_C(1) << CpuFeatureDSP;
561 if (procCpuinfoContains("cpu model", "MIPS 74Kc") || procCpuinfoContains("cpu model", "MIPS 74Kf"))
562 flags |= Q_UINT64_C(1) << CpuFeatureDSPR2;
563 }
564#endif
565
566 return flags;
567}
568
569#else
570static inline uint detectProcessorFeatures()
571{
572 return 0;
573}
574#endif
575
// number of entries in features_indices, i.e. of feature names in features_string
static const int features_count = (sizeof features_indices) / (sizeof features_indices[0]);

// record what CPU features were enabled by default in this Qt build
static const quint64 minFeature = qCompilerCpuFeatures;

// Storage for the detected feature mask, written by qDetectCpuFeatures().
// It is a single 64-bit atomic where those are supported; otherwise two
// 32-bit atomics, [0] receiving the low half and [1] the high half.
#ifdef Q_ATOMIC_INT64_IS_SUPPORTED
Q_CORE_EXPORT QBasicAtomicInteger<quint64> qt_cpu_features[1] = { Q_BASIC_ATOMIC_INITIALIZER(0) };
#else
Q_CORE_EXPORT QBasicAtomicInteger<unsigned> qt_cpu_features[2] = { Q_BASIC_ATOMIC_INITIALIZER(0), Q_BASIC_ATOMIC_INITIALIZER(0) };
#endif
586
587quint64 qDetectCpuFeatures()
588{
589 quint64 f = detectProcessorFeatures();
590 QByteArray disable = qgetenv("QT_NO_CPU_FEATURE");
591 if (!disable.isEmpty()) {
592 disable.prepend(' ');
593 for (int i = 0; i < features_count; ++i) {
594 if (disable.contains(features_string + features_indices[i]))
595 f &= ~(Q_UINT64_C(1) << i);
596 }
597 }
598
599#ifdef RUNNING_ON_VALGRIND
600 bool runningOnValgrind = RUNNING_ON_VALGRIND;
601#else
602 bool runningOnValgrind = false;
603#endif
604 if (Q_UNLIKELY(!runningOnValgrind && minFeature != 0 && (f & minFeature) != minFeature)) {
605 quint64 missing = minFeature & ~f;
606 fprintf(stderr, "Incompatible processor. This Qt build requires the following features:\n ");
607 for (int i = 0; i < features_count; ++i) {
608 if (missing & (Q_UINT64_C(1) << i))
609 fprintf(stderr, "%s", features_string + features_indices[i]);
610 }
611 fprintf(stderr, "\n");
612 fflush(stderr);
613 qFatal("Aborted. Incompatible processor: missing feature 0x%llx -%s.", missing,
614 features_string + features_indices[qCountTrailingZeroBits(missing)]);
615 }
616
617 qt_cpu_features[0].storeRelaxed(f | quint32(QSimdInitialized));
618#ifndef Q_ATOMIC_INT64_IS_SUPPORTED
619 qt_cpu_features[1].storeRelaxed(f >> 32);
620#endif
621 return f;
622}
623
624void qDumpCPUFeatures()
625{
626 quint64 features = qCpuFeatures() & ~quint64(QSimdInitialized);
627 printf("Processor features: ");
628 for (int i = 0; i < features_count; ++i) {
629 if (features & (Q_UINT64_C(1) << i))
630 printf("%s%s", features_string + features_indices[i],
631 minFeature & (Q_UINT64_C(1) << i) ? "[required]" : "");
632 }
633 if ((features = (qCompilerCpuFeatures & ~features))) {
634 printf("\n!!!!!!!!!!!!!!!!!!!!\n!!! Missing required features:");
635 for (int i = 0; i < features_count; ++i) {
636 if (features & (Q_UINT64_C(1) << i))
637 printf("%s", features_string + features_indices[i]);
638 }
639 printf("\n!!! Applications will likely crash with \"Invalid Instruction\"\n!!!!!!!!!!!!!!!!!!!!");
640 }
641 puts("");
642}
643
644#if defined(Q_PROCESSOR_X86) && QT_COMPILER_SUPPORTS_HERE(RDRND)
645
// _rdrandXX_step / _rdseedXX_step name the intrinsic variant used for bulk
// generation: the 64-bit one on x86-64 builds, the 32-bit one otherwise.
# ifdef Q_PROCESSOR_X86_64
# define _rdrandXX_step _rdrand64_step
# define _rdseedXX_step _rdseed64_step
# else
# define _rdrandXX_step _rdrand32_step
# define _rdseedXX_step _rdseed32_step
# endif
653
// The parameter to _rdrand64_step & _rdseed64_step is unsigned long long for
// Clang and GCC but unsigned __int64 for MSVC and ICC, which is unsigned long
// long on Windows, but unsigned long on Linux.
// Rather than spelling out one of those types, the type is taken from the
// intrinsic's own declaration.
namespace {
// Primary template, declared only: just the specialization below can be used.
template <typename F> struct ExtractParameter;
// Matches a function type that returns int and takes a single pointer
// parameter; Type is what that pointer points to.
template <typename T> struct ExtractParameter<int (T *)> { using Type = T; };
// The integer type one _rdrandXX_step call writes through its parameter.
using randuint = ExtractParameter<decltype(_rdrandXX_step)>::Type;
}
662
663# if QT_COMPILER_SUPPORTS_HERE(RDSEED)
664static QT_FUNCTION_TARGET(RDSEED) unsigned *qt_random_rdseed(unsigned *ptr, unsigned *end) noexcept
665{
666 // Unlike for the RDRAND code below, the Intel whitepaper describing the
667 // use of the RDSEED instruction indicates we should not retry in a loop.
668 // If the independent bit generator used by RDSEED is out of entropy, it
669 // may take time to replenish.
670 // https://software.intel.com/en-us/articles/intel-digital-random-number-generator-drng-software-implementation-guide
671 while (ptr + sizeof(randuint) / sizeof(*ptr) <= end) {
672 if (_rdseedXX_step(reinterpret_cast<randuint *>(ptr)) == 0)
673 goto out;
674 ptr += sizeof(randuint) / sizeof(*ptr);
675 }
676
677 if (sizeof(*ptr) != sizeof(randuint) && ptr != end) {
678 if (_rdseed32_step(ptr) == 0)
679 goto out;
680 ++ptr;
681 }
682
683out:
684 return ptr;
685}
686# else
687static unsigned *qt_random_rdseed(unsigned *ptr, unsigned *)
688{
689 return ptr;
690}
691# endif
692
693static QT_FUNCTION_TARGET(RDRND) unsigned *qt_random_rdrnd(unsigned *ptr, unsigned *end) noexcept
694{
695 int retries = 10;
696 while (ptr + sizeof(randuint)/sizeof(*ptr) <= end) {
697 if (_rdrandXX_step(reinterpret_cast<randuint *>(ptr)))
698 ptr += sizeof(randuint)/sizeof(*ptr);
699 else if (--retries == 0)
700 goto out;
701 }
702
703 while (sizeof(*ptr) != sizeof(randuint) && ptr != end) {
704 bool ok = _rdrand32_step(ptr);
705 if (!ok && --retries)
706 continue;
707 if (ok)
708 ++ptr;
709 break;
710 }
711
712out:
713 return ptr;
714}
715
716static QT_FUNCTION_TARGET(RDRND) Q_DECL_COLD_FUNCTION bool checkRdrndWorks() noexcept
717{
718 /*
719 * Some AMD CPUs (e.g. AMD A4-6250J and AMD Ryzen 3000-series) have a
720 * failing random generation instruction, which always returns
721 * 0xffffffff, even when generation was "successful".
722 *
723 * This code checks if hardware random generator generates four consecutive
724 * equal numbers. If it does, then we probably have a failing one and
725 * should disable it completely.
726 *
727 * https://bugreports.qt.io/browse/QTBUG-69423
728 */
729 constexpr qsizetype TestBufferSize = 4;
730 unsigned testBuffer[TestBufferSize] = {};
731
732 unsigned *end = qt_random_rdrnd(testBuffer, testBuffer + TestBufferSize);
733 if (end < testBuffer + 3) {
734 // Random generation didn't produce enough data for us to make a
735 // determination whether it's working or not. Assume it isn't, but
736 // don't print a warning.
737 return false;
738 }
739
740 // Check the results for equality
741 if (testBuffer[0] == testBuffer[1]
742 && testBuffer[0] == testBuffer[2]
743 && (end < testBuffer + TestBufferSize || testBuffer[0] == testBuffer[3])) {
744 fprintf(stderr, "WARNING: CPU random generator seem to be failing, "
745 "disabling hardware random number generation\n"
746 "WARNING: RDRND generated:");
747 for (unsigned *ptr = testBuffer; ptr < end; ++ptr)
748 fprintf(stderr, " 0x%x", *ptr);
749 fprintf(stderr, "\n");
750 return false;
751 }
752
753 // We're good
754 return true;
755}
756
757QT_FUNCTION_TARGET(RDRND) qsizetype qRandomCpu(void *buffer, qsizetype count) noexcept
758{
759 unsigned *ptr = reinterpret_cast<unsigned *>(buffer);
760 unsigned *end = ptr + count;
761
762 if (qCpuHasFeature(RDSEED))
763 ptr = qt_random_rdseed(ptr, end);
764
765 // fill the buffer with RDRND if RDSEED didn't
766 ptr = qt_random_rdrnd(ptr, end);
767 return ptr - reinterpret_cast<unsigned *>(buffer);
768}
769#elif defined(Q_PROCESSOR_X86) && !defined(Q_OS_NACL) && !defined(Q_PROCESSOR_ARM)
// Variant used when the compiler cannot generate RDRND here: the hardware
// generator can never be used, so it is reported as not working.
static bool checkRdrndWorks() noexcept
{
    return false;
}
771#endif // Q_PROCESSOR_X86 && RDRND
772
773QT_END_NAMESPACE
774