1/*
2 Simple DirectMedia Layer
3 Copyright (C) 1997-2025 Sam Lantinga <slouken@libsdl.org>
4
5 This software is provided 'as-is', without any express or implied
6 warranty. In no event will the authors be held liable for any damages
7 arising from the use of this software.
8
9 Permission is granted to anyone to use this software for any purpose,
10 including commercial applications, and to alter it and redistribute it
11 freely, subject to the following restrictions:
12
13 1. The origin of this software must not be misrepresented; you must not
14 claim that you wrote the original software. If you use this software
15 in a product, an acknowledgment in the product documentation would be
16 appreciated but is not required.
17 2. Altered source versions must be plainly marked as such, and must not be
18 misrepresented as being the original software.
19 3. This notice may not be removed or altered from any source distribution.
20*/
21
22#include "SDL_internal.h"
23
24#include "SDL_cpuinfo_c.h"
25
26#if defined(SDL_PLATFORM_WINDOWS)
27#include "../core/windows/SDL_windows.h"
28#endif
29
30// CPU feature detection for SDL
31
32#ifdef HAVE_SYSCONF
33#include <unistd.h>
34#endif
35#ifdef HAVE_SYSCTLBYNAME
36#include <sys/types.h>
37#include <sys/sysctl.h>
38#endif
39#if defined(SDL_PLATFORM_MACOS) && (defined(__ppc__) || defined(__ppc64__))
40#include <sys/sysctl.h> // For AltiVec check
41#elif defined(SDL_PLATFORM_OPENBSD) && defined(__powerpc__)
42#include <sys/types.h>
43#include <sys/sysctl.h> // For AltiVec check
44#include <machine/cpu.h>
45#elif defined(SDL_PLATFORM_FREEBSD) && defined(__powerpc__)
46#include <machine/cpu.h>
47#include <sys/auxv.h>
48#elif defined(SDL_ALTIVEC_BLITTERS) && defined(HAVE_SETJMP)
49#include <signal.h>
50#include <setjmp.h>
51#endif
52
53#if (defined(SDL_PLATFORM_LINUX) || defined(SDL_PLATFORM_ANDROID)) && defined(__arm__)
54#include <unistd.h>
55#include <sys/types.h>
56#include <sys/stat.h>
57#include <fcntl.h>
58#include <elf.h>
59
60// #include <asm/hwcap.h>
61#ifndef AT_HWCAP
62#define AT_HWCAP 16
63#endif
64#ifndef AT_PLATFORM
65#define AT_PLATFORM 15
66#endif
67#ifndef HWCAP_NEON
68#define HWCAP_NEON (1 << 12)
69#endif
70#endif
71
72#if defined (SDL_PLATFORM_FREEBSD)
73#include <sys/param.h>
74#endif
75
76#if defined(SDL_PLATFORM_ANDROID) && defined(__arm__) && !defined(HAVE_GETAUXVAL)
77#include <cpu-features.h>
78#endif
79
80#if defined(HAVE_GETAUXVAL) || defined(HAVE_ELF_AUX_INFO)
81#include <sys/auxv.h>
82#endif
83
84#ifdef SDL_PLATFORM_RISCOS
85#include <kernel.h>
86#include <swis.h>
87#endif
88#ifdef SDL_PLATFORM_3DS
89#include <3ds.h>
90#endif
91#ifdef SDL_PLATFORM_PS2
92#include <kernel.h>
93#endif
94
95#ifdef SDL_PLATFORM_HAIKU
96#include <kernel/OS.h>
97#endif
98
// Bit flags OR-ed into SDL_CPUFeatures, one per instruction-set extension this file can detect.
#define CPU_HAS_ALTIVEC (1 << 0)
#define CPU_HAS_MMX (1 << 1)
#define CPU_HAS_SSE (1 << 2)
#define CPU_HAS_SSE2 (1 << 3)
#define CPU_HAS_SSE3 (1 << 4)
#define CPU_HAS_SSE41 (1 << 5)
#define CPU_HAS_SSE42 (1 << 6)
#define CPU_HAS_AVX (1 << 7)
#define CPU_HAS_AVX2 (1 << 8)
#define CPU_HAS_NEON (1 << 9)
#define CPU_HAS_AVX512F (1 << 10)
#define CPU_HAS_ARM_SIMD (1 << 11)
#define CPU_HAS_LSX (1 << 12)
#define CPU_HAS_LASX (1 << 13)

// Operand handed to the LoongArch "cpucfg" instruction by CPU_readCPUCFG(),
// and the bits of the returned word that CPU_haveLSX()/CPU_haveLASX() test.
#define CPU_CFG2 0x2
#define CPU_CFG2_LSX (1 << 6)
#define CPU_CFG2_LASX (1 << 7)
117
#if defined(SDL_ALTIVEC_BLITTERS) && defined(HAVE_SETJMP) && !defined(SDL_PLATFORM_MACOS) && !defined(SDL_PLATFORM_OPENBSD) && !defined(SDL_PLATFORM_FREEBSD)
/* This is the brute force way of detecting instruction sets...
   the idea is borrowed from the libmpeg2 library - thanks!
 */
// Jump buffer armed with setjmp() by CPU_haveAltiVec() before it executes its probe instructions.
static jmp_buf jmpbuf;
// Signal handler installed for SIGILL by CPU_haveAltiVec(): jumps back to the
// setjmp() point with a non-zero value so the probe is treated as failed.
// The signal number itself is not used.
static void illegal_instruction(int sig)
{
    longjmp(jmpbuf, 1);
}
#endif // HAVE_SETJMP
128
/*
 * Reports whether the CPUID instruction may be executed.
 *
 * On 32-bit and 64-bit x86 builds with inline assembly the function tries to
 * toggle bit 21 (0x200000, the "ID" bit) of EFLAGS: if the bit can be flipped
 * the result is 1. MSVC x64 builds simply report 1. Emscripten builds and any
 * compiler/architecture combination not listed below leave the result at 0.
 *
 * Returns 1 when CPUID is usable, 0 otherwise.
 */
static int CPU_haveCPUID(void)
{
    int has_CPUID = 0;

/* *INDENT-OFF* */ // clang-format off
#ifndef SDL_PLATFORM_EMSCRIPTEN
#if (defined(__GNUC__) || defined(__llvm__)) && defined(__i386__)
    // GNU-style inline assembly, 32-bit: result is written straight to has_CPUID (operand %0).
    __asm__ (
" pushfl # Get original EFLAGS \n"
" popl %%eax \n"
" movl %%eax,%%ecx \n"
" xorl $0x200000,%%eax # Flip ID bit in EFLAGS \n"
" pushl %%eax # Save new EFLAGS value on stack \n"
" popfl # Replace current EFLAGS value \n"
" pushfl # Get new EFLAGS \n"
" popl %%eax # Store new EFLAGS in EAX \n"
" xorl %%ecx,%%eax # Can not toggle ID bit, \n"
" jz 1f # Processor=80486 \n"
" movl $1,%0 # We have CPUID support \n"
"1: \n"
    : "=m" (has_CPUID)
    :
    : "%eax", "%ecx"
    );
#elif (defined(__GNUC__) || defined(__llvm__)) && defined(__x86_64__)
/* Technically, if this is being compiled under __x86_64__ then it has
   CPUid by definition. But it's nice to be able to prove it. :) */
    __asm__ (
" pushfq # Get original EFLAGS \n"
" popq %%rax \n"
" movq %%rax,%%rcx \n"
" xorl $0x200000,%%eax # Flip ID bit in EFLAGS \n"
" pushq %%rax # Save new EFLAGS value on stack \n"
" popfq # Replace current EFLAGS value \n"
" pushfq # Get new EFLAGS \n"
" popq %%rax # Store new EFLAGS in EAX \n"
" xorl %%ecx,%%eax # Can not toggle ID bit, \n"
" jz 1f # Processor=80486 \n"
" movl $1,%0 # We have CPUID support \n"
"1: \n"
    : "=m" (has_CPUID)
    :
    : "%rax", "%rcx"
    );
#elif (defined(_MSC_VER) && defined(_M_IX86)) || defined(__WATCOMC__)
    // MSVC / Watcom 32-bit inline assembly: same EFLAGS toggle test as above.
    __asm {
        pushfd ; Get original EFLAGS
        pop eax
        mov ecx, eax
        xor eax, 200000h ; Flip ID bit in EFLAGS
        push eax ; Save new EFLAGS value on stack
        popfd ; Replace current EFLAGS value
        pushfd ; Get new EFLAGS
        pop eax ; Store new EFLAGS in EAX
        xor eax, ecx ; Can not toggle ID bit,
        jz done ; Processor=80486
        mov has_CPUID,1 ; We have CPUID support
done:
    }
#elif defined(_MSC_VER) && defined(_M_X64)
    // No probe on MSVC x64: CPUID is reported as available unconditionally.
    has_CPUID = 1;
#elif defined(__sun) && defined(__i386)
    // NOTE(review): this variant stores the result to the fixed stack slot -8(%ebp),
    // which presumably is where the compiler places has_CPUID - confirm for the toolchain in use.
    __asm (
" pushfl \n"
" popl %eax \n"
" movl %eax,%ecx \n"
" xorl $0x200000,%eax \n"
" pushl %eax \n"
" popfl \n"
" pushfl \n"
" popl %eax \n"
" xorl %ecx,%eax \n"
" jz 1f \n"
" movl $1,-8(%ebp) \n"
"1: \n"
    );
#elif defined(__sun) && defined(__amd64)
    // NOTE(review): as above, the result is stored to the fixed slot -8(%rbp) - confirm it matches has_CPUID.
    __asm (
" pushfq \n"
" popq %rax \n"
" movq %rax,%rcx \n"
" xorl $0x200000,%eax \n"
" pushq %rax \n"
" popfq \n"
" pushfq \n"
" popq %rax \n"
" xorl %ecx,%eax \n"
" jz 1f \n"
" movl $1,-8(%rbp) \n"
"1: \n"
    );
#endif
#endif // !SDL_PLATFORM_EMSCRIPTEN
/* *INDENT-ON* */ // clang-format on
    return has_CPUID;
}
225
/*
 * cpuid(func, a, b, c, d)
 *
 * Executes the CPUID instruction for leaf `func` with ECX cleared first and
 * stores the resulting EAX, EBX, ECX and EDX values in the lvalues a, b, c, d.
 *
 * - GNU-style variants save and restore EBX/RBX around the instruction and hand
 *   its value back through ESI/RSI.
 * - The 32-bit MSVC/Watcom variant moves `func` into EAX with inline assembly.
 * - The MSVC x64 variant goes through __cpuidex() with sub-leaf 0.
 * - On every other target the macro only sets all four outputs to 0.
 */
#if (defined(__GNUC__) || defined(__llvm__)) && defined(__i386__)
#define cpuid(func, a, b, c, d) \
    __asm__ __volatile__( \
        " pushl %%ebx \n" \
        " xorl %%ecx,%%ecx \n" \
        " cpuid \n" \
        " movl %%ebx, %%esi \n" \
        " popl %%ebx \n" \
        : "=a"(a), "=S"(b), "=c"(c), "=d"(d) \
        : "a"(func))
#elif (defined(__GNUC__) || defined(__llvm__)) && defined(__x86_64__)
#define cpuid(func, a, b, c, d) \
    __asm__ __volatile__( \
        " pushq %%rbx \n" \
        " xorq %%rcx,%%rcx \n" \
        " cpuid \n" \
        " movq %%rbx, %%rsi \n" \
        " popq %%rbx \n" \
        : "=a"(a), "=S"(b), "=c"(c), "=d"(d) \
        : "a"(func))
#elif (defined(_MSC_VER) && defined(_M_IX86)) || defined(__WATCOMC__)
#define cpuid(func, a, b, c, d) \
    __asm { \
        __asm mov eax, func \
        __asm xor ecx, ecx \
        __asm cpuid \
        __asm mov a, eax \
        __asm mov b, ebx \
        __asm mov c, ecx \
        __asm mov d, edx \
    }
#elif (defined(_MSC_VER) && defined(_M_X64))
// Use __cpuidex instead of __cpuid because ICL does not clear ecx register
#define cpuid(func, a, b, c, d) \
    { \
        int CPUInfo[4]; \
        __cpuidex(CPUInfo, func, 0); \
        a = CPUInfo[0]; \
        b = CPUInfo[1]; \
        c = CPUInfo[2]; \
        d = CPUInfo[3]; \
    }
#else
// No CPUID on this target: report all-zero registers and silence unused-variable warnings.
#define cpuid(func, a, b, c, d) \
    do { \
        a = b = c = d = 0; \
        (void)a; \
        (void)b; \
        (void)c; \
        (void)d; \
    } while (0)
#endif
278
// Registers returned by CPUID leaf 1, stored in the order { EAX, EBX, ECX, EDX }.
static int CPU_CPUIDFeatures[4];
// EAX returned by CPUID leaf 0 (highest supported standard leaf); stays 0 when CPUID is unavailable.
static int CPU_CPUIDMaxFunction = 0;
// Set when XGETBV(0) reports bits 1 and 2 (mask 0x6), i.e. the OS saves SSE and AVX register state.
static bool CPU_OSSavesYMM = false;
// Set when, additionally, bits 5..7 (mask 0xe0) are reported, i.e. the OS saves AVX-512 state.
static bool CPU_OSSavesZMM = false;

/*
 * Runs the CPUID queries once and caches the results in the globals above.
 *
 * Later calls return immediately. When CPU_haveCPUID() reports no CPUID
 * support, or leaf 1 is not available, the globals keep their zero/false
 * initial values.
 */
static void CPU_calcCPUIDFeatures(void)
{
    static bool checked = false;
    if (!checked) {
        checked = true;
        if (CPU_haveCPUID()) {
            int a, b, c, d;
            cpuid(0, a, b, c, d);
            CPU_CPUIDMaxFunction = a;
            if (CPU_CPUIDMaxFunction >= 1) {
                cpuid(1, a, b, c, d);
                CPU_CPUIDFeatures[0] = a;
                CPU_CPUIDFeatures[1] = b;
                CPU_CPUIDFeatures[2] = c;
                CPU_CPUIDFeatures[3] = d;

                // Check to make sure we can call xgetbv (CPUID.1:ECX bit 27)
                if (c & 0x08000000) {
                    /* `a` still holds EAX from CPUID leaf 1 at this point. Clear it so
                       that a toolchain for which none of the XGETBV variants below is
                       compiled does not mistake that leftover value for XCR0 and
                       report OS support for YMM/ZMM state. */
                    a = 0;
                    // Call xgetbv to see if YMM (etc) register state is saved
#if (defined(__GNUC__) || defined(__llvm__)) && (defined(__i386__) || defined(__x86_64__))
                    __asm__(".byte 0x0f, 0x01, 0xd0"
                            : "=a"(a)
                            : "c"(0)
                            : "%edx");
#elif defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64)) && (_MSC_FULL_VER >= 160040219) // VS2010 SP1
                    a = (int)_xgetbv(0);
#elif (defined(_MSC_VER) && defined(_M_IX86)) || defined(__WATCOMC__)
                    __asm
                    {
                        xor ecx, ecx
                        _asm _emit 0x0f _asm _emit 0x01 _asm _emit 0xd0
                        mov a, eax
                    }
#endif
                    CPU_OSSavesYMM = ((a & 6) == 6) ? true : false;
                    CPU_OSSavesZMM = (CPU_OSSavesYMM && ((a & 0xe0) == 0xe0)) ? true : false;
                }
            }
        }
    }
}
325
/*
 * Detects AltiVec support.
 *
 * Returns non-zero when the platform-specific probe reports AltiVec, and 0
 * when it does not, when SDL_CPUINFO_DISABLED is defined, or when no probe
 * exists for the target.
 */
static int CPU_haveAltiVec(void)
{
    volatile int altivec = 0;
#ifndef SDL_CPUINFO_DISABLED
#if (defined(SDL_PLATFORM_MACOS) && (defined(__ppc__) || defined(__ppc64__))) || (defined(SDL_PLATFORM_OPENBSD) && defined(__powerpc__))
    // Both systems answer through sysctl(); only the two-level name differs.
#ifdef SDL_PLATFORM_OPENBSD
    int mib[2] = { CTL_MACHDEP, CPU_ALTIVEC };
#else
    int mib[2] = { CTL_HW, HW_VECTORUNIT };
#endif
    int vector_unit = 0;
    size_t vector_unit_size = sizeof(vector_unit);
    if (sysctl(mib, 2, &vector_unit, &vector_unit_size, NULL, 0) == 0) {
        altivec = (vector_unit != 0);
    }
#elif defined(SDL_PLATFORM_FREEBSD) && defined(__powerpc__)
    // Read the hardware capability word from the ELF auxiliary vector.
    unsigned long hwcap = 0;
    elf_aux_info(AT_HWCAP, &hwcap, sizeof(hwcap));
    altivec = hwcap & PPC_FEATURE_HAS_ALTIVEC;
#elif defined(SDL_ALTIVEC_BLITTERS) && defined(HAVE_SETJMP)
    // Brute force: run vector instructions and catch SIGILL if they are rejected.
    void (*previous_handler)(int sig) = signal(SIGILL, illegal_instruction);
    if (setjmp(jmpbuf) == 0) {
        asm volatile("mtspr 256, %0\n\t"
                     "vand %%v0, %%v0, %%v0" ::"r"(-1));
        altivec = 1;
    }
    signal(SIGILL, previous_handler);
#endif
#endif
    return altivec;
}
360
/*
 * Detects the ARM SIMD (ARMv6 media) instructions.
 *
 * The answer is fixed at compile time when the target architecture already
 * decides it; otherwise the operating system is asked at run time.
 * Returns non-zero when the instructions are available, 0 otherwise.
 */
static int CPU_haveARMSIMD(void)
{
#if (defined(__ARM_ARCH) && (__ARM_ARCH >= 6)) || defined(__aarch64__)
    // Compiled for ARMv6 or later: nothing to probe.
    return 1;
#elif !defined(__arm__)
    // Not an ARM target at all.
    return 0;
#elif defined(SDL_PLATFORM_LINUX)
    // Scan the auxiliary vector for AT_PLATFORM and compare its string prefix.
    Elf32_auxv_t entry;
    int found = 0;
    const int auxv_fd = open("/proc/self/auxv", O_RDONLY | O_CLOEXEC);

    if (auxv_fd < 0) {
        return 0;
    }
    while (read(auxv_fd, &entry, sizeof(entry)) == sizeof(entry)) {
        const char *platform;

        if (entry.a_type != AT_PLATFORM) {
            continue;
        }
        platform = (const char *)entry.a_un.a_val;
        if (platform) {
            found = SDL_strncmp(platform, "v6l", 3) == 0 ||
                    SDL_strncmp(platform, "v7l", 3) == 0;
        }
    }
    close(auxv_fd);
    return found;
#elif defined(SDL_PLATFORM_RISCOS)
    _kernel_swi_regs regs;

    // First call: read the platform feature flags word.
    regs.r[0] = 0;
    if (_kernel_swi(OS_PlatformFeatures, &regs, &regs) != NULL) {
        return 0;
    }
    // Bit 31 must be set before the follow-up query below is attempted.
    if (!(regs.r[0] & (1 << 31))) {
        return 0;
    }

    // Second call: reason code 34 with feature number 29; the answer comes back in r0.
    regs.r[0] = 34;
    regs.r[1] = 29;
    if (_kernel_swi(OS_PlatformFeatures, &regs, &regs) != NULL) {
        return 0;
    }
    return regs.r[0];
#else
#warning SDL_HasARMSIMD is not implemented for this ARM platform. Write me.
    return 0;
#endif
}
425
#if defined(SDL_PLATFORM_LINUX) && defined(__arm__) && !defined(HAVE_GETAUXVAL)
/*
 * Fallback NEON probe for 32-bit ARM Linux builds without getauxval():
 * reads /proc/self/auxv and tests the HWCAP_NEON bit of the AT_HWCAP entry.
 *
 * Returns 1 when the bit is set, 0 when it is clear, when no AT_HWCAP entry
 * is found, or when the file cannot be opened.
 */
static int readProcAuxvForNeon(void)
{
    Elf32_auxv_t entry;
    int has_neon = 0;
    const int auxv_fd = open("/proc/self/auxv", O_RDONLY | O_CLOEXEC);

    if (auxv_fd < 0) {
        return 0;
    }
    while (read(auxv_fd, &entry, sizeof(entry)) == sizeof(entry)) {
        if (entry.a_type == AT_HWCAP) {
            has_neon = ((entry.a_un.a_val & HWCAP_NEON) == HWCAP_NEON);
            break; // only one AT_HWCAP entry is of interest
        }
    }
    close(auxv_fd);
    return has_neon;
}
#endif
446
/*
 * Detects ARM NEON support.
 *
 * Detecting NEON directly needs a privileged instruction on ARM, so the
 * answer comes either from what the target guarantees at compile time or
 * from a platform-specific query to the operating system.
 * Returns non-zero when NEON is available, 0 otherwise.
 */
static int CPU_haveNEON(void)
{
#if defined(SDL_PLATFORM_WINDOWS) && (defined(_M_ARM) || defined(_M_ARM64))
// Visual Studio, for ARM, doesn't define __ARM_ARCH, so this case comes first.
// The constant may be missing from the SDK headers; supply it if so.
#ifndef PF_ARM_NEON_INSTRUCTIONS_AVAILABLE
#define PF_ARM_NEON_INSTRUCTIONS_AVAILABLE 19
#endif
    // All WinRT ARM devices are required to support NEON, but ask anyway.
    return IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE) != 0;
#elif (defined(__ARM_ARCH) && (__ARM_ARCH >= 8)) || defined(__aarch64__)
    return 1; // NEON is not optional on ARMv8.
#elif defined(SDL_PLATFORM_VITA)
    return 1;
#elif defined(SDL_PLATFORM_3DS)
    return 0;
#elif defined(SDL_PLATFORM_APPLE) && defined(__ARM_ARCH) && (__ARM_ARCH >= 7)
    // (note that sysctlbyname("hw.optional.neon") doesn't work!)
    return 1; // every Apple chip from ARMv7 on has NEON.
#elif defined(SDL_PLATFORM_APPLE)
    return 0; // anything else from Apple is assumed not to have NEON.
#elif !defined(__arm__)
    return 0; // not an ARM CPU at all.
#elif defined(SDL_PLATFORM_OPENBSD)
    return 1; // OpenBSD only supports ARMv7 CPUs that have NEON.
#elif defined(HAVE_ELF_AUX_INFO)
    {
        unsigned long hwcap = 0;
        const int query_failed = (elf_aux_info(AT_HWCAP, (void *)&hwcap, (int)sizeof(hwcap)) != 0);
        return query_failed ? 0 : ((hwcap & HWCAP_NEON) == HWCAP_NEON);
    }
#elif (defined(SDL_PLATFORM_LINUX) || defined(SDL_PLATFORM_ANDROID)) && defined(HAVE_GETAUXVAL)
    return (getauxval(AT_HWCAP) & HWCAP_NEON) == HWCAP_NEON;
#elif defined(SDL_PLATFORM_LINUX)
    return readProcAuxvForNeon();
#elif defined(SDL_PLATFORM_ANDROID)
    // Use NDK cpufeatures to read either /proc/self/auxv or /proc/cpuinfo
    if (android_getCpuFamily() != ANDROID_CPU_FAMILY_ARM) {
        return 0;
    }
    return (android_getCpuFeatures() & ANDROID_CPU_ARM_FEATURE_NEON) ? 1 : 0;
#elif defined(SDL_PLATFORM_RISCOS)
    // Use the VFPSupport_Features SWI to access the MVFR registers
    {
        _kernel_swi_regs regs;
        regs.r[0] = 0;
        if (_kernel_swi(VFPSupport_Features, &regs, &regs) != NULL) {
            return 0;
        }
        return ((regs.r[2] & 0xFFF000) == 0x111000) ? 1 : 0;
    }
#elif defined(SDL_PLATFORM_EMSCRIPTEN)
    return 0;
#else
#warning SDL_HasNEON is not implemented for this ARM platform. Write me.
    return 0;
#endif
}
515
/*
 * Reads the configuration word selected by CPU_CFG2 with the "cpucfg"
 * instruction when compiled for LoongArch; returns 0 on every other target.
 */
static int CPU_readCPUCFG(void)
{
    uint32_t config_word = 0;
#if defined __loongarch__
    __asm__ volatile(
        "cpucfg %0, %1 \n\t"
        : "+&r"(config_word)
        : "r"(CPU_CFG2));
#endif
    return config_word;
}

// Non-zero when the corresponding bit of the configuration word is set.
#define CPU_haveLSX() (CPU_readCPUCFG() & CPU_CFG2_LSX)
#define CPU_haveLASX() (CPU_readCPUCFG() & CPU_CFG2_LASX)
530
/*
 * x86 feature tests used by SDL_GetCPUFeatures().
 *
 * On e2k builds each test is a compile-time constant derived from the
 * compiler's own __MMX__ / __SSE*__ / __AVX__ macros. Everywhere else the
 * tests read the cached CPUID leaf 1 registers, so CPU_calcCPUIDFeatures()
 * must have run first.
 */
#ifdef __e2k__
#ifdef __MMX__
#define CPU_haveMMX() (1)
#else
#define CPU_haveMMX() (0)
#endif
#ifdef __SSE__
#define CPU_haveSSE() (1)
#else
#define CPU_haveSSE() (0)
#endif
#ifdef __SSE2__
#define CPU_haveSSE2() (1)
#else
#define CPU_haveSSE2() (0)
#endif
#ifdef __SSE3__
#define CPU_haveSSE3() (1)
#else
#define CPU_haveSSE3() (0)
#endif
#ifdef __SSE4_1__
#define CPU_haveSSE41() (1)
#else
#define CPU_haveSSE41() (0)
#endif
#ifdef __SSE4_2__
#define CPU_haveSSE42() (1)
#else
#define CPU_haveSSE42() (0)
#endif
#ifdef __AVX__
#define CPU_haveAVX() (1)
#else
#define CPU_haveAVX() (0)
#endif
#else
// CPU_CPUIDFeatures[3] is EDX and CPU_CPUIDFeatures[2] is ECX of CPUID leaf 1.
#define CPU_haveMMX() (CPU_CPUIDFeatures[3] & 0x00800000)
#define CPU_haveSSE() (CPU_CPUIDFeatures[3] & 0x02000000)
#define CPU_haveSSE2() (CPU_CPUIDFeatures[3] & 0x04000000)
#define CPU_haveSSE3() (CPU_CPUIDFeatures[2] & 0x00000001)
#define CPU_haveSSE41() (CPU_CPUIDFeatures[2] & 0x00080000)
#define CPU_haveSSE42() (CPU_CPUIDFeatures[2] & 0x00100000)
// AVX additionally requires that the OS saves the YMM register state.
#define CPU_haveAVX() (CPU_OSSavesYMM && (CPU_CPUIDFeatures[2] & 0x10000000))
#endif
576
577#ifdef __e2k__
578inline int
579CPU_haveAVX2(void)
580{
581#ifdef __AVX2__
582 return 1;
583#else
584 return 0;
585#endif
586}
587#else
588static int CPU_haveAVX2(void)
589{
590 if (CPU_OSSavesYMM && (CPU_CPUIDMaxFunction >= 7)) {
591 int a, b, c, d;
592 (void)a;
593 (void)b;
594 (void)c;
595 (void)d; // compiler warnings...
596 cpuid(7, a, b, c, d);
597 return b & 0x00000020;
598 }
599 return 0;
600}
601#endif
602
603#ifdef __e2k__
604inline int
605CPU_haveAVX512F(void)
606{
607 return 0;
608}
609#else
610static int CPU_haveAVX512F(void)
611{
612 if (CPU_OSSavesZMM && (CPU_CPUIDMaxFunction >= 7)) {
613 int a, b, c, d;
614 (void)a;
615 (void)b;
616 (void)c;
617 (void)d; // compiler warnings...
618 cpuid(7, a, b, c, d);
619 return b & 0x00010000;
620 }
621 return 0;
622}
623#endif
624
// Cached core count; 0 means it has not been determined yet.
static int SDL_NumLogicalCPUCores = 0;

/*
 * Returns the number of logical CPU cores, always at least 1.
 *
 * The first call tries each probe compiled in for the platform, in order,
 * until one yields a positive count; the result is cached for later calls.
 */
int SDL_GetNumLogicalCPUCores(void)
{
    if (SDL_NumLogicalCPUCores) {
        return SDL_NumLogicalCPUCores; // already computed
    }

#if defined(HAVE_SYSCONF) && defined(_SC_NPROCESSORS_ONLN)
    if (SDL_NumLogicalCPUCores <= 0) {
        SDL_NumLogicalCPUCores = (int)sysconf(_SC_NPROCESSORS_ONLN);
    }
#endif
#ifdef HAVE_SYSCTLBYNAME
    if (SDL_NumLogicalCPUCores <= 0) {
        size_t value_size = sizeof(SDL_NumLogicalCPUCores);
        sysctlbyname("hw.ncpu", &SDL_NumLogicalCPUCores, &value_size, NULL, 0);
    }
#endif
#if defined(SDL_PLATFORM_WINDOWS)
    if (SDL_NumLogicalCPUCores <= 0) {
        SYSTEM_INFO system_info;
        GetSystemInfo(&system_info);
        SDL_NumLogicalCPUCores = system_info.dwNumberOfProcessors;
    }
#endif
#ifdef SDL_PLATFORM_3DS
    if (SDL_NumLogicalCPUCores <= 0) {
        bool isNew3DS = false;
        APT_CheckNew3DS(&isNew3DS);
        // Reports 4 cores on a New3DS and 2 otherwise.
        // (One core is always dedicated to the OS.)
        SDL_NumLogicalCPUCores = isNew3DS ? 4 : 2;
    }
#endif

    // Every probe failed (or none exists): there has to be at least one core.
    if (SDL_NumLogicalCPUCores <= 0) {
        SDL_NumLogicalCPUCores = 1;
    }
    return SDL_NumLogicalCPUCores;
}
664
665#ifdef __e2k__
666inline const char *
667SDL_GetCPUType(void)
668{
669 static char SDL_CPUType[13];
670
671 SDL_strlcpy(SDL_CPUType, "E2K MACHINE", sizeof(SDL_CPUType));
672
673 return SDL_CPUType;
674}
675#else
676// Oh, such a sweet sweet trick, just not very useful. :)
677static const char *SDL_GetCPUType(void)
678{
679 static char SDL_CPUType[13];
680
681 if (!SDL_CPUType[0]) {
682 int i = 0;
683
684 CPU_calcCPUIDFeatures();
685 if (CPU_CPUIDMaxFunction > 0) { // do we have CPUID at all?
686 int a, b, c, d;
687 cpuid(0x00000000, a, b, c, d);
688 (void)a;
689 SDL_CPUType[i++] = (char)(b & 0xff);
690 b >>= 8;
691 SDL_CPUType[i++] = (char)(b & 0xff);
692 b >>= 8;
693 SDL_CPUType[i++] = (char)(b & 0xff);
694 b >>= 8;
695 SDL_CPUType[i++] = (char)(b & 0xff);
696
697 SDL_CPUType[i++] = (char)(d & 0xff);
698 d >>= 8;
699 SDL_CPUType[i++] = (char)(d & 0xff);
700 d >>= 8;
701 SDL_CPUType[i++] = (char)(d & 0xff);
702 d >>= 8;
703 SDL_CPUType[i++] = (char)(d & 0xff);
704
705 SDL_CPUType[i++] = (char)(c & 0xff);
706 c >>= 8;
707 SDL_CPUType[i++] = (char)(c & 0xff);
708 c >>= 8;
709 SDL_CPUType[i++] = (char)(c & 0xff);
710 c >>= 8;
711 SDL_CPUType[i++] = (char)(c & 0xff);
712 }
713 if (!SDL_CPUType[0]) {
714 SDL_strlcpy(SDL_CPUType, "Unknown", sizeof(SDL_CPUType));
715 }
716 }
717 return SDL_CPUType;
718}
719#endif
720
#if 0
/* !!! FIXME: Not used at the moment. */
#ifdef __e2k__
// e2k: the compiler builtin supplies the CPU name.
inline const char *
SDL_GetCPUName(void)
{
    static char SDL_CPUName[48];

    SDL_strlcpy(SDL_CPUName, __builtin_cpu_name(), sizeof(SDL_CPUName));

    return SDL_CPUName;
}
#else
/*
 * Appends the 16 bytes held in a, b, c, d (in that register order, low byte
 * first) to dst starting at index pos and returns the index after the last
 * byte written.
 */
static int CPU_appendCPUIDRegisters(char *dst, int pos, int a, int b, int c, int d)
{
    const unsigned int regs[4] = { (unsigned int)a, (unsigned int)b, (unsigned int)c, (unsigned int)d };
    int reg, byte;

    for (reg = 0; reg < 4; ++reg) {
        for (byte = 0; byte < 4; ++byte) {
            dst[pos++] = (char)((regs[reg] >> (8 * byte)) & 0xff);
        }
    }
    return pos;
}

/*
 * Returns the processor brand string assembled from CPUID leaves
 * 0x80000002..0x80000004 (16 bytes each), or "Unknown" when those leaves are
 * not available.
 *
 * The string lives in a static buffer that is filled on the first call.
 */
static const char *SDL_GetCPUName(void)
{
    /* 3 leaves * 16 bytes = 48 bytes of data. One extra byte, never written and
       therefore always 0, keeps the result terminated even if the CPU fills
       all 48 bytes. */
    static char SDL_CPUName[49];

    if (!SDL_CPUName[0]) {
        int i = 0;
        int a, b, c, d;

        CPU_calcCPUIDFeatures();
        if (CPU_CPUIDMaxFunction > 0) { // do we have CPUID at all?
            cpuid(0x80000000, a, b, c, d);
            if (a >= 0x80000004) {
                cpuid(0x80000002, a, b, c, d);
                i = CPU_appendCPUIDRegisters(SDL_CPUName, i, a, b, c, d);
                cpuid(0x80000003, a, b, c, d);
                i = CPU_appendCPUIDRegisters(SDL_CPUName, i, a, b, c, d);
                cpuid(0x80000004, a, b, c, d);
                i = CPU_appendCPUIDRegisters(SDL_CPUName, i, a, b, c, d);
            }
        }
        if (!SDL_CPUName[0]) {
            SDL_strlcpy(SDL_CPUName, "Unknown", sizeof(SDL_CPUName));
        }
    }
    return SDL_CPUName;
}
#endif
#endif
855
856int SDL_GetCPUCacheLineSize(void)
857{
858 const char *cpuType = SDL_GetCPUType();
859 int cacheline_size = SDL_CACHELINE_SIZE; // initial guess
860 int a, b, c, d;
861 (void)a;
862 (void)b;
863 (void)c;
864 (void)d;
865 if (SDL_strcmp(cpuType, "GenuineIntel") == 0 || SDL_strcmp(cpuType, "CentaurHauls") == 0 || SDL_strcmp(cpuType, " Shanghai ") == 0) {
866 cpuid(0x00000001, a, b, c, d);
867 cacheline_size = ((b >> 8) & 0xff) * 8;
868 } else if (SDL_strcmp(cpuType, "AuthenticAMD") == 0 || SDL_strcmp(cpuType, "HygonGenuine") == 0) {
869 cpuid(0x80000005, a, b, c, d);
870 cacheline_size = c & 0xff;
871 } else {
872#if defined(HAVE_SYSCONF) && defined(_SC_LEVEL1_DCACHE_LINESIZE)
873 if ((cacheline_size = (int)sysconf(_SC_LEVEL1_DCACHE_LINESIZE)) > 0) {
874 return cacheline_size;
875 } else {
876 cacheline_size = SDL_CACHELINE_SIZE;
877 }
878#endif
879#if defined(SDL_PLATFORM_LINUX)
880 {
881 FILE *f = fopen("/sys/devices/system/cpu/cpu0/cache/index0/coherency_line_size", "r");
882 if (f) {
883 int size;
884 if (fscanf(f, "%d", &size) == 1) {
885 cacheline_size = size;
886 }
887 fclose(f);
888 }
889 }
890#elif defined(__FREEBSD__) && defined(CACHE_LINE_SIZE)
891 cacheline_size = CACHE_LINE_SIZE;
892#endif
893 }
894 return cacheline_size;
895}
896
// Sentinel stored in SDL_CPUFeatures while the feature set has not been computed (or after SDL_QuitCPUInfo()).
#define SDL_CPUFEATURES_RESET_VALUE 0xFFFFFFFF

// Bitwise OR of the CPU_HAS_* flags that were detected, filled in lazily by SDL_GetCPUFeatures().
static Uint32 SDL_CPUFeatures = SDL_CPUFEATURES_RESET_VALUE;
// SIMD alignment in bytes, set by SDL_GetCPUFeatures(); 0xFFFFFFFF means "not computed yet".
static Uint32 SDL_SIMDAlignment = 0xFFFFFFFF;
901
/*
 * Compares the NUL-terminated string `ref` with the token [test, end_test).
 *
 * Returns true when the token has exactly the same characters and length as
 * `ref` and is followed by either the end of the string or a comma.
 */
static bool ref_string_equals(const char *ref, const char *test, const char *end_test)
{
    const size_t token_len = end_test - test;

    if (SDL_strncmp(ref, test, token_len) != 0) {
        return false; // characters differ (or ref is shorter than the token)
    }
    if (ref[token_len] != '\0') {
        return false; // ref is longer than the token
    }
    return test[token_len] == '\0' || test[token_len] == ',';
}
906
907static Uint32 SDLCALL SDL_CPUFeatureMaskFromHint(void)
908{
909 Uint32 result_mask = SDL_CPUFEATURES_RESET_VALUE;
910
911 const char *hint = SDL_GetHint(SDL_HINT_CPU_FEATURE_MASK);
912
913 if (hint) {
914 for (const char *spot = hint, *next; *spot; spot = next) {
915 const char *end = SDL_strchr(spot, ',');
916 Uint32 spot_mask;
917 bool add_spot_mask = true;
918 if (end) {
919 next = end + 1;
920 } else {
921 size_t len = SDL_strlen(spot);
922 end = spot + len;
923 next = end;
924 }
925 if (spot[0] == '+') {
926 add_spot_mask = true;
927 spot += 1;
928 } else if (spot[0] == '-') {
929 add_spot_mask = false;
930 spot += 1;
931 }
932 if (ref_string_equals("all", spot, end)) {
933 spot_mask = SDL_CPUFEATURES_RESET_VALUE;
934 } else if (ref_string_equals("altivec", spot, end)) {
935 spot_mask= CPU_HAS_ALTIVEC;
936 } else if (ref_string_equals("mmx", spot, end)) {
937 spot_mask = CPU_HAS_MMX;
938 } else if (ref_string_equals("sse", spot, end)) {
939 spot_mask = CPU_HAS_SSE;
940 } else if (ref_string_equals("sse2", spot, end)) {
941 spot_mask = CPU_HAS_SSE2;
942 } else if (ref_string_equals("sse3", spot, end)) {
943 spot_mask = CPU_HAS_SSE3;
944 } else if (ref_string_equals("sse41", spot, end)) {
945 spot_mask = CPU_HAS_SSE41;
946 } else if (ref_string_equals("sse42", spot, end)) {
947 spot_mask = CPU_HAS_SSE42;
948 } else if (ref_string_equals("avx", spot, end)) {
949 spot_mask = CPU_HAS_AVX;
950 } else if (ref_string_equals("avx2", spot, end)) {
951 spot_mask = CPU_HAS_AVX2;
952 } else if (ref_string_equals("avx512f", spot, end)) {
953 spot_mask = CPU_HAS_AVX512F;
954 } else if (ref_string_equals("arm-simd", spot, end)) {
955 spot_mask = CPU_HAS_ARM_SIMD;
956 } else if (ref_string_equals("neon", spot, end)) {
957 spot_mask = CPU_HAS_NEON;
958 } else if (ref_string_equals("lsx", spot, end)) {
959 spot_mask = CPU_HAS_LSX;
960 } else if (ref_string_equals("lasx", spot, end)) {
961 spot_mask = CPU_HAS_LASX;
962 } else {
963 // Ignore unknown/incorrect cpu feature(s)
964 continue;
965 }
966 if (add_spot_mask) {
967 result_mask |= spot_mask;
968 } else {
969 result_mask &= ~spot_mask;
970 }
971 }
972 }
973 return result_mask;
974}
975
976static Uint32 SDL_GetCPUFeatures(void)
977{
978 if (SDL_CPUFeatures == SDL_CPUFEATURES_RESET_VALUE) {
979 CPU_calcCPUIDFeatures();
980 SDL_CPUFeatures = 0;
981 SDL_SIMDAlignment = sizeof(void *); // a good safe base value
982 if (CPU_haveAltiVec()) {
983 SDL_CPUFeatures |= CPU_HAS_ALTIVEC;
984 SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16);
985 }
986 if (CPU_haveMMX()) {
987 SDL_CPUFeatures |= CPU_HAS_MMX;
988 SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 8);
989 }
990 if (CPU_haveSSE()) {
991 SDL_CPUFeatures |= CPU_HAS_SSE;
992 SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16);
993 }
994 if (CPU_haveSSE2()) {
995 SDL_CPUFeatures |= CPU_HAS_SSE2;
996 SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16);
997 }
998 if (CPU_haveSSE3()) {
999 SDL_CPUFeatures |= CPU_HAS_SSE3;
1000 SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16);
1001 }
1002 if (CPU_haveSSE41()) {
1003 SDL_CPUFeatures |= CPU_HAS_SSE41;
1004 SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16);
1005 }
1006 if (CPU_haveSSE42()) {
1007 SDL_CPUFeatures |= CPU_HAS_SSE42;
1008 SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16);
1009 }
1010 if (CPU_haveAVX()) {
1011 SDL_CPUFeatures |= CPU_HAS_AVX;
1012 SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 32);
1013 }
1014 if (CPU_haveAVX2()) {
1015 SDL_CPUFeatures |= CPU_HAS_AVX2;
1016 SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 32);
1017 }
1018 if (CPU_haveAVX512F()) {
1019 SDL_CPUFeatures |= CPU_HAS_AVX512F;
1020 SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 64);
1021 }
1022 if (CPU_haveARMSIMD()) {
1023 SDL_CPUFeatures |= CPU_HAS_ARM_SIMD;
1024 SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16);
1025 }
1026 if (CPU_haveNEON()) {
1027 SDL_CPUFeatures |= CPU_HAS_NEON;
1028 SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16);
1029 }
1030 if (CPU_haveLSX()) {
1031 SDL_CPUFeatures |= CPU_HAS_LSX;
1032 SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16);
1033 }
1034 if (CPU_haveLASX()) {
1035 SDL_CPUFeatures |= CPU_HAS_LASX;
1036 SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 32);
1037 }
1038 SDL_CPUFeatures &= SDL_CPUFeatureMaskFromHint();
1039 }
1040 return SDL_CPUFeatures;
1041}
1042
1043void SDL_QuitCPUInfo(void) {
1044 SDL_CPUFeatures = SDL_CPUFEATURES_RESET_VALUE;
1045}
1046
1047#define CPU_FEATURE_AVAILABLE(f) ((SDL_GetCPUFeatures() & (f)) ? true : false)
1048
1049bool SDL_HasAltiVec(void)
1050{
1051 return CPU_FEATURE_AVAILABLE(CPU_HAS_ALTIVEC);
1052}
1053
1054bool SDL_HasMMX(void)
1055{
1056 return CPU_FEATURE_AVAILABLE(CPU_HAS_MMX);
1057}
1058
1059bool SDL_HasSSE(void)
1060{
1061 return CPU_FEATURE_AVAILABLE(CPU_HAS_SSE);
1062}
1063
1064bool SDL_HasSSE2(void)
1065{
1066 return CPU_FEATURE_AVAILABLE(CPU_HAS_SSE2);
1067}
1068
1069bool SDL_HasSSE3(void)
1070{
1071 return CPU_FEATURE_AVAILABLE(CPU_HAS_SSE3);
1072}
1073
1074bool SDL_HasSSE41(void)
1075{
1076 return CPU_FEATURE_AVAILABLE(CPU_HAS_SSE41);
1077}
1078
1079bool SDL_HasSSE42(void)
1080{
1081 return CPU_FEATURE_AVAILABLE(CPU_HAS_SSE42);
1082}
1083
1084bool SDL_HasAVX(void)
1085{
1086 return CPU_FEATURE_AVAILABLE(CPU_HAS_AVX);
1087}
1088
1089bool SDL_HasAVX2(void)
1090{
1091 return CPU_FEATURE_AVAILABLE(CPU_HAS_AVX2);
1092}
1093
1094bool SDL_HasAVX512F(void)
1095{
1096 return CPU_FEATURE_AVAILABLE(CPU_HAS_AVX512F);
1097}
1098
1099bool SDL_HasARMSIMD(void)
1100{
1101 return CPU_FEATURE_AVAILABLE(CPU_HAS_ARM_SIMD);
1102}
1103
1104bool SDL_HasNEON(void)
1105{
1106 return CPU_FEATURE_AVAILABLE(CPU_HAS_NEON);
1107}
1108
1109bool SDL_HasLSX(void)
1110{
1111 return CPU_FEATURE_AVAILABLE(CPU_HAS_LSX);
1112}
1113
1114bool SDL_HasLASX(void)
1115{
1116 return CPU_FEATURE_AVAILABLE(CPU_HAS_LASX);
1117}
1118
// Cached result of SDL_GetSystemRAM() in MiB; 0 means it has not been determined yet.
static int SDL_SystemRAM = 0;

/*
 * Returns the amount of system RAM in MiB.
 *
 * The first call tries each probe compiled in for the platform, in order,
 * until one yields a positive value; that value is cached. If no probe
 * succeeds the function returns 0 (or whatever non-positive value the last
 * probe produced) and will try again on the next call when the value is 0.
 */
int SDL_GetSystemRAM(void)
{
    if (!SDL_SystemRAM) {
#if defined(HAVE_SYSCONF) && defined(_SC_PHYS_PAGES) && defined(_SC_PAGESIZE)
        if (SDL_SystemRAM <= 0) {
            // pages * page size, widened to 64 bits before the multiplication.
            SDL_SystemRAM = (int)((Sint64)sysconf(_SC_PHYS_PAGES) * sysconf(_SC_PAGESIZE) / (1024 * 1024));
        }
#endif
#ifdef HAVE_SYSCTLBYNAME
        if (SDL_SystemRAM <= 0) {
#ifdef HW_PHYSMEM64
            // (64-bit): NetBSD since 2003, OpenBSD
            int mib[2] = { CTL_HW, HW_PHYSMEM64 };
#elif defined(HW_REALMEM)
            // (64-bit): FreeBSD since 2005, DragonFly
            int mib[2] = { CTL_HW, HW_REALMEM };
#elif defined(HW_MEMSIZE)
            // (64-bit): Darwin
            int mib[2] = { CTL_HW, HW_MEMSIZE };
#else
            // (32-bit): very old BSD, might only report up to 2 GiB
            int mib[2] = { CTL_HW, HW_PHYSMEM };
#endif // HW_PHYSMEM64
            Uint64 memsize = 0;
            size_t len = sizeof(memsize);

            if (sysctl(mib, 2, &memsize, &len, NULL, 0) == 0) {
                SDL_SystemRAM = (int)(memsize / (1024 * 1024));
            }
        }
#endif
#if defined(SDL_PLATFORM_WINDOWS)
        if (SDL_SystemRAM <= 0) {
            MEMORYSTATUSEX stat;
            stat.dwLength = sizeof(stat);
            if (GlobalMemoryStatusEx(&stat)) {
                SDL_SystemRAM = (int)(stat.ullTotalPhys / (1024 * 1024));
            }
        }
#endif
#ifdef SDL_PLATFORM_RISCOS
        if (SDL_SystemRAM <= 0) {
            _kernel_swi_regs regs;
            regs.r[0] = 0x108;
            if (_kernel_swi(OS_Memory, &regs, &regs) == NULL) {
                // Multiply in 64 bits: r1 * r2 does not fit in an int from 2 GiB upwards.
                SDL_SystemRAM = (int)(((Uint64)regs.r[1] * (Uint64)regs.r[2]) / (1024 * 1024));
            }
        }
#endif
#ifdef SDL_PLATFORM_3DS
        if (SDL_SystemRAM <= 0) {
            // The New3DS has 255MiB, the Old3DS 127MiB
            SDL_SystemRAM = (int)(osGetMemRegionSize(MEMREGION_ALL) / (1024 * 1024));
        }
#endif
#ifdef SDL_PLATFORM_VITA
        if (SDL_SystemRAM <= 0) {
            /* Vita has 512MiB on SoC, that's split into 256MiB(+109MiB in extended memory mode) for app
               +26MiB of physically continuous memory, +112MiB of CDRAM(VRAM) + system reserved memory. */
            // The cached value is in MiB like every other branch, not in bytes.
            SDL_SystemRAM = 512;
        }
#endif
#ifdef SDL_PLATFORM_PS2
        if (SDL_SystemRAM <= 0) {
            // PlayStation 2 has 32MiB however there are some special models with 64 and 128
            // NOTE(review): every other branch stores MiB - confirm the unit GetMemorySize() reports.
            SDL_SystemRAM = GetMemorySize();
        }
#endif
#ifdef SDL_PLATFORM_HAIKU
        if (SDL_SystemRAM <= 0) {
            system_info info;
            if (get_system_info(&info) == B_OK) {
                /* To have an accurate amount, we also take in account the inaccessible pages (aka ignored)
                   which is a bit handier compared to the legacy system's api (i.e. used_pages).*/
                // Parenthesised so that max_pages is part of the sum; without the inner
                // parentheses the conditional swallowed the addition and only
                // ignored_pages was counted.
                SDL_SystemRAM = (int)SDL_round((info.max_pages + (info.ignored_pages > 0 ? info.ignored_pages : 0)) * B_PAGE_SIZE / 1048576.0);
            }
        }
#endif
    }
    return SDL_SystemRAM;
}
1202
1203size_t SDL_GetSIMDAlignment(void)
1204{
1205 if (SDL_SIMDAlignment == 0xFFFFFFFF) {
1206 SDL_GetCPUFeatures(); // make sure this has been calculated
1207 }
1208 SDL_assert(SDL_SIMDAlignment != 0);
1209 return SDL_SIMDAlignment;
1210}
1211