1/*
2 Simple DirectMedia Layer
3 Copyright (C) 1997-2021 Sam Lantinga <slouken@libsdl.org>
4
5 This software is provided 'as-is', without any express or implied
6 warranty. In no event will the authors be held liable for any damages
7 arising from the use of this software.
8
9 Permission is granted to anyone to use this software for any purpose,
10 including commercial applications, and to alter it and redistribute it
11 freely, subject to the following restrictions:
12
13 1. The origin of this software must not be misrepresented; you must not
14 claim that you wrote the original software. If you use this software
15 in a product, an acknowledgment in the product documentation would be
16 appreciated but is not required.
17 2. Altered source versions must be plainly marked as such, and must not be
18 misrepresented as being the original software.
19 3. This notice may not be removed or altered from any source distribution.
20*/
21#ifdef TEST_MAIN
22#include "SDL_config.h"
23#else
24#include "../SDL_internal.h"
25#endif
26
27#if defined(__WIN32__) || defined(__WINRT__)
28#include "../core/windows/SDL_windows.h"
29#endif
30#if defined(__OS2__)
31#undef HAVE_SYSCTLBYNAME
32#define INCL_DOS
33#include <os2.h>
34#ifndef QSV_NUMPROCESSORS
35#define QSV_NUMPROCESSORS 26
36#endif
37#endif
38
39/* CPU feature detection for SDL */
40
41#include "SDL_cpuinfo.h"
42#include "SDL_assert.h"
43
44#ifdef HAVE_SYSCONF
45#include <unistd.h>
46#endif
47#ifdef HAVE_SYSCTLBYNAME
48#include <sys/types.h>
49#include <sys/sysctl.h>
50#endif
51#if defined(__MACOSX__) && (defined(__ppc__) || defined(__ppc64__))
52#include <sys/sysctl.h> /* For AltiVec check */
53#elif (defined(__OpenBSD__) || defined(__FreeBSD__)) && defined(__powerpc__)
54#include <sys/param.h>
55#include <sys/sysctl.h> /* For AltiVec check */
56#include <machine/cpu.h>
57#elif SDL_ALTIVEC_BLITTERS && HAVE_SETJMP
58#include <signal.h>
59#include <setjmp.h>
60#endif
61
62#if defined(__QNXNTO__)
63#include <sys/syspage.h>
64#endif
65
66#if (defined(__LINUX__) || defined(__ANDROID__)) && defined(__arm__)
67#include <unistd.h>
68#include <sys/types.h>
69#include <sys/stat.h>
70#include <fcntl.h>
71#include <elf.h>
72
73/*#include <asm/hwcap.h>*/
74#ifndef AT_HWCAP
75#define AT_HWCAP 16
76#endif
77#ifndef AT_PLATFORM
78#define AT_PLATFORM 15
79#endif
80#ifndef HWCAP_NEON
81#define HWCAP_NEON (1 << 12)
82#endif
83#endif
84
85#if defined(__ANDROID__) && defined(__arm__) && !defined(HAVE_GETAUXVAL)
86#include <cpu-features.h>
87#endif
88
89#if defined(HAVE_GETAUXVAL) || defined(HAVE_ELF_AUX_INFO)
90#include <sys/auxv.h>
91#endif
92
93#ifdef __RISCOS__
94#include <kernel.h>
95#include <swis.h>
96#endif
97
/* Bit flags stored in SDL_CPUFeatures: one bit per detectable CPU capability. */
#define CPU_HAS_RDTSC    0x00000001
#define CPU_HAS_ALTIVEC  0x00000002
#define CPU_HAS_MMX      0x00000004
#define CPU_HAS_3DNOW    0x00000008
#define CPU_HAS_SSE      0x00000010
#define CPU_HAS_SSE2     0x00000020
#define CPU_HAS_SSE3     0x00000040
#define CPU_HAS_SSE41    0x00000080
#define CPU_HAS_SSE42    0x00000100
#define CPU_HAS_AVX      0x00000200
#define CPU_HAS_AVX2     0x00000400
#define CPU_HAS_NEON     0x00000800
#define CPU_HAS_AVX512F  0x00001000
#define CPU_HAS_ARM_SIMD 0x00002000
112
/* FreeBSD/powerpc is excluded here as well: like macOS and OpenBSD it probes
   AltiVec through sysctl() in CPU_haveAltiVec(), and the include block for it
   does not pull in <setjmp.h>/<signal.h>, so jmp_buf would be undeclared. */
#if SDL_ALTIVEC_BLITTERS && HAVE_SETJMP && !__MACOSX__ && !__OpenBSD__ && !(defined(__FreeBSD__) && defined(__powerpc__))
/* This is the brute force way of detecting instruction sets...
   the idea is borrowed from the libmpeg2 library - thanks!
 */
static jmp_buf jmpbuf;

/* SIGILL handler used while probing: jumps back to the setjmp() point in
   CPU_haveAltiVec() so the probe can report the instruction as unsupported. */
static void
illegal_instruction(int sig)
{
    (void) sig; /* the signal number is not needed */
    longjmp(jmpbuf, 1);
}
#endif /* SDL_ALTIVEC_BLITTERS && HAVE_SETJMP */
124
/*
 * Report whether the CPUID instruction can be used.
 *
 * On x86/x86-64 builds with a supported compiler this tries to flip the ID
 * bit (0x200000) in the flags register; if the bit can be toggled, CPUID is
 * available. MSVC x64 builds simply assume it is. When SDL_CPUINFO_DISABLED
 * is defined, or no branch below matches the compiler/architecture, the
 * function returns 0.
 *
 * Returns 1 when CPUID is available, 0 otherwise.
 */
static int
CPU_haveCPUID(void)
{
    int has_CPUID = 0;

/* *INDENT-OFF* */
#ifndef SDL_CPUINFO_DISABLED
#if (defined(__GNUC__) || defined(__llvm__)) && defined(__i386__)
    /* NOTE(review): has_CPUID is a write-only output ("=m") but is only stored
       to when the ID bit toggles; presumably this relies on the "= 0"
       initialisation above being kept by the compiler - confirm. */
    __asm__ (
" pushfl # Get original EFLAGS \n"
" popl %%eax \n"
" movl %%eax,%%ecx \n"
" xorl $0x200000,%%eax # Flip ID bit in EFLAGS \n"
" pushl %%eax # Save new EFLAGS value on stack \n"
" popfl # Replace current EFLAGS value \n"
" pushfl # Get new EFLAGS \n"
" popl %%eax # Store new EFLAGS in EAX \n"
" xorl %%ecx,%%eax # Can not toggle ID bit, \n"
" jz 1f # Processor=80486 \n"
" movl $1,%0 # We have CPUID support \n"
"1: \n"
    : "=m" (has_CPUID)
    :
    : "%eax", "%ecx"
    );
#elif (defined(__GNUC__) || defined(__llvm__)) && defined(__x86_64__)
/* Technically, if this is being compiled under __x86_64__ then it has
   CPUid by definition. But it's nice to be able to prove it. :) */
    __asm__ (
" pushfq # Get original EFLAGS \n"
" popq %%rax \n"
" movq %%rax,%%rcx \n"
" xorl $0x200000,%%eax # Flip ID bit in EFLAGS \n"
" pushq %%rax # Save new EFLAGS value on stack \n"
" popfq # Replace current EFLAGS value \n"
" pushfq # Get new EFLAGS \n"
" popq %%rax # Store new EFLAGS in EAX \n"
" xorl %%ecx,%%eax # Can not toggle ID bit, \n"
" jz 1f # Processor=80486 \n"
" movl $1,%0 # We have CPUID support \n"
"1: \n"
    : "=m" (has_CPUID)
    :
    : "%rax", "%rcx"
    );
#elif (defined(_MSC_VER) && defined(_M_IX86)) || defined(__WATCOMC__)
    /* Same EFLAGS ID-bit test, written in MSVC/Watcom inline assembler. */
    __asm {
        pushfd ; Get original EFLAGS
        pop eax
        mov ecx, eax
        xor eax, 200000h ; Flip ID bit in EFLAGS
        push eax ; Save new EFLAGS value on stack
        popfd ; Replace current EFLAGS value
        pushfd ; Get new EFLAGS
        pop eax ; Store new EFLAGS in EAX
        xor eax, ecx ; Can not toggle ID bit,
        jz done ; Processor=80486
        mov has_CPUID,1 ; We have CPUID support
done:
    }
#elif defined(_MSC_VER) && defined(_M_X64)
    /* No probing on MSVC x64: CPUID is taken to be present. */
    has_CPUID = 1;
#elif defined(__sun) && defined(__i386)
    /* NOTE(review): the result is stored to a fixed frame offset
       (-8(%ebp)), presumably the stack slot of has_CPUID - verify against
       the compiler's frame layout. */
    __asm (
" pushfl \n"
" popl %eax \n"
" movl %eax,%ecx \n"
" xorl $0x200000,%eax \n"
" pushl %eax \n"
" popfl \n"
" pushfl \n"
" popl %eax \n"
" xorl %ecx,%eax \n"
" jz 1f \n"
" movl $1,-8(%ebp) \n"
"1: \n"
    );
#elif defined(__sun) && defined(__amd64)
    /* NOTE(review): as above, the store goes to a fixed frame offset
       (-8(%rbp)), presumably has_CPUID - verify. */
    __asm (
" pushfq \n"
" popq %rax \n"
" movq %rax,%rcx \n"
" xorl $0x200000,%eax \n"
" pushq %rax \n"
" popfq \n"
" pushfq \n"
" popq %rax \n"
" xorl %ecx,%eax \n"
" jz 1f \n"
" movl $1,-8(%rbp) \n"
"1: \n"
    );
#endif
#endif
/* *INDENT-ON* */
    return has_CPUID;
}
222
/*
 * cpuid(func, a, b, c, d): execute CPUID for function number `func` and
 * store the resulting EAX/EBX/ECX/EDX values in the lvalues a, b, c and d.
 * One definition is selected per compiler/architecture; where none matches,
 * the fallback at the end just zeroes all four outputs.
 */
#if (defined(__GNUC__) || defined(__llvm__)) && defined(__i386__)
/* GCC/clang, 32-bit: EBX is saved and restored around CPUID and its value is
   returned through ESI; ECX is cleared before the instruction executes. */
#define cpuid(func, a, b, c, d) \
    __asm__ __volatile__ ( \
" pushl %%ebx \n" \
" xorl %%ecx,%%ecx \n" \
" cpuid \n" \
" movl %%ebx, %%esi \n" \
" popl %%ebx \n" : \
    "=a" (a), "=S" (b), "=c" (c), "=d" (d) : "a" (func))
#elif (defined(__GNUC__) || defined(__llvm__)) && defined(__x86_64__)
/* GCC/clang, 64-bit: same scheme using RBX/RSI; RCX is cleared first. */
#define cpuid(func, a, b, c, d) \
    __asm__ __volatile__ ( \
" pushq %%rbx \n" \
" xorq %%rcx,%%rcx \n" \
" cpuid \n" \
" movq %%rbx, %%rsi \n" \
" popq %%rbx \n" : \
    "=a" (a), "=S" (b), "=c" (c), "=d" (d) : "a" (func))
#elif (defined(_MSC_VER) && defined(_M_IX86)) || defined(__WATCOMC__)
/* MSVC (32-bit) / Watcom inline assembler; ECX is cleared before CPUID. */
#define cpuid(func, a, b, c, d) \
    __asm { \
        __asm mov eax, func \
        __asm xor ecx, ecx \
        __asm cpuid \
        __asm mov a, eax \
        __asm mov b, ebx \
        __asm mov c, ecx \
        __asm mov d, edx \
}
#elif defined(_MSC_VER) && defined(_M_X64)
/* MSVC x64: uses the __cpuid() intrinsic and copies out its four results. */
#define cpuid(func, a, b, c, d) \
{ \
    int CPUInfo[4]; \
    __cpuid(CPUInfo, func); \
    a = CPUInfo[0]; \
    b = CPUInfo[1]; \
    c = CPUInfo[2]; \
    d = CPUInfo[3]; \
}
#else
/* No CPUID on this target: report all-zero registers (the casts to void
   just mark the outputs as used). */
#define cpuid(func, a, b, c, d) \
    do { a = b = c = d = 0; (void) a; (void) b; (void) c; (void) d; } while (0)
#endif
266
267static int CPU_CPUIDFeatures[4];
268static int CPU_CPUIDMaxFunction = 0;
269static SDL_bool CPU_OSSavesYMM = SDL_FALSE;
270static SDL_bool CPU_OSSavesZMM = SDL_FALSE;
271
272static void
273CPU_calcCPUIDFeatures(void)
274{
275 static SDL_bool checked = SDL_FALSE;
276 if (!checked) {
277 checked = SDL_TRUE;
278 if (CPU_haveCPUID()) {
279 int a, b, c, d;
280 cpuid(0, a, b, c, d);
281 CPU_CPUIDMaxFunction = a;
282 if (CPU_CPUIDMaxFunction >= 1) {
283 cpuid(1, a, b, c, d);
284 CPU_CPUIDFeatures[0] = a;
285 CPU_CPUIDFeatures[1] = b;
286 CPU_CPUIDFeatures[2] = c;
287 CPU_CPUIDFeatures[3] = d;
288
289 /* Check to make sure we can call xgetbv */
290 if (c & 0x08000000) {
291 /* Call xgetbv to see if YMM (etc) register state is saved */
292#if (defined(__GNUC__) || defined(__llvm__)) && (defined(__i386__) || defined(__x86_64__))
293 __asm__(".byte 0x0f, 0x01, 0xd0" : "=a" (a) : "c" (0) : "%edx");
294#elif defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64)) && (_MSC_FULL_VER >= 160040219) /* VS2010 SP1 */
295 a = (int)_xgetbv(0);
296#elif (defined(_MSC_VER) && defined(_M_IX86)) || defined(__WATCOMC__)
297 __asm
298 {
299 xor ecx, ecx
300 _asm _emit 0x0f _asm _emit 0x01 _asm _emit 0xd0
301 mov a, eax
302 }
303#endif
304 CPU_OSSavesYMM = ((a & 6) == 6) ? SDL_TRUE : SDL_FALSE;
305 CPU_OSSavesZMM = (CPU_OSSavesYMM && ((a & 0xe0) == 0xe0)) ? SDL_TRUE : SDL_FALSE;
306 }
307 }
308 }
309 }
310}
311
/*
 * Detect AltiVec support.
 *
 * macOS/PowerPC and OpenBSD/FreeBSD on PowerPC ask the kernel through
 * sysctl(); other builds with SDL_ALTIVEC_BLITTERS and HAVE_SETJMP execute an
 * AltiVec instruction and catch SIGILL. Everything else reports 0.
 *
 * Returns non-zero when AltiVec is available, 0 otherwise.
 */
static int
CPU_haveAltiVec(void)
{
    /* presumably volatile so the value is well defined across the
       setjmp()/longjmp() probe below */
    volatile int altivec = 0;
#ifndef SDL_CPUINFO_DISABLED
#if (defined(__MACOSX__) && (defined(__ppc__) || defined(__ppc64__))) || (defined(__OpenBSD__) && defined(__powerpc__)) || (defined(__FreeBSD__) && defined(__powerpc__))
#if defined(__OpenBSD__)
    int mib[2] = { CTL_MACHDEP, CPU_ALTIVEC };
#elif defined(__FreeBSD__)
    int mib[2] = { CTL_HW, PPC_FEATURE_HAS_ALTIVEC };
#else
    int mib[2] = { CTL_HW, HW_VECTORUNIT };
#endif
    int vector_unit = 0;
    size_t vector_unit_size = sizeof(vector_unit);

    /* a failed query leaves altivec at 0 */
    if (sysctl(mib, 2, &vector_unit, &vector_unit_size, NULL, 0) == 0) {
        altivec = (vector_unit != 0);
    }
#elif SDL_ALTIVEC_BLITTERS && HAVE_SETJMP
    /* Install the SIGILL trap, try an AltiVec instruction, then put the
       previous handler back. */
    void (*previous_handler) (int sig) = signal(SIGILL, illegal_instruction);

    if (setjmp(jmpbuf) == 0) {
        asm volatile ("mtspr 256, %0\n\t" "vand %%v0, %%v0, %%v0"::"r" (-1));
        altivec = 1;
    }
    signal(SIGILL, previous_handler);
#endif
#endif
    return altivec;
}
342
/*
 * CPU_haveARMSIMD(): report whether the ARM SIMD instructions are usable.
 * Exactly one of the definitions below is compiled, depending on the target.
 * Returns non-zero when available, 0 otherwise.
 */
#if (defined(__ARM_ARCH) && (__ARM_ARCH >= 6)) || defined(__aarch64__)
/* Built for ARMv6 or later (or AArch64): always available. */
static int
CPU_haveARMSIMD(void)
{
    return 1;
}

#elif !defined(__arm__)
/* Not an ARM build at all. */
static int
CPU_haveARMSIMD(void)
{
    return 0;
}

#elif defined(__LINUX__)
/* Linux on older ARM targets: look up AT_PLATFORM in /proc/self/auxv and
   accept platform strings starting with "v6l" or "v7l". */
static int
CPU_haveARMSIMD(void)
{
    int arm_simd = 0;
    int fd;

    fd = open("/proc/self/auxv", O_RDONLY);
    if (fd >= 0) {
        Elf32_auxv_t aux;
        while (read(fd, &aux, sizeof aux) == sizeof aux) {
            if (aux.a_type == AT_PLATFORM) {
                const char *plat = (const char *) aux.a_un.a_val;
                if (plat) {
                    /* SDL_strncmp, like the SDL_ string helpers used in the
                       rest of this file, instead of the raw libc call */
                    arm_simd = (SDL_strncmp(plat, "v6l", 3) == 0) ||
                               (SDL_strncmp(plat, "v7l", 3) == 0);
                }
                break; /* found the platform entry; no need to read further */
            }
        }
        close(fd);
    }
    return arm_simd;
}

#elif defined(__RISCOS__)
/* RISC OS: ask OS_PlatformFeatures. Any SWI error, or bit 31 of the first
   reply being clear, is reported as "not available". */
static int
CPU_haveARMSIMD(void)
{
    _kernel_swi_regs regs;
    regs.r[0] = 0;
    if (_kernel_swi(OS_PlatformFeatures, &regs, &regs) != NULL) {
        return 0;
    }

    if (!(regs.r[0] & (1<<31))) {
        return 0;
    }

    regs.r[0] = 34;
    regs.r[1] = 29;
    if (_kernel_swi(OS_PlatformFeatures, &regs, &regs) != NULL) {
        return 0;
    }

    return regs.r[0];
}

#else
static int
CPU_haveARMSIMD(void)
{
#warning SDL_HasARMSIMD is not implemented for this ARM platform. Write me.
    return 0;
}
#endif
412
#if defined(__LINUX__) && defined(__arm__) && !defined(HAVE_GETAUXVAL)
/*
 * Fallback NEON probe for Linux/ARM builds without getauxval(): scan
 * /proc/self/auxv for the AT_HWCAP entry and test its HWCAP_NEON bit.
 * Returns 1 when the bit is set; 0 when it is clear, when no AT_HWCAP entry
 * is found, or when the file cannot be opened.
 */
static int
readProcAuxvForNeon(void)
{
    Elf32_auxv_t entry;
    int has_neon = 0;
    const int auxv_fd = open("/proc/self/auxv", O_RDONLY);

    if (auxv_fd < 0) {
        return 0;
    }

    while (read(auxv_fd, &entry, sizeof (entry)) == sizeof (entry)) {
        if (entry.a_type != AT_HWCAP) {
            continue;
        }
        has_neon = ((entry.a_un.a_val & HWCAP_NEON) == HWCAP_NEON);
        break; /* only the AT_HWCAP entry matters */
    }

    close(auxv_fd);
    return has_neon;
}
#endif
435
/*
 * Report whether NEON is available. Exactly one of the branches below is
 * compiled, chosen by platform; the first matching condition wins.
 * Returns non-zero when NEON is available, 0 otherwise.
 */
static int
CPU_haveNEON(void)
{
/* Detecting NEON takes a privileged instruction on ARM, so you have to
   query the OS kernel in a platform-specific way. :/ */
#if defined(SDL_CPUINFO_DISABLED)
    return 0; /* disabled */
#elif (defined(__WINDOWS__) || defined(__WINRT__)) && (defined(_M_ARM) || defined(_M_ARM64))
/* Visual Studio, for ARM, doesn't define __ARM_ARCH. Handle this first. */
/* Seems to have been removed */
#  if !defined(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE)
#  define PF_ARM_NEON_INSTRUCTIONS_AVAILABLE 19
#  endif
/* All WinRT ARM devices are required to support NEON, but just in case. */
    return IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE) != 0;
#elif (defined(__ARM_ARCH) && (__ARM_ARCH >= 8)) || defined(__aarch64__)
    return 1; /* ARMv8 always has non-optional NEON support. */
#elif __VITA__
    return 1;
#elif defined(__APPLE__) && defined(__ARM_ARCH) && (__ARM_ARCH >= 7)
    /* (note that sysctlbyname("hw.optional.neon") doesn't work!) */
    return 1; /* all Apple ARMv7 chips and later have NEON. */
#elif defined(__APPLE__)
    return 0; /* assume anything else from Apple doesn't have NEON. */
#elif !defined(__arm__)
    return 0; /* not an ARM CPU at all. */
#elif defined(__OpenBSD__)
    return 1; /* OpenBSD only supports ARMv7 CPUs that have NEON. */
#elif defined(HAVE_ELF_AUX_INFO)
    unsigned long hwcap = 0;

    if (elf_aux_info(AT_HWCAP, (void *)&hwcap, (int)sizeof(hwcap)) == 0) {
        return ((hwcap & HWCAP_NEON) == HWCAP_NEON);
    }
    return 0; /* the query itself failed */
#elif defined(__QNXNTO__)
    return SYSPAGE_ENTRY(cpuinfo)->flags & ARM_CPU_FLAG_NEON;
#elif (defined(__LINUX__) || defined(__ANDROID__)) && defined(HAVE_GETAUXVAL)
    return ((getauxval(AT_HWCAP) & HWCAP_NEON) == HWCAP_NEON);
#elif defined(__LINUX__)
    return readProcAuxvForNeon();
#elif defined(__ANDROID__)
    /* Use NDK cpufeatures to read either /proc/self/auxv or /proc/cpuinfo */
    {
        /* the feature word is only fetched when the family is ARM */
        if (android_getCpuFamily() != ANDROID_CPU_FAMILY_ARM) {
            return 0;
        }
        return ((android_getCpuFeatures() & ANDROID_CPU_ARM_FEATURE_NEON) != 0) ? 1 : 0;
    }
#elif defined(__RISCOS__)
    /* Use the VFPSupport_Features SWI to access the MVFR registers */
    {
        _kernel_swi_regs regs;
        regs.r[0] = 0;
        if (_kernel_swi(VFPSupport_Features, &regs, &regs) != NULL) {
            return 0; /* SWI reported an error */
        }
        return ((regs.r[2] & 0xFFF000) == 0x111000) ? 1 : 0;
    }
#else
#warning SDL_HasNEON is not implemented for this ARM platform. Write me.
    return 0;
#endif
}
504
505#if defined(__e2k__)
506inline int
507CPU_have3DNow(void)
508{
509#if defined(__3dNOW__)
510 return 1;
511#else
512 return 0;
513#endif
514}
515#else
516static int
517CPU_have3DNow(void)
518{
519 if (CPU_CPUIDMaxFunction > 0) { /* that is, do we have CPUID at all? */
520 int a, b, c, d;
521 cpuid(0x80000000, a, b, c, d);
522 if (a >= 0x80000001) {
523 cpuid(0x80000001, a, b, c, d);
524 return (d & 0x80000000);
525 }
526 }
527 return 0;
528}
529#endif
530
/*
 * CPU_haveRDTSC() ... CPU_haveAVX(): per-feature checks.
 * On e2k they are compile-time constants taken from the compiler's own
 * feature macros; elsewhere they test bits of the cached CPUID function 1
 * registers (CPU_CPUIDFeatures[2] is ECX, CPU_CPUIDFeatures[3] is EDX).
 */
#if defined(__e2k__)
#define CPU_haveRDTSC() (0)
#if defined(__MMX__)
#define CPU_haveMMX() (1)
#else
#define CPU_haveMMX() (0)
#endif
#if defined(__SSE__)
#define CPU_haveSSE() (1)
#else
#define CPU_haveSSE() (0)
#endif
#if defined(__SSE2__)
#define CPU_haveSSE2() (1)
#else
#define CPU_haveSSE2() (0)
#endif
#if defined(__SSE3__)
#define CPU_haveSSE3() (1)
#else
#define CPU_haveSSE3() (0)
#endif
#if defined(__SSE4_1__)
#define CPU_haveSSE41() (1)
#else
#define CPU_haveSSE41() (0)
#endif
#if defined(__SSE4_2__)
#define CPU_haveSSE42() (1)
#else
#define CPU_haveSSE42() (0)
#endif
#if defined(__AVX__)
#define CPU_haveAVX() (1)
#else
#define CPU_haveAVX() (0)
#endif
#else
#define CPU_haveRDTSC() (CPU_CPUIDFeatures[3] & (1 << 4))   /* EDX bit 4 */
#define CPU_haveMMX()   (CPU_CPUIDFeatures[3] & (1 << 23))  /* EDX bit 23 */
#define CPU_haveSSE()   (CPU_CPUIDFeatures[3] & (1 << 25))  /* EDX bit 25 */
#define CPU_haveSSE2()  (CPU_CPUIDFeatures[3] & (1 << 26))  /* EDX bit 26 */
#define CPU_haveSSE3()  (CPU_CPUIDFeatures[2] & (1 << 0))   /* ECX bit 0 */
#define CPU_haveSSE41() (CPU_CPUIDFeatures[2] & (1 << 19))  /* ECX bit 19 */
#define CPU_haveSSE42() (CPU_CPUIDFeatures[2] & (1 << 20))  /* ECX bit 20 */
/* AVX additionally requires that the OS saves the YMM registers. */
#define CPU_haveAVX()   (CPU_OSSavesYMM && (CPU_CPUIDFeatures[2] & (1 << 28)))
#endif
578
579#if defined(__e2k__)
580inline int
581CPU_haveAVX2(void)
582{
583#if defined(__AVX2__)
584 return 1;
585#else
586 return 0;
587#endif
588}
589#else
590static int
591CPU_haveAVX2(void)
592{
593 if (CPU_OSSavesYMM && (CPU_CPUIDMaxFunction >= 7)) {
594 int a, b, c, d;
595 (void) a; (void) b; (void) c; (void) d; /* compiler warnings... */
596 cpuid(7, a, b, c, d);
597 return (b & 0x00000020);
598 }
599 return 0;
600}
601#endif
602
603#if defined(__e2k__)
604inline int
605CPU_haveAVX512F(void)
606{
607 return 0;
608}
609#else
610static int
611CPU_haveAVX512F(void)
612{
613 if (CPU_OSSavesZMM && (CPU_CPUIDMaxFunction >= 7)) {
614 int a, b, c, d;
615 (void) a; (void) b; (void) c; (void) d; /* compiler warnings... */
616 cpuid(7, a, b, c, d);
617 return (b & 0x00010000);
618 }
619 return 0;
620}
621#endif
622
/* Cached CPU count; 0 means "not determined yet". */
static int SDL_CPUCount = 0;

/*
 * Return the number of CPUs, determined on first use and cached afterwards.
 *
 * Each platform probe that is compiled in runs only while no positive count
 * has been found yet. If nothing yields a positive value (or
 * SDL_CPUINFO_DISABLED is defined) the result is 1.
 */
int
SDL_GetCPUCount(void)
{
    if (SDL_CPUCount) {
        return SDL_CPUCount; /* already computed */
    }

#ifndef SDL_CPUINFO_DISABLED
#if defined(HAVE_SYSCONF) && defined(_SC_NPROCESSORS_ONLN)
    if (SDL_CPUCount <= 0) {
        SDL_CPUCount = (int)sysconf(_SC_NPROCESSORS_ONLN);
    }
#endif
#ifdef HAVE_SYSCTLBYNAME
    if (SDL_CPUCount <= 0) {
        size_t size = sizeof(SDL_CPUCount);
        sysctlbyname("hw.ncpu", &SDL_CPUCount, &size, NULL, 0);
    }
#endif
#ifdef __WIN32__
    if (SDL_CPUCount <= 0) {
        SYSTEM_INFO info;
        GetSystemInfo(&info);
        SDL_CPUCount = info.dwNumberOfProcessors;
    }
#endif
#ifdef __OS2__
    if (SDL_CPUCount <= 0) {
        DosQuerySysInfo(QSV_NUMPROCESSORS, QSV_NUMPROCESSORS,
                        &SDL_CPUCount, sizeof(SDL_CPUCount) );
    }
#endif
#endif

    /* There has to be at least 1, right? :) */
    if (SDL_CPUCount < 1) {
        SDL_CPUCount = 1;
    }
    return SDL_CPUCount;
}
662
663#if defined(__e2k__)
664inline const char *
665SDL_GetCPUType(void)
666{
667 static char SDL_CPUType[13];
668
669 SDL_strlcpy(SDL_CPUType, "E2K MACHINE", sizeof(SDL_CPUType));
670
671 return SDL_CPUType;
672}
673#else
674/* Oh, such a sweet sweet trick, just not very useful. :) */
675static const char *
676SDL_GetCPUType(void)
677{
678 static char SDL_CPUType[13];
679
680 if (!SDL_CPUType[0]) {
681 int i = 0;
682
683 CPU_calcCPUIDFeatures();
684 if (CPU_CPUIDMaxFunction > 0) { /* do we have CPUID at all? */
685 int a, b, c, d;
686 cpuid(0x00000000, a, b, c, d);
687 (void) a;
688 SDL_CPUType[i++] = (char)(b & 0xff); b >>= 8;
689 SDL_CPUType[i++] = (char)(b & 0xff); b >>= 8;
690 SDL_CPUType[i++] = (char)(b & 0xff); b >>= 8;
691 SDL_CPUType[i++] = (char)(b & 0xff);
692
693 SDL_CPUType[i++] = (char)(d & 0xff); d >>= 8;
694 SDL_CPUType[i++] = (char)(d & 0xff); d >>= 8;
695 SDL_CPUType[i++] = (char)(d & 0xff); d >>= 8;
696 SDL_CPUType[i++] = (char)(d & 0xff);
697
698 SDL_CPUType[i++] = (char)(c & 0xff); c >>= 8;
699 SDL_CPUType[i++] = (char)(c & 0xff); c >>= 8;
700 SDL_CPUType[i++] = (char)(c & 0xff); c >>= 8;
701 SDL_CPUType[i++] = (char)(c & 0xff);
702 }
703 if (!SDL_CPUType[0]) {
704 SDL_strlcpy(SDL_CPUType, "Unknown", sizeof(SDL_CPUType));
705 }
706 }
707 return SDL_CPUType;
708}
709#endif
710
711
#ifdef TEST_MAIN  /* !!! FIXME: only used for test at the moment. */
/*
 * SDL_GetCPUName(): return the processor name string.
 * The returned pointer refers to a static buffer owned by this function.
 */
#if defined(__e2k__)
/* e2k: name supplied by the compiler builtin. Declared static like the
   generic variant: a plain `inline` definition provides no external
   definition in C99 and can fail to link. */
static const char *
SDL_GetCPUName(void)
{
    static char SDL_CPUName[48];

    SDL_strlcpy(SDL_CPUName, __builtin_cpu_name(), sizeof(SDL_CPUName));

    return SDL_CPUName;
}
#else
/* Generic: concatenates the 16 bytes (EAX, EBX, ECX, EDX, low byte first)
   returned by each of the CPUID functions 0x80000002..0x80000004. Falls back
   to "Unknown" when CPUID or those functions are unavailable, or when the
   string comes back empty. The result is computed once. */
static const char *
SDL_GetCPUName(void)
{
    static char SDL_CPUName[48];

    if (!SDL_CPUName[0]) {
        CPU_calcCPUIDFeatures();
        if (CPU_CPUIDMaxFunction > 0) { /* do we have CPUID at all? */
            int a, b, c, d;

            cpuid(0x80000000, a, b, c, d);
            /* compare as unsigned: extended function numbers have the top bit set */
            if ((unsigned int) a >= 0x80000004U) {
                unsigned int leaf;
                int i = 0;

                for (leaf = 0x80000002U; leaf <= 0x80000004U; ++leaf) {
                    int regs[4];
                    int r, shift;

                    cpuid(leaf, a, b, c, d);
                    regs[0] = a;
                    regs[1] = b;
                    regs[2] = c;
                    regs[3] = d;
                    for (r = 0; r < 4; ++r) {
                        for (shift = 0; shift < 32; shift += 8) {
                            SDL_CPUName[i++] = (char)(((unsigned int) regs[r] >> shift) & 0xff);
                        }
                    }
                }
                /* All 48 bytes were just written; force a terminator in the
                   last one so the buffer is always a valid C string. */
                SDL_CPUName[sizeof(SDL_CPUName) - 1] = '\0';
            }
        }
        if (!SDL_CPUName[0]) {
            SDL_strlcpy(SDL_CPUName, "Unknown", sizeof(SDL_CPUName));
        }
    }
    return SDL_CPUName;
}
#endif
#endif
798
799int
800SDL_GetCPUCacheLineSize(void)
801{
802 const char *cpuType = SDL_GetCPUType();
803 int a, b, c, d;
804 (void) a; (void) b; (void) c; (void) d;
805 if (SDL_strcmp(cpuType, "GenuineIntel") == 0 || SDL_strcmp(cpuType, "CentaurHauls") == 0 || SDL_strcmp(cpuType, " Shanghai ") == 0) {
806 cpuid(0x00000001, a, b, c, d);
807 return (((b >> 8) & 0xff) * 8);
808 } else if (SDL_strcmp(cpuType, "AuthenticAMD") == 0 || SDL_strcmp(cpuType, "HygonGenuine") == 0) {
809 cpuid(0x80000005, a, b, c, d);
810 return (c & 0xff);
811 } else {
812 /* Just make a guess here... */
813 return SDL_CACHELINE_SIZE;
814 }
815}
816
817static Uint32 SDL_CPUFeatures = 0xFFFFFFFF;
818static Uint32 SDL_SIMDAlignment = 0xFFFFFFFF;
819
820static Uint32
821SDL_GetCPUFeatures(void)
822{
823 if (SDL_CPUFeatures == 0xFFFFFFFF) {
824 CPU_calcCPUIDFeatures();
825 SDL_CPUFeatures = 0;
826 SDL_SIMDAlignment = sizeof(void *); /* a good safe base value */
827 if (CPU_haveRDTSC()) {
828 SDL_CPUFeatures |= CPU_HAS_RDTSC;
829 }
830 if (CPU_haveAltiVec()) {
831 SDL_CPUFeatures |= CPU_HAS_ALTIVEC;
832 SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16);
833 }
834 if (CPU_haveMMX()) {
835 SDL_CPUFeatures |= CPU_HAS_MMX;
836 SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 8);
837 }
838 if (CPU_have3DNow()) {
839 SDL_CPUFeatures |= CPU_HAS_3DNOW;
840 SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 8);
841 }
842 if (CPU_haveSSE()) {
843 SDL_CPUFeatures |= CPU_HAS_SSE;
844 SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16);
845 }
846 if (CPU_haveSSE2()) {
847 SDL_CPUFeatures |= CPU_HAS_SSE2;
848 SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16);
849 }
850 if (CPU_haveSSE3()) {
851 SDL_CPUFeatures |= CPU_HAS_SSE3;
852 SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16);
853 }
854 if (CPU_haveSSE41()) {
855 SDL_CPUFeatures |= CPU_HAS_SSE41;
856 SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16);
857 }
858 if (CPU_haveSSE42()) {
859 SDL_CPUFeatures |= CPU_HAS_SSE42;
860 SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16);
861 }
862 if (CPU_haveAVX()) {
863 SDL_CPUFeatures |= CPU_HAS_AVX;
864 SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 32);
865 }
866 if (CPU_haveAVX2()) {
867 SDL_CPUFeatures |= CPU_HAS_AVX2;
868 SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 32);
869 }
870 if (CPU_haveAVX512F()) {
871 SDL_CPUFeatures |= CPU_HAS_AVX512F;
872 SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 64);
873 }
874 if (CPU_haveARMSIMD()) {
875 SDL_CPUFeatures |= CPU_HAS_ARM_SIMD;
876 SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16);
877 }
878 if (CPU_haveNEON()) {
879 SDL_CPUFeatures |= CPU_HAS_NEON;
880 SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16);
881 }
882 }
883 return SDL_CPUFeatures;
884}
885
886#define CPU_FEATURE_AVAILABLE(f) ((SDL_GetCPUFeatures() & f) ? SDL_TRUE : SDL_FALSE)
887
888SDL_bool SDL_HasRDTSC(void)
889{
890 return CPU_FEATURE_AVAILABLE(CPU_HAS_RDTSC);
891}
892
893SDL_bool
894SDL_HasAltiVec(void)
895{
896 return CPU_FEATURE_AVAILABLE(CPU_HAS_ALTIVEC);
897}
898
899SDL_bool
900SDL_HasMMX(void)
901{
902 return CPU_FEATURE_AVAILABLE(CPU_HAS_MMX);
903}
904
905SDL_bool
906SDL_Has3DNow(void)
907{
908 return CPU_FEATURE_AVAILABLE(CPU_HAS_3DNOW);
909}
910
911SDL_bool
912SDL_HasSSE(void)
913{
914 return CPU_FEATURE_AVAILABLE(CPU_HAS_SSE);
915}
916
917SDL_bool
918SDL_HasSSE2(void)
919{
920 return CPU_FEATURE_AVAILABLE(CPU_HAS_SSE2);
921}
922
923SDL_bool
924SDL_HasSSE3(void)
925{
926 return CPU_FEATURE_AVAILABLE(CPU_HAS_SSE3);
927}
928
929SDL_bool
930SDL_HasSSE41(void)
931{
932 return CPU_FEATURE_AVAILABLE(CPU_HAS_SSE41);
933}
934
935SDL_bool
936SDL_HasSSE42(void)
937{
938 return CPU_FEATURE_AVAILABLE(CPU_HAS_SSE42);
939}
940
941SDL_bool
942SDL_HasAVX(void)
943{
944 return CPU_FEATURE_AVAILABLE(CPU_HAS_AVX);
945}
946
947SDL_bool
948SDL_HasAVX2(void)
949{
950 return CPU_FEATURE_AVAILABLE(CPU_HAS_AVX2);
951}
952
953SDL_bool
954SDL_HasAVX512F(void)
955{
956 return CPU_FEATURE_AVAILABLE(CPU_HAS_AVX512F);
957}
958
959SDL_bool
960SDL_HasARMSIMD(void)
961{
962 return CPU_FEATURE_AVAILABLE(CPU_HAS_ARM_SIMD);
963}
964
965SDL_bool
966SDL_HasNEON(void)
967{
968 return CPU_FEATURE_AVAILABLE(CPU_HAS_NEON);
969}
970
/* Cached amount of system RAM in MiB; 0 means "not determined yet". */
static int SDL_SystemRAM = 0;

/*
 * Return the amount of system RAM in MiB.
 *
 * Each platform probe that is compiled in runs only while no positive value
 * has been found yet. A positive result is cached. When every probe fails
 * (or SDL_CPUINFO_DISABLED is defined) the function returns 0 and probes
 * again on the next call.
 */
int
SDL_GetSystemRAM(void)
{
    if (!SDL_SystemRAM) {
#ifndef SDL_CPUINFO_DISABLED
#if defined(HAVE_SYSCONF) && defined(_SC_PHYS_PAGES) && defined(_SC_PAGESIZE)
        if (SDL_SystemRAM <= 0) {
            /* widen before multiplying: pages * page size does not fit in 32 bits */
            SDL_SystemRAM = (int)((Sint64)sysconf(_SC_PHYS_PAGES) * sysconf(_SC_PAGESIZE) / (1024*1024));
        }
#endif
#ifdef HAVE_SYSCTLBYNAME
        if (SDL_SystemRAM <= 0) {
#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__) || defined(__NetBSD__) || defined(__DragonFly__)
#ifdef HW_REALMEM
            int mib[2] = {CTL_HW, HW_REALMEM};
#else
            /* might only report up to 2 GiB */
            int mib[2] = {CTL_HW, HW_PHYSMEM};
#endif /* HW_REALMEM */
#else
            int mib[2] = {CTL_HW, HW_MEMSIZE};
#endif /* __FreeBSD__ || __FreeBSD_kernel__ */
            Uint64 memsize = 0;
            size_t len = sizeof(memsize);

            if (sysctl(mib, 2, &memsize, &len, NULL, 0) == 0) {
                SDL_SystemRAM = (int)(memsize / (1024*1024));
            }
        }
#endif
#ifdef __WIN32__
        if (SDL_SystemRAM <= 0) {
            MEMORYSTATUSEX stat;
            stat.dwLength = sizeof(stat);
            if (GlobalMemoryStatusEx(&stat)) {
                SDL_SystemRAM = (int)(stat.ullTotalPhys / (1024 * 1024));
            }
        }
#endif
#ifdef __OS2__
        if (SDL_SystemRAM <= 0) {
            Uint32 sysram = 0;
            DosQuerySysInfo(QSV_TOTPHYSMEM, QSV_TOTPHYSMEM, &sysram, 4);
            SDL_SystemRAM = (int) (sysram / 0x100000U);
        }
#endif
#ifdef __RISCOS__
        if (SDL_SystemRAM <= 0) {
            _kernel_swi_regs regs;
            regs.r[0] = 0x108;
            if (_kernel_swi(OS_Memory, &regs, &regs) == NULL) {
                /* Multiply in 64 bits, as the sysconf branch does: the
                   product of the two int registers overflows once it
                   reaches 2 GiB. */
                SDL_SystemRAM = (int)((Sint64)regs.r[1] * regs.r[2] / (1024 * 1024));
            }
        }
#endif
#endif
    }
    return SDL_SystemRAM;
}
1032
1033
1034size_t
1035SDL_SIMDGetAlignment(void)
1036{
1037 if (SDL_SIMDAlignment == 0xFFFFFFFF) {
1038 SDL_GetCPUFeatures(); /* make sure this has been calculated */
1039 }
1040 SDL_assert(SDL_SIMDAlignment != 0);
1041 return SDL_SIMDAlignment;
1042}
1043
1044void *
1045SDL_SIMDAlloc(const size_t len)
1046{
1047 const size_t alignment = SDL_SIMDGetAlignment();
1048 const size_t padding = alignment - (len % alignment);
1049 const size_t padded = (padding != alignment) ? (len + padding) : len;
1050 Uint8 *retval = NULL;
1051 Uint8 *ptr = (Uint8 *) SDL_malloc(padded + alignment + sizeof (void *));
1052 if (ptr) {
1053 /* store the actual malloc pointer right before our aligned pointer. */
1054 retval = ptr + sizeof (void *);
1055 retval += alignment - (((size_t) retval) % alignment);
1056 *(((void **) retval) - 1) = ptr;
1057 }
1058 return retval;
1059}
1060
1061void *
1062SDL_SIMDRealloc(void *mem, const size_t len)
1063{
1064 const size_t alignment = SDL_SIMDGetAlignment();
1065 const size_t padding = alignment - (len % alignment);
1066 const size_t padded = (padding != alignment) ? (len + padding) : len;
1067 Uint8 *retval = (Uint8*) mem;
1068 void *oldmem = mem;
1069 size_t memdiff = 0, ptrdiff;
1070 Uint8 *ptr;
1071
1072 if (mem) {
1073 void **realptr = (void **) mem;
1074 realptr--;
1075 mem = *(((void **) mem) - 1);
1076
1077 /* Check the delta between the real pointer and user pointer */
1078 memdiff = ((size_t) oldmem) - ((size_t) mem);
1079 }
1080
1081 ptr = (Uint8 *) SDL_realloc(mem, padded + alignment + sizeof (void *));
1082
1083 if (ptr == mem) {
1084 return retval; /* Pointer didn't change, nothing to do */
1085 }
1086 if (ptr == NULL) {
1087 return NULL; /* Out of memory, bail! */
1088 }
1089
1090 /* Store the actual malloc pointer right before our aligned pointer. */
1091 retval = ptr + sizeof (void *);
1092 retval += alignment - (((size_t) retval) % alignment);
1093
1094 /* Make sure the delta is the same! */
1095 if (mem) {
1096 ptrdiff = ((size_t) retval) - ((size_t) ptr);
1097 if (memdiff != ptrdiff) { /* Delta has changed, copy to new offset! */
1098 oldmem = (void*) (((size_t) ptr) + memdiff);
1099
1100 /* Even though the data past the old `len` is undefined, this is the
1101 * only length value we have, and it guarantees that we copy all the
1102 * previous memory anyhow.
1103 */
1104 SDL_memmove(retval, oldmem, len);
1105 }
1106 }
1107
1108 /* Actually store the malloc pointer, finally. */
1109 *(((void **) retval) - 1) = ptr;
1110 return retval;
1111}
1112
/*
 * Release a block obtained from SDL_SIMDAlloc()/SDL_SIMDRealloc().
 * A NULL pointer is ignored.
 */
void
SDL_SIMDFree(void *ptr)
{
    void **slot;

    if (ptr == NULL) {
        return;
    }

    /* the real allocation pointer sits in the slot just before `ptr` */
    slot = ((void **) ptr) - 1;
    SDL_free(*slot);
}
1122
1123
#ifdef TEST_MAIN

#include <stdio.h>

/* Stand-alone test driver: prints every value this file can detect, one per
   line, and exits with status 0. */
int
main(void)
{
    printf("CPU count: %d\n", SDL_GetCPUCount());
    printf("CPU type: %s\n", SDL_GetCPUType());
    printf("CPU name: %s\n", SDL_GetCPUName());
    printf("CacheLine size: %d\n", SDL_GetCPUCacheLineSize());
    printf("RDTSC: %d\n", SDL_HasRDTSC());
    printf("Altivec: %d\n", SDL_HasAltiVec());
    printf("MMX: %d\n", SDL_HasMMX());
    printf("3DNow: %d\n", SDL_Has3DNow());
    printf("SSE: %d\n", SDL_HasSSE());
    printf("SSE2: %d\n", SDL_HasSSE2());
    printf("SSE3: %d\n", SDL_HasSSE3());
    printf("SSE4.1: %d\n", SDL_HasSSE41());
    printf("SSE4.2: %d\n", SDL_HasSSE42());
    printf("AVX: %d\n", SDL_HasAVX());
    printf("AVX2: %d\n", SDL_HasAVX2());
    printf("AVX-512F: %d\n", SDL_HasAVX512F());
    printf("ARM SIMD: %d\n", SDL_HasARMSIMD());
    printf("NEON: %d\n", SDL_HasNEON());
    printf("RAM: %d MB\n", SDL_GetSystemRAM());
    return 0;
}

#endif /* TEST_MAIN */
1154
1155/* vi: set ts=4 sw=4 expandtab: */
1156