1 | /* |
2 | Simple DirectMedia Layer |
3 | Copyright (C) 1997-2021 Sam Lantinga <slouken@libsdl.org> |
4 | |
5 | This software is provided 'as-is', without any express or implied |
6 | warranty. In no event will the authors be held liable for any damages |
7 | arising from the use of this software. |
8 | |
9 | Permission is granted to anyone to use this software for any purpose, |
10 | including commercial applications, and to alter it and redistribute it |
11 | freely, subject to the following restrictions: |
12 | |
13 | 1. The origin of this software must not be misrepresented; you must not |
14 | claim that you wrote the original software. If you use this software |
15 | in a product, an acknowledgment in the product documentation would be |
16 | appreciated but is not required. |
17 | 2. Altered source versions must be plainly marked as such, and must not be |
18 | misrepresented as being the original software. |
19 | 3. This notice may not be removed or altered from any source distribution. |
20 | */ |
21 | #ifdef TEST_MAIN |
22 | #include "SDL_config.h" |
23 | #else |
24 | #include "../SDL_internal.h" |
25 | #endif |
26 | |
27 | #if defined(__WIN32__) || defined(__WINRT__) |
28 | #include "../core/windows/SDL_windows.h" |
29 | #endif |
30 | #if defined(__OS2__) |
31 | #undef HAVE_SYSCTLBYNAME |
32 | #define INCL_DOS |
33 | #include <os2.h> |
34 | #ifndef QSV_NUMPROCESSORS |
35 | #define QSV_NUMPROCESSORS 26 |
36 | #endif |
37 | #endif |
38 | |
39 | /* CPU feature detection for SDL */ |
40 | |
41 | #include "SDL_cpuinfo.h" |
42 | #include "SDL_assert.h" |
43 | |
44 | #ifdef HAVE_SYSCONF |
45 | #include <unistd.h> |
46 | #endif |
47 | #ifdef HAVE_SYSCTLBYNAME |
48 | #include <sys/types.h> |
49 | #include <sys/sysctl.h> |
50 | #endif |
51 | #if defined(__MACOSX__) && (defined(__ppc__) || defined(__ppc64__)) |
52 | #include <sys/sysctl.h> /* For AltiVec check */ |
53 | #elif (defined(__OpenBSD__) || defined(__FreeBSD__)) && defined(__powerpc__) |
54 | #include <sys/param.h> |
55 | #include <sys/sysctl.h> /* For AltiVec check */ |
56 | #include <machine/cpu.h> |
57 | #elif SDL_ALTIVEC_BLITTERS && HAVE_SETJMP |
58 | #include <signal.h> |
59 | #include <setjmp.h> |
60 | #endif |
61 | |
62 | #if defined(__QNXNTO__) |
63 | #include <sys/syspage.h> |
64 | #endif |
65 | |
66 | #if (defined(__LINUX__) || defined(__ANDROID__)) && defined(__arm__) |
67 | #include <unistd.h> |
68 | #include <sys/types.h> |
69 | #include <sys/stat.h> |
70 | #include <fcntl.h> |
71 | #include <elf.h> |
72 | |
73 | /*#include <asm/hwcap.h>*/ |
74 | #ifndef AT_HWCAP |
75 | #define AT_HWCAP 16 |
76 | #endif |
77 | #ifndef AT_PLATFORM |
78 | #define AT_PLATFORM 15 |
79 | #endif |
80 | #ifndef HWCAP_NEON |
81 | #define HWCAP_NEON (1 << 12) |
82 | #endif |
83 | #endif |
84 | |
85 | #if defined(__ANDROID__) && defined(__arm__) && !defined(HAVE_GETAUXVAL) |
86 | #include <cpu-features.h> |
87 | #endif |
88 | |
89 | #if defined(HAVE_GETAUXVAL) || defined(HAVE_ELF_AUX_INFO) |
90 | #include <sys/auxv.h> |
91 | #endif |
92 | |
93 | #ifdef __RISCOS__ |
94 | #include <kernel.h> |
95 | #include <swis.h> |
96 | #endif |
97 | |
/* One bit per CPU capability; these are OR'ed together into SDL_CPUFeatures. */
#define CPU_HAS_RDTSC    0x00000001 /* bit 0 */
#define CPU_HAS_ALTIVEC  0x00000002 /* bit 1 */
#define CPU_HAS_MMX      0x00000004 /* bit 2 */
#define CPU_HAS_3DNOW    0x00000008 /* bit 3 */
#define CPU_HAS_SSE      0x00000010 /* bit 4 */
#define CPU_HAS_SSE2     0x00000020 /* bit 5 */
#define CPU_HAS_SSE3     0x00000040 /* bit 6 */
#define CPU_HAS_SSE41    0x00000080 /* bit 7 */
#define CPU_HAS_SSE42    0x00000100 /* bit 8 */
#define CPU_HAS_AVX      0x00000200 /* bit 9 */
#define CPU_HAS_AVX2     0x00000400 /* bit 10 */
#define CPU_HAS_NEON     0x00000800 /* bit 11 */
#define CPU_HAS_AVX512F  0x00001000 /* bit 12 */
#define CPU_HAS_ARM_SIMD 0x00002000 /* bit 13 */
112 | |
/*
 * SIGILL trampoline for the brute-force AltiVec probe in CPU_haveAltiVec()
 * (the idea is borrowed from the libmpeg2 library - thanks!).
 *
 * It is only needed on platforms that do not query AltiVec through sysctl.
 * FreeBSD/powerpc is excluded as well: on that platform the include block at
 * the top of this file pulls in <sys/sysctl.h> instead of <setjmp.h> and
 * <signal.h>, so jmp_buf would not even be declared here.
 */
#if SDL_ALTIVEC_BLITTERS && HAVE_SETJMP && !__MACOSX__ && !__OpenBSD__ && !(defined(__FreeBSD__) && defined(__powerpc__))
/* Jump target armed by CPU_haveAltiVec() before it executes the test instruction. */
static jmp_buf jmpbuf;

/* Signal handler: abandon the faulting instruction and resume at setjmp(jmpbuf). */
static void
illegal_instruction(int sig)
{
    (void) sig; /* the signal number is not needed */
    longjmp(jmpbuf, 1);
}
#endif /* HAVE_SETJMP */
124 | |
/*
 * Return nonzero when the CPUID instruction can be used, 0 otherwise.
 *
 * Every assembly variant except MSVC x64 probes for CPUID the same way: it
 * reads EFLAGS, flips the ID bit (0x200000), writes the value back, reads
 * EFLAGS again and checks whether the bit actually changed.  MSVC x64 simply
 * assumes CPUID is present.  When SDL_CPUINFO_DISABLED is defined, or no
 * variant matches the compiler/architecture, the function returns 0.
 */
static int
CPU_haveCPUID(void)
{
    int has_CPUID = 0;

/* *INDENT-OFF* */
#ifndef SDL_CPUINFO_DISABLED
#if (defined(__GNUC__) || defined(__llvm__)) && defined(__i386__)
    /* GCC/Clang, 32-bit x86: has_CPUID is written through memory operand %0. */
    __asm__ (
" pushfl # Get original EFLAGS \n"
" popl %%eax \n"
" movl %%eax,%%ecx \n"
" xorl $0x200000,%%eax # Flip ID bit in EFLAGS \n"
" pushl %%eax # Save new EFLAGS value on stack \n"
" popfl # Replace current EFLAGS value \n"
" pushfl # Get new EFLAGS \n"
" popl %%eax # Store new EFLAGS in EAX \n"
" xorl %%ecx,%%eax # Can not toggle ID bit, \n"
" jz 1f # Processor=80486 \n"
" movl $1,%0 # We have CPUID support \n"
"1: \n"
    : "=m" (has_CPUID)
    :
    : "%eax" , "%ecx"
    );
#elif (defined(__GNUC__) || defined(__llvm__)) && defined(__x86_64__)
/* Technically, if this is being compiled under __x86_64__ then it has
   CPUid by definition. But it's nice to be able to prove it. :) */
    __asm__ (
" pushfq # Get original EFLAGS \n"
" popq %%rax \n"
" movq %%rax,%%rcx \n"
" xorl $0x200000,%%eax # Flip ID bit in EFLAGS \n"
" pushq %%rax # Save new EFLAGS value on stack \n"
" popfq # Replace current EFLAGS value \n"
" pushfq # Get new EFLAGS \n"
" popq %%rax # Store new EFLAGS in EAX \n"
" xorl %%ecx,%%eax # Can not toggle ID bit, \n"
" jz 1f # Processor=80486 \n"
" movl $1,%0 # We have CPUID support \n"
"1: \n"
    : "=m" (has_CPUID)
    :
    : "%rax" , "%rcx"
    );
#elif (defined(_MSC_VER) && defined(_M_IX86)) || defined(__WATCOMC__)
    /* MSVC / Watcom, 32-bit x86: same probe in Intel syntax. */
    __asm {
        pushfd ; Get original EFLAGS
        pop eax
        mov ecx, eax
        xor eax, 200000h ; Flip ID bit in EFLAGS
        push eax ; Save new EFLAGS value on stack
        popfd ; Replace current EFLAGS value
        pushfd ; Get new EFLAGS
        pop eax ; Store new EFLAGS in EAX
        xor eax, ecx ; Can not toggle ID bit,
        jz done ; Processor=80486
        mov has_CPUID,1 ; We have CPUID support
done:
    }
#elif defined(_MSC_VER) && defined(_M_X64)
    /* No probe on MSVC x64: CPUID is taken for granted. */
    has_CPUID = 1;
#elif defined(__sun) && defined(__i386)
    /* NOTE(review): this variant stores the result straight to -8(%ebp),
       presumably the stack slot of has_CPUID; that depends on the compiler's
       frame layout -- confirm. */
    __asm (
" pushfl \n"
" popl %eax \n"
" movl %eax,%ecx \n"
" xorl $0x200000,%eax \n"
" pushl %eax \n"
" popfl \n"
" pushfl \n"
" popl %eax \n"
" xorl %ecx,%eax \n"
" jz 1f \n"
" movl $1,-8(%ebp) \n"
"1: \n"
    );
#elif defined(__sun) && defined(__amd64)
    /* NOTE(review): as above, the result goes to -8(%rbp), presumably the
       stack slot of has_CPUID -- confirm. */
    __asm (
" pushfq \n"
" popq %rax \n"
" movq %rax,%rcx \n"
" xorl $0x200000,%eax \n"
" pushq %rax \n"
" popfq \n"
" pushfq \n"
" popq %rax \n"
" xorl %ecx,%eax \n"
" jz 1f \n"
" movl $1,-8(%rbp) \n"
"1: \n"
    );
#endif
#endif
/* *INDENT-ON* */
    return has_CPUID;
}
222 | |
/*
 * cpuid(func, a, b, c, d)
 *
 * Execute the CPUID instruction with EAX = func and store the resulting
 * EAX, EBX, ECX and EDX in the lvalues a, b, c and d.
 *
 * - GCC/Clang variants clear ECX first, save and restore EBX/RBX around the
 *   instruction, and hand the EBX result back through ESI/RSI.
 * - The MSVC 32-bit / Watcom variant clears ECX first and copies the four
 *   registers out directly.
 * - The MSVC x64 variant goes through the __cpuid intrinsic.
 * - Everywhere else the macro just zeroes all four outputs.
 */
#if (defined(__GNUC__) || defined(__llvm__)) && defined(__i386__)
#define cpuid(func, a, b, c, d) \
    __asm__ __volatile__ ( \
" pushl %%ebx \n" \
" xorl %%ecx,%%ecx \n" \
" cpuid \n" \
" movl %%ebx, %%esi \n" \
" popl %%ebx \n" : \
    "=a" (a), "=S" (b), "=c" (c), "=d" (d) : "a" (func))
#elif (defined(__GNUC__) || defined(__llvm__)) && defined(__x86_64__)
#define cpuid(func, a, b, c, d) \
    __asm__ __volatile__ ( \
" pushq %%rbx \n" \
" xorq %%rcx,%%rcx \n" \
" cpuid \n" \
" movq %%rbx, %%rsi \n" \
" popq %%rbx \n" : \
    "=a" (a), "=S" (b), "=c" (c), "=d" (d) : "a" (func))
#elif (defined(_MSC_VER) && defined(_M_IX86)) || defined(__WATCOMC__)
#define cpuid(func, a, b, c, d) \
    __asm { \
        __asm mov eax, func \
        __asm xor ecx, ecx \
        __asm cpuid \
        __asm mov a, eax \
        __asm mov b, ebx \
        __asm mov c, ecx \
        __asm mov d, edx \
}
#elif defined(_MSC_VER) && defined(_M_X64)
/* Expands to a braced block with its own CPUInfo[4] scratch array. */
#define cpuid(func, a, b, c, d) \
{ \
    int CPUInfo[4]; \
    __cpuid(CPUInfo, func); \
    a = CPUInfo[0]; \
    b = CPUInfo[1]; \
    c = CPUInfo[2]; \
    d = CPUInfo[3]; \
}
#else
/* No CPUID available: report all-zero registers; the (void) casts keep the
   outputs from being flagged as unused. */
#define cpuid(func, a, b, c, d) \
    do { a = b = c = d = 0; (void) a; (void) b; (void) c; (void) d; } while (0)
#endif
266 | |
267 | static int CPU_CPUIDFeatures[4]; |
268 | static int CPU_CPUIDMaxFunction = 0; |
269 | static SDL_bool CPU_OSSavesYMM = SDL_FALSE; |
270 | static SDL_bool CPU_OSSavesZMM = SDL_FALSE; |
271 | |
272 | static void |
273 | CPU_calcCPUIDFeatures(void) |
274 | { |
275 | static SDL_bool checked = SDL_FALSE; |
276 | if (!checked) { |
277 | checked = SDL_TRUE; |
278 | if (CPU_haveCPUID()) { |
279 | int a, b, c, d; |
280 | cpuid(0, a, b, c, d); |
281 | CPU_CPUIDMaxFunction = a; |
282 | if (CPU_CPUIDMaxFunction >= 1) { |
283 | cpuid(1, a, b, c, d); |
284 | CPU_CPUIDFeatures[0] = a; |
285 | CPU_CPUIDFeatures[1] = b; |
286 | CPU_CPUIDFeatures[2] = c; |
287 | CPU_CPUIDFeatures[3] = d; |
288 | |
289 | /* Check to make sure we can call xgetbv */ |
290 | if (c & 0x08000000) { |
291 | /* Call xgetbv to see if YMM (etc) register state is saved */ |
292 | #if (defined(__GNUC__) || defined(__llvm__)) && (defined(__i386__) || defined(__x86_64__)) |
293 | __asm__(".byte 0x0f, 0x01, 0xd0" : "=a" (a) : "c" (0) : "%edx" ); |
294 | #elif defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64)) && (_MSC_FULL_VER >= 160040219) /* VS2010 SP1 */ |
295 | a = (int)_xgetbv(0); |
296 | #elif (defined(_MSC_VER) && defined(_M_IX86)) || defined(__WATCOMC__) |
297 | __asm |
298 | { |
299 | xor ecx, ecx |
300 | _asm _emit 0x0f _asm _emit 0x01 _asm _emit 0xd0 |
301 | mov a, eax |
302 | } |
303 | #endif |
304 | CPU_OSSavesYMM = ((a & 6) == 6) ? SDL_TRUE : SDL_FALSE; |
305 | CPU_OSSavesZMM = (CPU_OSSavesYMM && ((a & 0xe0) == 0xe0)) ? SDL_TRUE : SDL_FALSE; |
306 | } |
307 | } |
308 | } |
309 | } |
310 | } |
311 | |
/*
 * Return nonzero when AltiVec is usable, 0 otherwise.
 *
 * On Mac OS X / OpenBSD / FreeBSD PowerPC builds the answer comes from
 * sysctl().  Otherwise, when SDL_ALTIVEC_BLITTERS and HAVE_SETJMP are set,
 * an AltiVec instruction is executed with a temporary SIGILL handler
 * installed: if it traps, illegal_instruction() jumps back to the setjmp()
 * and the result stays 0.  With SDL_CPUINFO_DISABLED, or when neither path
 * applies, the function returns 0.
 */
static int
CPU_haveAltiVec(void)
{
    /* volatile: the value must survive the longjmp() out of the signal handler */
    volatile int altivec = 0;
#ifndef SDL_CPUINFO_DISABLED
#if (defined(__MACOSX__) && (defined(__ppc__) || defined(__ppc64__))) || (defined(__OpenBSD__) && defined(__powerpc__)) || (defined(__FreeBSD__) && defined(__powerpc__))
#if defined(__OpenBSD__)
    int mib[2] = { CTL_MACHDEP, CPU_ALTIVEC };
#elif defined(__FreeBSD__)
    int mib[2] = { CTL_HW, PPC_FEATURE_HAS_ALTIVEC };
#else
    int mib[2] = { CTL_HW, HW_VECTORUNIT };
#endif
    int vector_unit = 0;
    size_t vector_unit_len = sizeof(vector_unit);

    /* Only trust the value if the query itself succeeded. */
    if (sysctl(mib, 2, &vector_unit, &vector_unit_len, NULL, 0) == 0) {
        altivec = (vector_unit != 0);
    }
#elif SDL_ALTIVEC_BLITTERS && HAVE_SETJMP
    void (*previous_handler) (int sig) = signal(SIGILL, illegal_instruction);

    if (setjmp(jmpbuf) == 0) {
        asm volatile ("mtspr 256, %0\n\t" "vand %%v0, %%v0, %%v0" ::"r" (-1));
        altivec = 1; /* only reached when the instructions did not trap */
    }
    signal(SIGILL, previous_handler); /* put the caller's handler back */
#endif
#endif
    return altivec;
}
342 | |
/*
 * CPU_haveARMSIMD(): nonzero when the ARM SIMD feature is reported as
 * available.  Exactly one of the following definitions is compiled,
 * depending on the target.
 */
#if (defined(__ARM_ARCH) && (__ARM_ARCH >= 6)) || defined(__aarch64__)
/* Built for ARMv6 or later (or AArch64): always reported as available. */
static int
CPU_haveARMSIMD(void)
{
    return 1;
}

#elif !defined(__arm__)
/* Not an ARM build: never available. */
static int
CPU_haveARMSIMD(void)
{
    return 0;
}

#elif defined(__LINUX__)
/* Linux on older ARM: scan /proc/self/auxv for the AT_PLATFORM entry and
   accept platform strings starting with "v6l" or "v7l". */
static int
CPU_haveARMSIMD(void)
{
    int arm_simd = 0;
    int fd;

    fd = open("/proc/self/auxv", O_RDONLY);
    if (fd >= 0) {
        Elf32_auxv_t aux;
        while (read(fd, &aux, sizeof aux) == sizeof aux) {
            if (aux.a_type == AT_PLATFORM) {
                /* the entry's value is used as a pointer to the platform string */
                const char *plat = (const char *) aux.a_un.a_val;
                if (plat) {
                    arm_simd = strncmp(plat, "v6l", 3) == 0 ||
                               strncmp(plat, "v7l", 3) == 0;
                }
            }
        }
        close(fd);
    }
    return arm_simd;
}

#elif defined(__RISCOS__)
/* RISC OS: ask the OS_PlatformFeatures SWI.  Any SWI error, or bit 31 being
   clear in the first reply, yields 0. */
static int
CPU_haveARMSIMD(void)
{
    _kernel_swi_regs regs;

    regs.r[0] = 0;
    if (_kernel_swi(OS_PlatformFeatures, &regs, &regs) != NULL) {
        return 0;
    }

    if (!(regs.r[0] & (1<<31))) {
        return 0;
    }

    /* NOTE(review): reason code 34 with r1 = 29 presumably asks about one
       specific CPU feature -- confirm against the RISC OS documentation. */
    regs.r[0] = 34;
    regs.r[1] = 29;
    if (_kernel_swi(OS_PlatformFeatures, &regs, &regs) != NULL) {
        return 0;
    }

    return regs.r[0];
}

#else
static int
CPU_haveARMSIMD(void)
{
#warning SDL_HasARMSIMD is not implemented for this ARM platform. Write me.
    return 0;
}
#endif
412 | |
#if defined(__LINUX__) && defined(__arm__) && !defined(HAVE_GETAUXVAL)
/*
 * Fallback NEON probe for ARM Linux builds without getauxval(): walk the
 * entries of /proc/self/auxv until the AT_HWCAP one is found and test its
 * HWCAP_NEON bit.  Returns 1 when the bit is set; 0 when it is clear, the
 * file cannot be opened, or no AT_HWCAP entry is read.
 */
static int
readProcAuxvForNeon(void)
{
    Elf32_auxv_t entry;
    int has_neon = 0;
    const int auxv = open("/proc/self/auxv", O_RDONLY);

    if (auxv < 0) {
        return 0;
    }

    for (;;) {
        /* a short read means end of file (or an error): stop scanning */
        if (read(auxv, &entry, sizeof (entry)) != sizeof (entry)) {
            break;
        }
        if (entry.a_type == AT_HWCAP) {
            has_neon = (entry.a_un.a_val & HWCAP_NEON) == HWCAP_NEON;
            break;
        }
    }

    close(auxv);
    return has_neon;
}
#endif
435 | |
/*
 * Return nonzero when NEON is available, 0 otherwise.
 *
 * Exactly one branch of the preprocessor chain below is compiled; each one
 * either hard-codes the answer for the target or asks the operating system.
 */
static int
CPU_haveNEON(void)
{
/* The way you detect NEON is a privileged instruction on ARM, so you have
   to query the OS kernel in a platform-specific way. :/ */
#if defined(SDL_CPUINFO_DISABLED)
    return 0; /* disabled */
#elif (defined(__WINDOWS__) || defined(__WINRT__)) && (defined(_M_ARM) || defined(_M_ARM64))
/* Visual Studio, for ARM, doesn't define __ARM_ARCH. Handle this first. */
/* Seems to have been removed */
# if !defined(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE)
# define PF_ARM_NEON_INSTRUCTIONS_AVAILABLE 19
# endif
/* All WinRT ARM devices are required to support NEON, but just in case. */
    return IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE) != 0;
#elif (defined(__ARM_ARCH) && (__ARM_ARCH >= 8)) || defined(__aarch64__)
    return 1; /* ARMv8 always has non-optional NEON support. */
#elif __VITA__
    return 1;
#elif defined(__APPLE__) && defined(__ARM_ARCH) && (__ARM_ARCH >= 7)
    /* (note that sysctlbyname("hw.optional.neon") doesn't work!) */
    return 1; /* all Apple ARMv7 chips and later have NEON. */
#elif defined(__APPLE__)
    return 0; /* assume anything else from Apple doesn't have NEON. */
#elif !defined(__arm__)
    return 0; /* not an ARM CPU at all. */
#elif defined(__OpenBSD__)
    return 1; /* OpenBSD only supports ARMv7 CPUs that have NEON. */
#elif defined(HAVE_ELF_AUX_INFO)
    unsigned long hasneon = 0;
    if (elf_aux_info(AT_HWCAP, (void *)&hasneon, (int)sizeof(hasneon)) != 0) {
        return 0; /* the query failed: report "no NEON" */
    }
    return ((hasneon & HWCAP_NEON) == HWCAP_NEON);
#elif defined(__QNXNTO__)
    return SYSPAGE_ENTRY(cpuinfo)->flags & ARM_CPU_FLAG_NEON;
#elif (defined(__LINUX__) || defined(__ANDROID__)) && defined(HAVE_GETAUXVAL)
    return ((getauxval(AT_HWCAP) & HWCAP_NEON) == HWCAP_NEON);
#elif defined(__LINUX__)
    return readProcAuxvForNeon();
#elif defined(__ANDROID__)
    /* Use NDK cpufeatures to read either /proc/self/auxv or /proc/cpuinfo */
    {
        AndroidCpuFamily cpu_family = android_getCpuFamily();
        if (cpu_family == ANDROID_CPU_FAMILY_ARM) {
            uint64_t cpu_features = android_getCpuFeatures();
            if ((cpu_features & ANDROID_CPU_ARM_FEATURE_NEON) != 0) {
                return 1;
            }
        }
        return 0;
    }
#elif defined(__RISCOS__)
    /* Use the VFPSupport_Features SWI to access the MVFR registers */
    {
        _kernel_swi_regs regs;
        regs.r[0] = 0;
        if (_kernel_swi(VFPSupport_Features, &regs, &regs) == NULL) {
            if ((regs.r[2] & 0xFFF000) == 0x111000) {
                return 1;
            }
        }
        return 0;
    }
#else
#warning SDL_HasNEON is not implemented for this ARM platform. Write me.
    return 0;
#endif
}
504 | |
505 | #if defined(__e2k__) |
506 | inline int |
507 | CPU_have3DNow(void) |
508 | { |
509 | #if defined(__3dNOW__) |
510 | return 1; |
511 | #else |
512 | return 0; |
513 | #endif |
514 | } |
515 | #else |
516 | static int |
517 | CPU_have3DNow(void) |
518 | { |
519 | if (CPU_CPUIDMaxFunction > 0) { /* that is, do we have CPUID at all? */ |
520 | int a, b, c, d; |
521 | cpuid(0x80000000, a, b, c, d); |
522 | if (a >= 0x80000001) { |
523 | cpuid(0x80000001, a, b, c, d); |
524 | return (d & 0x80000000); |
525 | } |
526 | } |
527 | return 0; |
528 | } |
529 | #endif |
530 | |
/*
 * x86 feature predicates.
 *
 * On e2k the answers are fixed at compile time from the compiler's feature
 * macros (RDTSC is always reported as absent).  Everywhere else they test
 * single bits of the cached CPUID function 1 results: CPU_CPUIDFeatures[2]
 * holds ECX and CPU_CPUIDFeatures[3] holds EDX.
 */
#ifdef __e2k__
#define CPU_haveRDTSC() (0)

#ifdef __MMX__
#define CPU_haveMMX() (1)
#else
#define CPU_haveMMX() (0)
#endif

#ifdef __SSE__
#define CPU_haveSSE() (1)
#else
#define CPU_haveSSE() (0)
#endif

#ifdef __SSE2__
#define CPU_haveSSE2() (1)
#else
#define CPU_haveSSE2() (0)
#endif

#ifdef __SSE3__
#define CPU_haveSSE3() (1)
#else
#define CPU_haveSSE3() (0)
#endif

#ifdef __SSE4_1__
#define CPU_haveSSE41() (1)
#else
#define CPU_haveSSE41() (0)
#endif

#ifdef __SSE4_2__
#define CPU_haveSSE42() (1)
#else
#define CPU_haveSSE42() (0)
#endif

#ifdef __AVX__
#define CPU_haveAVX() (1)
#else
#define CPU_haveAVX() (0)
#endif

#else /* !__e2k__ */

#define CPU_haveRDTSC() (CPU_CPUIDFeatures[3] & (1 << 4))  /* EDX bit 4 */
#define CPU_haveMMX()   (CPU_CPUIDFeatures[3] & (1 << 23)) /* EDX bit 23 */
#define CPU_haveSSE()   (CPU_CPUIDFeatures[3] & (1 << 25)) /* EDX bit 25 */
#define CPU_haveSSE2()  (CPU_CPUIDFeatures[3] & (1 << 26)) /* EDX bit 26 */
#define CPU_haveSSE3()  (CPU_CPUIDFeatures[2] & (1 << 0))  /* ECX bit 0 */
#define CPU_haveSSE41() (CPU_CPUIDFeatures[2] & (1 << 19)) /* ECX bit 19 */
#define CPU_haveSSE42() (CPU_CPUIDFeatures[2] & (1 << 20)) /* ECX bit 20 */
/* AVX additionally requires that the OS saves the YMM registers. */
#define CPU_haveAVX()   (CPU_OSSavesYMM && (CPU_CPUIDFeatures[2] & (1 << 28))) /* ECX bit 28 */

#endif /* __e2k__ */
578 | |
579 | #if defined(__e2k__) |
580 | inline int |
581 | CPU_haveAVX2(void) |
582 | { |
583 | #if defined(__AVX2__) |
584 | return 1; |
585 | #else |
586 | return 0; |
587 | #endif |
588 | } |
589 | #else |
590 | static int |
591 | CPU_haveAVX2(void) |
592 | { |
593 | if (CPU_OSSavesYMM && (CPU_CPUIDMaxFunction >= 7)) { |
594 | int a, b, c, d; |
595 | (void) a; (void) b; (void) c; (void) d; /* compiler warnings... */ |
596 | cpuid(7, a, b, c, d); |
597 | return (b & 0x00000020); |
598 | } |
599 | return 0; |
600 | } |
601 | #endif |
602 | |
603 | #if defined(__e2k__) |
604 | inline int |
605 | CPU_haveAVX512F(void) |
606 | { |
607 | return 0; |
608 | } |
609 | #else |
610 | static int |
611 | CPU_haveAVX512F(void) |
612 | { |
613 | if (CPU_OSSavesZMM && (CPU_CPUIDMaxFunction >= 7)) { |
614 | int a, b, c, d; |
615 | (void) a; (void) b; (void) c; (void) d; /* compiler warnings... */ |
616 | cpuid(7, a, b, c, d); |
617 | return (b & 0x00010000); |
618 | } |
619 | return 0; |
620 | } |
621 | #endif |
622 | |
/* Cached result of SDL_GetCPUCount(); 0 means "not determined yet". */
static int SDL_CPUCount = 0;

/*
 * Return the number of CPUs, always at least 1.
 *
 * The value is computed on the first call and cached.  Each platform probe
 * that is compiled in only runs while no positive count has been found; if
 * none of them produces one, 1 is returned.
 */
int
SDL_GetCPUCount(void)
{
    if (SDL_CPUCount) {
        return SDL_CPUCount; /* already known */
    }

#ifndef SDL_CPUINFO_DISABLED
#if defined(HAVE_SYSCONF) && defined(_SC_NPROCESSORS_ONLN)
    if (SDL_CPUCount < 1) {
        SDL_CPUCount = (int)sysconf(_SC_NPROCESSORS_ONLN);
    }
#endif
#ifdef HAVE_SYSCTLBYNAME
    if (SDL_CPUCount < 1) {
        size_t count_len = sizeof(SDL_CPUCount);
        sysctlbyname("hw.ncpu", &SDL_CPUCount, &count_len, NULL, 0);
    }
#endif
#ifdef __WIN32__
    if (SDL_CPUCount < 1) {
        SYSTEM_INFO sysinfo;
        GetSystemInfo(&sysinfo);
        SDL_CPUCount = sysinfo.dwNumberOfProcessors;
    }
#endif
#ifdef __OS2__
    if (SDL_CPUCount < 1) {
        DosQuerySysInfo(QSV_NUMPROCESSORS, QSV_NUMPROCESSORS,
                        &SDL_CPUCount, sizeof(SDL_CPUCount));
    }
#endif
#endif

    /* There has to be at least 1, right? :) */
    if (SDL_CPUCount < 1) {
        SDL_CPUCount = 1;
    }
    return SDL_CPUCount;
}
662 | |
663 | #if defined(__e2k__) |
664 | inline const char * |
665 | SDL_GetCPUType(void) |
666 | { |
667 | static char SDL_CPUType[13]; |
668 | |
669 | SDL_strlcpy(SDL_CPUType, "E2K MACHINE" , sizeof(SDL_CPUType)); |
670 | |
671 | return SDL_CPUType; |
672 | } |
673 | #else |
674 | /* Oh, such a sweet sweet trick, just not very useful. :) */ |
675 | static const char * |
676 | SDL_GetCPUType(void) |
677 | { |
678 | static char SDL_CPUType[13]; |
679 | |
680 | if (!SDL_CPUType[0]) { |
681 | int i = 0; |
682 | |
683 | CPU_calcCPUIDFeatures(); |
684 | if (CPU_CPUIDMaxFunction > 0) { /* do we have CPUID at all? */ |
685 | int a, b, c, d; |
686 | cpuid(0x00000000, a, b, c, d); |
687 | (void) a; |
688 | SDL_CPUType[i++] = (char)(b & 0xff); b >>= 8; |
689 | SDL_CPUType[i++] = (char)(b & 0xff); b >>= 8; |
690 | SDL_CPUType[i++] = (char)(b & 0xff); b >>= 8; |
691 | SDL_CPUType[i++] = (char)(b & 0xff); |
692 | |
693 | SDL_CPUType[i++] = (char)(d & 0xff); d >>= 8; |
694 | SDL_CPUType[i++] = (char)(d & 0xff); d >>= 8; |
695 | SDL_CPUType[i++] = (char)(d & 0xff); d >>= 8; |
696 | SDL_CPUType[i++] = (char)(d & 0xff); |
697 | |
698 | SDL_CPUType[i++] = (char)(c & 0xff); c >>= 8; |
699 | SDL_CPUType[i++] = (char)(c & 0xff); c >>= 8; |
700 | SDL_CPUType[i++] = (char)(c & 0xff); c >>= 8; |
701 | SDL_CPUType[i++] = (char)(c & 0xff); |
702 | } |
703 | if (!SDL_CPUType[0]) { |
704 | SDL_strlcpy(SDL_CPUType, "Unknown" , sizeof(SDL_CPUType)); |
705 | } |
706 | } |
707 | return SDL_CPUType; |
708 | } |
709 | #endif |
710 | |
711 | |
#ifdef TEST_MAIN /* !!! FIXME: only used for test at the moment. */
/*
 * SDL_GetCPUName(): return a pointer to a static, NUL-terminated string with
 * the CPU's name.
 *
 * Both variants are `static`: a plain `inline` definition provides no
 * external definition in C99 and can fail to link when the call is not
 * inlined.
 */
#if defined(__e2k__)
/* e2k: the name comes from the compiler builtin. */
static const char *
SDL_GetCPUName(void)
{
    static char SDL_CPUName[48];

    SDL_strlcpy(SDL_CPUName, __builtin_cpu_name(), sizeof(SDL_CPUName));

    return SDL_CPUName;
}
#else
/* Append the 16 bytes held in a, b, c, d (registers in that order, low byte
   first) to dst starting at offset pos; returns the offset after them. */
static int
CPU_unpackCPUIDRegs(char *dst, int pos, int a, int b, int c, int d)
{
    Uint32 regs[4];
    int r, k;

    regs[0] = (Uint32) a;
    regs[1] = (Uint32) b;
    regs[2] = (Uint32) c;
    regs[3] = (Uint32) d;

    for (r = 0; r < 4; ++r) {
        for (k = 0; k < 4; ++k) {
            dst[pos++] = (char) ((regs[r] >> (8 * k)) & 0xff);
        }
    }
    return pos;
}

/* Everything else: the text returned by CPUID functions 0x80000002 through
   0x80000004, or "Unknown" when those functions are unavailable or the text
   is empty.  Computed once and cached. */
static const char *
SDL_GetCPUName(void)
{
    /* 3 functions * 16 bytes = 48 bytes of text, plus one byte that is never
       written so the string is NUL-terminated even if all 48 are used. */
    static char SDL_CPUName[49];

    if (!SDL_CPUName[0]) {
        int i = 0;
        int a, b, c, d;

        CPU_calcCPUIDFeatures();
        if (CPU_CPUIDMaxFunction > 0) { /* do we have CPUID at all? */
            cpuid(0x80000000, a, b, c, d);
            /* compared as unsigned: the extended function numbers do not fit in int */
            if ((Uint32) a >= 0x80000004) {
                cpuid(0x80000002, a, b, c, d);
                i = CPU_unpackCPUIDRegs(SDL_CPUName, i, a, b, c, d);
                cpuid(0x80000003, a, b, c, d);
                i = CPU_unpackCPUIDRegs(SDL_CPUName, i, a, b, c, d);
                cpuid(0x80000004, a, b, c, d);
                i = CPU_unpackCPUIDRegs(SDL_CPUName, i, a, b, c, d);
            }
        }
        (void) i;
        if (!SDL_CPUName[0]) {
            SDL_strlcpy(SDL_CPUName, "Unknown", sizeof(SDL_CPUName));
        }
    }
    return SDL_CPUName;
}
#endif
#endif
798 | |
799 | int |
800 | SDL_GetCPUCacheLineSize(void) |
801 | { |
802 | const char *cpuType = SDL_GetCPUType(); |
803 | int a, b, c, d; |
804 | (void) a; (void) b; (void) c; (void) d; |
805 | if (SDL_strcmp(cpuType, "GenuineIntel" ) == 0 || SDL_strcmp(cpuType, "CentaurHauls" ) == 0 || SDL_strcmp(cpuType, " Shanghai " ) == 0) { |
806 | cpuid(0x00000001, a, b, c, d); |
807 | return (((b >> 8) & 0xff) * 8); |
808 | } else if (SDL_strcmp(cpuType, "AuthenticAMD" ) == 0 || SDL_strcmp(cpuType, "HygonGenuine" ) == 0) { |
809 | cpuid(0x80000005, a, b, c, d); |
810 | return (c & 0xff); |
811 | } else { |
812 | /* Just make a guess here... */ |
813 | return SDL_CACHELINE_SIZE; |
814 | } |
815 | } |
816 | |
817 | static Uint32 SDL_CPUFeatures = 0xFFFFFFFF; |
818 | static Uint32 SDL_SIMDAlignment = 0xFFFFFFFF; |
819 | |
820 | static Uint32 |
821 | SDL_GetCPUFeatures(void) |
822 | { |
823 | if (SDL_CPUFeatures == 0xFFFFFFFF) { |
824 | CPU_calcCPUIDFeatures(); |
825 | SDL_CPUFeatures = 0; |
826 | SDL_SIMDAlignment = sizeof(void *); /* a good safe base value */ |
827 | if (CPU_haveRDTSC()) { |
828 | SDL_CPUFeatures |= CPU_HAS_RDTSC; |
829 | } |
830 | if (CPU_haveAltiVec()) { |
831 | SDL_CPUFeatures |= CPU_HAS_ALTIVEC; |
832 | SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16); |
833 | } |
834 | if (CPU_haveMMX()) { |
835 | SDL_CPUFeatures |= CPU_HAS_MMX; |
836 | SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 8); |
837 | } |
838 | if (CPU_have3DNow()) { |
839 | SDL_CPUFeatures |= CPU_HAS_3DNOW; |
840 | SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 8); |
841 | } |
842 | if (CPU_haveSSE()) { |
843 | SDL_CPUFeatures |= CPU_HAS_SSE; |
844 | SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16); |
845 | } |
846 | if (CPU_haveSSE2()) { |
847 | SDL_CPUFeatures |= CPU_HAS_SSE2; |
848 | SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16); |
849 | } |
850 | if (CPU_haveSSE3()) { |
851 | SDL_CPUFeatures |= CPU_HAS_SSE3; |
852 | SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16); |
853 | } |
854 | if (CPU_haveSSE41()) { |
855 | SDL_CPUFeatures |= CPU_HAS_SSE41; |
856 | SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16); |
857 | } |
858 | if (CPU_haveSSE42()) { |
859 | SDL_CPUFeatures |= CPU_HAS_SSE42; |
860 | SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16); |
861 | } |
862 | if (CPU_haveAVX()) { |
863 | SDL_CPUFeatures |= CPU_HAS_AVX; |
864 | SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 32); |
865 | } |
866 | if (CPU_haveAVX2()) { |
867 | SDL_CPUFeatures |= CPU_HAS_AVX2; |
868 | SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 32); |
869 | } |
870 | if (CPU_haveAVX512F()) { |
871 | SDL_CPUFeatures |= CPU_HAS_AVX512F; |
872 | SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 64); |
873 | } |
874 | if (CPU_haveARMSIMD()) { |
875 | SDL_CPUFeatures |= CPU_HAS_ARM_SIMD; |
876 | SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16); |
877 | } |
878 | if (CPU_haveNEON()) { |
879 | SDL_CPUFeatures |= CPU_HAS_NEON; |
880 | SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16); |
881 | } |
882 | } |
883 | return SDL_CPUFeatures; |
884 | } |
885 | |
/* SDL_TRUE when any bit of mask `f` is set in SDL_GetCPUFeatures(), else
   SDL_FALSE.  `f` is parenthesised so that compound arguments such as
   (A | B) are tested as a whole. */
#define CPU_FEATURE_AVAILABLE(f) ((SDL_GetCPUFeatures() & (f)) ? SDL_TRUE : SDL_FALSE)
887 | |
888 | SDL_bool SDL_HasRDTSC(void) |
889 | { |
890 | return CPU_FEATURE_AVAILABLE(CPU_HAS_RDTSC); |
891 | } |
892 | |
893 | SDL_bool |
894 | SDL_HasAltiVec(void) |
895 | { |
896 | return CPU_FEATURE_AVAILABLE(CPU_HAS_ALTIVEC); |
897 | } |
898 | |
899 | SDL_bool |
900 | SDL_HasMMX(void) |
901 | { |
902 | return CPU_FEATURE_AVAILABLE(CPU_HAS_MMX); |
903 | } |
904 | |
905 | SDL_bool |
906 | SDL_Has3DNow(void) |
907 | { |
908 | return CPU_FEATURE_AVAILABLE(CPU_HAS_3DNOW); |
909 | } |
910 | |
911 | SDL_bool |
912 | SDL_HasSSE(void) |
913 | { |
914 | return CPU_FEATURE_AVAILABLE(CPU_HAS_SSE); |
915 | } |
916 | |
917 | SDL_bool |
918 | SDL_HasSSE2(void) |
919 | { |
920 | return CPU_FEATURE_AVAILABLE(CPU_HAS_SSE2); |
921 | } |
922 | |
923 | SDL_bool |
924 | SDL_HasSSE3(void) |
925 | { |
926 | return CPU_FEATURE_AVAILABLE(CPU_HAS_SSE3); |
927 | } |
928 | |
929 | SDL_bool |
930 | SDL_HasSSE41(void) |
931 | { |
932 | return CPU_FEATURE_AVAILABLE(CPU_HAS_SSE41); |
933 | } |
934 | |
935 | SDL_bool |
936 | SDL_HasSSE42(void) |
937 | { |
938 | return CPU_FEATURE_AVAILABLE(CPU_HAS_SSE42); |
939 | } |
940 | |
941 | SDL_bool |
942 | SDL_HasAVX(void) |
943 | { |
944 | return CPU_FEATURE_AVAILABLE(CPU_HAS_AVX); |
945 | } |
946 | |
947 | SDL_bool |
948 | SDL_HasAVX2(void) |
949 | { |
950 | return CPU_FEATURE_AVAILABLE(CPU_HAS_AVX2); |
951 | } |
952 | |
953 | SDL_bool |
954 | SDL_HasAVX512F(void) |
955 | { |
956 | return CPU_FEATURE_AVAILABLE(CPU_HAS_AVX512F); |
957 | } |
958 | |
959 | SDL_bool |
960 | SDL_HasARMSIMD(void) |
961 | { |
962 | return CPU_FEATURE_AVAILABLE(CPU_HAS_ARM_SIMD); |
963 | } |
964 | |
965 | SDL_bool |
966 | SDL_HasNEON(void) |
967 | { |
968 | return CPU_FEATURE_AVAILABLE(CPU_HAS_NEON); |
969 | } |
970 | |
/* Cached result of SDL_GetSystemRAM(); 0 means "not determined yet". */
static int SDL_SystemRAM = 0;

/*
 * Return the amount of system RAM in MiB, or 0 when it cannot be determined.
 *
 * A non-zero value is cached.  Each platform probe that is compiled in only
 * runs while no positive amount has been found.
 */
int
SDL_GetSystemRAM(void)
{
    if (SDL_SystemRAM) {
        return SDL_SystemRAM; /* already known */
    }

#ifndef SDL_CPUINFO_DISABLED
#if defined(HAVE_SYSCONF) && defined(_SC_PHYS_PAGES) && defined(_SC_PAGESIZE)
    if (SDL_SystemRAM <= 0) {
        /* page count * page size, widened to 64 bits before multiplying */
        SDL_SystemRAM = (int)((Sint64)sysconf(_SC_PHYS_PAGES) * sysconf(_SC_PAGESIZE) / (1024*1024));
    }
#endif
#ifdef HAVE_SYSCTLBYNAME
    if (SDL_SystemRAM <= 0) {
#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__) || defined(__NetBSD__) || defined(__DragonFly__)
#ifdef HW_REALMEM
        int mib[2] = {CTL_HW, HW_REALMEM};
#else
        /* might only report up to 2 GiB */
        int mib[2] = {CTL_HW, HW_PHYSMEM};
#endif /* HW_REALMEM */
#else
        int mib[2] = {CTL_HW, HW_MEMSIZE};
#endif /* __FreeBSD__ || __FreeBSD_kernel__ */
        Uint64 memsize = 0;
        size_t len = sizeof(memsize);

        if (sysctl(mib, 2, &memsize, &len, NULL, 0) == 0) {
            SDL_SystemRAM = (int)(memsize / (1024*1024));
        }
    }
#endif
#ifdef __WIN32__
    if (SDL_SystemRAM <= 0) {
        MEMORYSTATUSEX stat;
        stat.dwLength = sizeof(stat);
        if (GlobalMemoryStatusEx(&stat)) {
            SDL_SystemRAM = (int)(stat.ullTotalPhys / (1024 * 1024));
        }
    }
#endif
#ifdef __OS2__
    if (SDL_SystemRAM <= 0) {
        Uint32 sysram = 0;
        DosQuerySysInfo(QSV_TOTPHYSMEM, QSV_TOTPHYSMEM, &sysram, 4);
        SDL_SystemRAM = (int) (sysram / 0x100000U);
    }
#endif
#ifdef __RISCOS__
    if (SDL_SystemRAM <= 0) {
        _kernel_swi_regs regs;
        /* NOTE(review): 0x108 is presumably the OS_Memory reason code that
           returns a count in r1 and a unit size in r2 -- confirm. */
        regs.r[0] = 0x108;
        if (_kernel_swi(OS_Memory, &regs, &regs) == NULL) {
            /* widen before multiplying so the product cannot overflow int */
            SDL_SystemRAM = (int)((Sint64)regs.r[1] * regs.r[2] / (1024 * 1024));
        }
    }
#endif
#endif
    return SDL_SystemRAM;
}
1032 | |
1033 | |
1034 | size_t |
1035 | SDL_SIMDGetAlignment(void) |
1036 | { |
1037 | if (SDL_SIMDAlignment == 0xFFFFFFFF) { |
1038 | SDL_GetCPUFeatures(); /* make sure this has been calculated */ |
1039 | } |
1040 | SDL_assert(SDL_SIMDAlignment != 0); |
1041 | return SDL_SIMDAlignment; |
1042 | } |
1043 | |
1044 | void * |
1045 | SDL_SIMDAlloc(const size_t len) |
1046 | { |
1047 | const size_t alignment = SDL_SIMDGetAlignment(); |
1048 | const size_t padding = alignment - (len % alignment); |
1049 | const size_t padded = (padding != alignment) ? (len + padding) : len; |
1050 | Uint8 *retval = NULL; |
1051 | Uint8 *ptr = (Uint8 *) SDL_malloc(padded + alignment + sizeof (void *)); |
1052 | if (ptr) { |
1053 | /* store the actual malloc pointer right before our aligned pointer. */ |
1054 | retval = ptr + sizeof (void *); |
1055 | retval += alignment - (((size_t) retval) % alignment); |
1056 | *(((void **) retval) - 1) = ptr; |
1057 | } |
1058 | return retval; |
1059 | } |
1060 | |
1061 | void * |
1062 | SDL_SIMDRealloc(void *mem, const size_t len) |
1063 | { |
1064 | const size_t alignment = SDL_SIMDGetAlignment(); |
1065 | const size_t padding = alignment - (len % alignment); |
1066 | const size_t padded = (padding != alignment) ? (len + padding) : len; |
1067 | Uint8 *retval = (Uint8*) mem; |
1068 | void *oldmem = mem; |
1069 | size_t memdiff = 0, ptrdiff; |
1070 | Uint8 *ptr; |
1071 | |
1072 | if (mem) { |
1073 | void **realptr = (void **) mem; |
1074 | realptr--; |
1075 | mem = *(((void **) mem) - 1); |
1076 | |
1077 | /* Check the delta between the real pointer and user pointer */ |
1078 | memdiff = ((size_t) oldmem) - ((size_t) mem); |
1079 | } |
1080 | |
1081 | ptr = (Uint8 *) SDL_realloc(mem, padded + alignment + sizeof (void *)); |
1082 | |
1083 | if (ptr == mem) { |
1084 | return retval; /* Pointer didn't change, nothing to do */ |
1085 | } |
1086 | if (ptr == NULL) { |
1087 | return NULL; /* Out of memory, bail! */ |
1088 | } |
1089 | |
1090 | /* Store the actual malloc pointer right before our aligned pointer. */ |
1091 | retval = ptr + sizeof (void *); |
1092 | retval += alignment - (((size_t) retval) % alignment); |
1093 | |
1094 | /* Make sure the delta is the same! */ |
1095 | if (mem) { |
1096 | ptrdiff = ((size_t) retval) - ((size_t) ptr); |
1097 | if (memdiff != ptrdiff) { /* Delta has changed, copy to new offset! */ |
1098 | oldmem = (void*) (((size_t) ptr) + memdiff); |
1099 | |
1100 | /* Even though the data past the old `len` is undefined, this is the |
1101 | * only length value we have, and it guarantees that we copy all the |
1102 | * previous memory anyhow. |
1103 | */ |
1104 | SDL_memmove(retval, oldmem, len); |
1105 | } |
1106 | } |
1107 | |
1108 | /* Actually store the malloc pointer, finally. */ |
1109 | *(((void **) retval) - 1) = ptr; |
1110 | return retval; |
1111 | } |
1112 | |
/*
 * Release a block by passing the pointer stored in the `void *` slot
 * immediately before `ptr` to SDL_free(). A NULL `ptr` is ignored.
 */
void
SDL_SIMDFree(void *ptr)
{
    void **slot;

    if (ptr == NULL) {
        return;
    }

    /* The real heap pointer lives one pointer-width before the user pointer. */
    slot = ((void **) ptr) - 1;
    SDL_free(*slot);
}
1122 | |
1123 | |
1124 | #ifdef TEST_MAIN |
1125 | |
1126 | #include <stdio.h> |
1127 | |
/* Stand-alone test driver: print the value returned by each query, one per line. */
int
main(void)
{
    /* General processor information. */
    printf("CPU count: %d\n", SDL_GetCPUCount());
    printf("CPU type: %s\n", SDL_GetCPUType());
    printf("CPU name: %s\n", SDL_GetCPUName());
    printf("CacheLine size: %d\n", SDL_GetCPUCacheLineSize());

    /* Individual feature queries. */
    printf("RDTSC: %d\n", SDL_HasRDTSC());
    printf("Altivec: %d\n", SDL_HasAltiVec());
    printf("MMX: %d\n", SDL_HasMMX());
    printf("3DNow: %d\n", SDL_Has3DNow());
    printf("SSE: %d\n", SDL_HasSSE());
    printf("SSE2: %d\n", SDL_HasSSE2());
    printf("SSE3: %d\n", SDL_HasSSE3());
    printf("SSE4.1: %d\n", SDL_HasSSE41());
    printf("SSE4.2: %d\n", SDL_HasSSE42());
    printf("AVX: %d\n", SDL_HasAVX());
    printf("AVX2: %d\n", SDL_HasAVX2());
    printf("AVX-512F: %d\n", SDL_HasAVX512F());
    printf("ARM SIMD: %d\n", SDL_HasARMSIMD());
    printf("NEON: %d\n", SDL_HasNEON());

    /* System memory. */
    printf("RAM: %d MB\n", SDL_GetSystemRAM());

    return 0;
}
1152 | |
1153 | #endif /* TEST_MAIN */ |
1154 | |
1155 | /* vi: set ts=4 sw=4 expandtab: */ |
1156 | |