1// Copyright 2011 Google Inc. All Rights Reserved.
2//
3// Use of this source code is governed by a BSD-style license
4// that can be found in the COPYING file in the root of the source
5// tree. An additional intellectual property rights grant can be found
6// in the file PATENTS. All contributing project authors may
7// be found in the AUTHORS file in the root of the source tree.
8// -----------------------------------------------------------------------------
9//
10// CPU detection
11//
12// Author: Christian Duvivier (cduvivier@google.com)
13
14#include "src/dsp/cpu.h"
15
16#if defined(WEBP_HAVE_NEON_RTCD)
17#include <stdio.h>
18#include <string.h>
19#endif
20
21#if defined(WEBP_ANDROID_NEON)
22#include <cpu-features.h>
23#endif
24
25//------------------------------------------------------------------------------
26// SSE2 detection.
27//
28
29// apple/darwin gcc-4.0.1 defines __PIC__, but not __pic__ with -fPIC.
30#if (defined(__pic__) || defined(__PIC__)) && defined(__i386__)
31static WEBP_INLINE void GetCPUInfo(int cpu_info[4], int info_type) {
32 __asm__ volatile (
33 "mov %%ebx, %%edi\n"
34 "cpuid\n"
35 "xchg %%edi, %%ebx\n"
36 : "=a"(cpu_info[0]), "=D"(cpu_info[1]), "=c"(cpu_info[2]), "=d"(cpu_info[3])
37 : "a"(info_type), "c"(0));
38}
39#elif defined(__x86_64__) && \
40 (defined(__code_model_medium__) || defined(__code_model_large__)) && \
41 defined(__PIC__)
42static WEBP_INLINE void GetCPUInfo(int cpu_info[4], int info_type) {
43 __asm__ volatile (
44 "xchg{q}\t{%%rbx}, %q1\n"
45 "cpuid\n"
46 "xchg{q}\t{%%rbx}, %q1\n"
47 : "=a"(cpu_info[0]), "=&r"(cpu_info[1]), "=c"(cpu_info[2]),
48 "=d"(cpu_info[3])
49 : "a"(info_type), "c"(0));
50}
51#elif defined(__i386__) || defined(__x86_64__)
52static WEBP_INLINE void GetCPUInfo(int cpu_info[4], int info_type) {
53 __asm__ volatile (
54 "cpuid\n"
55 : "=a"(cpu_info[0]), "=b"(cpu_info[1]), "=c"(cpu_info[2]), "=d"(cpu_info[3])
56 : "a"(info_type), "c"(0));
57}
58#elif defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86))
59
60#if defined(_MSC_FULL_VER) && _MSC_FULL_VER >= 150030729 // >= VS2008 SP1
61#include <intrin.h>
62#define GetCPUInfo(info, type) __cpuidex(info, type, 0) // set ecx=0
63#define WEBP_HAVE_MSC_CPUID
64#elif _MSC_VER > 1310
65#include <intrin.h>
66#define GetCPUInfo __cpuid
67#define WEBP_HAVE_MSC_CPUID
68#endif
69
70#endif
71
72// NaCl has no support for xgetbv or the raw opcode.
73#if !defined(__native_client__) && (defined(__i386__) || defined(__x86_64__))
74static WEBP_INLINE uint64_t xgetbv(void) {
75 const uint32_t ecx = 0;
76 uint32_t eax, edx;
77 // Use the raw opcode for xgetbv for compatibility with older toolchains.
78 __asm__ volatile (
79 ".byte 0x0f, 0x01, 0xd0\n"
80 : "=a"(eax), "=d"(edx) : "c" (ecx));
81 return ((uint64_t)edx << 32) | eax;
82}
83#elif (defined(_M_X64) || defined(_M_IX86)) && \
84 defined(_MSC_FULL_VER) && _MSC_FULL_VER >= 160040219 // >= VS2010 SP1
85#include <immintrin.h>
86#define xgetbv() _xgetbv(0)
87#elif defined(_MSC_VER) && defined(_M_IX86)
88static WEBP_INLINE uint64_t xgetbv(void) {
89 uint32_t eax_, edx_;
90 __asm {
91 xor ecx, ecx // ecx = 0
92 // Use the raw opcode for xgetbv for compatibility with older toolchains.
93 __asm _emit 0x0f __asm _emit 0x01 __asm _emit 0xd0
94 mov eax_, eax
95 mov edx_, edx
96 }
97 return ((uint64_t)edx_ << 32) | eax_;
98}
99#else
100#define xgetbv() 0U // no AVX for older x64 or unrecognized toolchains.
101#endif
102
103#if defined(__i386__) || defined(__x86_64__) || defined(WEBP_HAVE_MSC_CPUID)
104
// Helper for run-time detection of slow SSSE3 platforms.
// 'info' is the eax value of cpuid leaf 1. Returns 1 if it describes a
// family 6 processor whose model number is one of those listed below, and 0
// otherwise.
// The listed models have longer latencies for the bsr instruction (ie 2 cycles
// vs 10/16 cycles) and for some SSSE3 instructions like pshufb. Refer to the
// Intel 64 and IA-32 Architectures Optimization Reference Manual.
static int CheckSlowModel(int info) {
  const uint32_t family_id = (info >> 8) & 0xf;
  const uint32_t base_model = (info >> 4) & 0xf;
  const uint32_t extended_model = (info & 0xf0000) >> 16;
  // The extended model supplies the high nibble of the 8-bit model number.
  const uint32_t model_id = (extended_model << 4) | base_model;

  if (family_id != 0x06) return 0;

  switch (model_id) {
    // Silvermont Microarchitecture
    case 0x37:
    case 0x4a:
    case 0x4d:
    // Atom Microarchitecture
    case 0x1c:
    case 0x26:
    case 0x27:
      return 1;
    default:
      return 0;
  }
}
124
125static int x86CPUInfo(CPUFeature feature) {
126 int max_cpuid_value;
127 int cpu_info[4];
128 int is_intel = 0;
129
130 // get the highest feature value cpuid supports
131 GetCPUInfo(cpu_info, 0);
132 max_cpuid_value = cpu_info[0];
133 if (max_cpuid_value < 1) {
134 return 0;
135 } else {
136 const int VENDOR_ID_INTEL_EBX = 0x756e6547; // uneG
137 const int VENDOR_ID_INTEL_EDX = 0x49656e69; // Ieni
138 const int VENDOR_ID_INTEL_ECX = 0x6c65746e; // letn
139 is_intel = (cpu_info[1] == VENDOR_ID_INTEL_EBX &&
140 cpu_info[2] == VENDOR_ID_INTEL_ECX &&
141 cpu_info[3] == VENDOR_ID_INTEL_EDX); // genuine Intel?
142 }
143
144 GetCPUInfo(cpu_info, 1);
145 if (feature == kSSE2) {
146 return !!(cpu_info[3] & (1 << 26));
147 }
148 if (feature == kSSE3) {
149 return !!(cpu_info[2] & (1 << 0));
150 }
151 if (feature == kSlowSSSE3) {
152 if (is_intel && (cpu_info[2] & (1 << 9))) { // SSSE3?
153 return CheckSlowModel(cpu_info[0]);
154 }
155 return 0;
156 }
157
158 if (feature == kSSE4_1) {
159 return !!(cpu_info[2] & (1 << 19));
160 }
161 if (feature == kAVX) {
162 // bits 27 (OSXSAVE) & 28 (256-bit AVX)
163 if ((cpu_info[2] & 0x18000000) == 0x18000000) {
164 // XMM state and YMM state enabled by the OS.
165 return (xgetbv() & 0x6) == 0x6;
166 }
167 }
168 if (feature == kAVX2) {
169 if (x86CPUInfo(kAVX) && max_cpuid_value >= 7) {
170 GetCPUInfo(cpu_info, 7);
171 return !!(cpu_info[1] & (1 << 5));
172 }
173 }
174 return 0;
175}
176WEBP_EXTERN VP8CPUInfo VP8GetCPUInfo;
177VP8CPUInfo VP8GetCPUInfo = x86CPUInfo;
178#elif defined(WEBP_ANDROID_NEON) // NB: needs to be before generic NEON test.
179static int AndroidCPUInfo(CPUFeature feature) {
180 const AndroidCpuFamily cpu_family = android_getCpuFamily();
181 const uint64_t cpu_features = android_getCpuFeatures();
182 if (feature == kNEON) {
183 return cpu_family == ANDROID_CPU_FAMILY_ARM &&
184 (cpu_features & ANDROID_CPU_ARM_FEATURE_NEON) != 0;
185 }
186 return 0;
187}
188WEBP_EXTERN VP8CPUInfo VP8GetCPUInfo;
189VP8CPUInfo VP8GetCPUInfo = AndroidCPUInfo;
190#elif defined(EMSCRIPTEN) // also needs to be before generic NEON test
191// Use compile flags as an indicator of SIMD support instead of a runtime check.
192static int wasmCPUInfo(CPUFeature feature) {
193 switch (feature) {
194#ifdef WEBP_HAVE_SSE2
195 case kSSE2:
196 return 1;
197#endif
198#ifdef WEBP_HAVE_SSE41
199 case kSSE3:
200 case kSlowSSSE3:
201 case kSSE4_1:
202 return 1;
203#endif
204#ifdef WEBP_HAVE_NEON
205 case kNEON:
206 return 1;
207#endif
208 default:
209 break;
210 }
211 return 0;
212}
213WEBP_EXTERN VP8CPUInfo VP8GetCPUInfo;
214VP8CPUInfo VP8GetCPUInfo = wasmCPUInfo;
215#elif defined(WEBP_HAVE_NEON)
216// In most cases this function doesn't check for NEON support (it's assumed by
217// the configuration), but enables turning off NEON at runtime, for testing
218// purposes, by setting VP8GetCPUInfo = NULL.
219static int armCPUInfo(CPUFeature feature) {
220 if (feature != kNEON) return 0;
221#if defined(__linux__) && defined(WEBP_HAVE_NEON_RTCD)
222 {
223 int has_neon = 0;
224 char line[200];
225 FILE* const cpuinfo = fopen("/proc/cpuinfo", "r");
226 if (cpuinfo == NULL) return 0;
227 while (fgets(line, sizeof(line), cpuinfo)) {
228 if (!strncmp(line, "Features", 8)) {
229 if (strstr(line, " neon ") != NULL) {
230 has_neon = 1;
231 break;
232 }
233 }
234 }
235 fclose(cpuinfo);
236 return has_neon;
237 }
238#else
239 return 1;
240#endif
241}
242WEBP_EXTERN VP8CPUInfo VP8GetCPUInfo;
243VP8CPUInfo VP8GetCPUInfo = armCPUInfo;
244#elif defined(WEBP_USE_MIPS32) || defined(WEBP_USE_MIPS_DSP_R2) || \
245 defined(WEBP_USE_MSA)
246static int mipsCPUInfo(CPUFeature feature) {
247 if ((feature == kMIPS32) || (feature == kMIPSdspR2) || (feature == kMSA)) {
248 return 1;
249 } else {
250 return 0;
251 }
252
253}
254WEBP_EXTERN VP8CPUInfo VP8GetCPUInfo;
255VP8CPUInfo VP8GetCPUInfo = mipsCPUInfo;
256#else
// Fallback when none of the platform-specific branches above is compiled in:
// no detection routine exists, so the VP8GetCPUInfo hook is defined as NULL.
257WEBP_EXTERN VP8CPUInfo VP8GetCPUInfo;
258VP8CPUInfo VP8GetCPUInfo = NULL;
259#endif
260