1 | // Copyright 2011 Google Inc. All Rights Reserved. |
2 | // |
3 | // Use of this source code is governed by a BSD-style license |
4 | // that can be found in the COPYING file in the root of the source |
5 | // tree. An additional intellectual property rights grant can be found |
6 | // in the file PATENTS. All contributing project authors may |
7 | // be found in the AUTHORS file in the root of the source tree. |
8 | // ----------------------------------------------------------------------------- |
9 | // |
10 | // CPU detection |
11 | // |
12 | // Author: Christian Duvivier (cduvivier@google.com) |
13 | |
14 | #include "src/dsp/cpu.h" |
15 | |
16 | #if defined(WEBP_HAVE_NEON_RTCD) |
17 | #include <stdio.h> |
18 | #include <string.h> |
19 | #endif |
20 | |
21 | #if defined(WEBP_ANDROID_NEON) |
22 | #include <cpu-features.h> |
23 | #endif |
24 | |
25 | //------------------------------------------------------------------------------ |
26 | // SSE2 detection. |
27 | // |
28 | |
29 | // apple/darwin gcc-4.0.1 defines __PIC__, but not __pic__ with -fPIC. |
30 | #if (defined(__pic__) || defined(__PIC__)) && defined(__i386__) |
// Runs the cpuid instruction with eax = 'info_type' and ecx = 0, and stores
// the resulting eax, ebx, ecx and edx values in cpu_info[0], cpu_info[1],
// cpu_info[2] and cpu_info[3] respectively.
// This variant never names ebx as an asm operand: ebx is copied to edi before
// cpuid and exchanged back afterwards, which restores the caller's ebx and
// leaves cpuid's ebx output in edi (the register bound to cpu_info[1]).
// NOTE(review): presumably required because ebx is reserved in i386 PIC
// builds (this variant is selected by the __pic__/__PIC__ test) -- confirm.
static WEBP_INLINE void GetCPUInfo(int cpu_info[4], int info_type) {
  __asm__ volatile (
    "mov %%ebx, %%edi\n"    // save the caller's ebx in edi
    "cpuid\n"
    "xchg %%edi, %%ebx\n"   // restore ebx; edi now holds cpuid's ebx output
    : "=a" (cpu_info[0]), "=D" (cpu_info[1]), "=c" (cpu_info[2]), "=d" (cpu_info[3])
    : "a" (info_type), "c" (0));
}
39 | #elif defined(__x86_64__) && \ |
40 | (defined(__code_model_medium__) || defined(__code_model_large__)) && \ |
41 | defined(__PIC__) |
// Runs the cpuid instruction with eax = 'info_type' and ecx = 0, and stores
// the resulting eax, ebx, ecx and edx values in cpu_info[0], cpu_info[1],
// cpu_info[2] and cpu_info[3] respectively.
// rbx is not named as an asm operand here: it is exchanged with the register
// allocated for operand 1 (cpu_info[1], early-clobber "=&r") before cpuid and
// exchanged back afterwards, so rbx is restored for the caller while cpuid's
// ebx output ends up in operand 1.
// NOTE(review): presumably needed because rbx is reserved in PIC builds using
// the medium/large code models (the conditions selecting this variant) --
// confirm against the toolchain documentation.
static WEBP_INLINE void GetCPUInfo(int cpu_info[4], int info_type) {
  __asm__ volatile (
    "xchg{q}\t{%%rbx}, %q1\n"   // stash rbx in operand 1
    "cpuid\n"
    "xchg{q}\t{%%rbx}, %q1\n"   // restore rbx; operand 1 gets cpuid's ebx
    : "=a" (cpu_info[0]), "=&r" (cpu_info[1]), "=c" (cpu_info[2]),
      "=d" (cpu_info[3])
    : "a" (info_type), "c" (0));
}
51 | #elif defined(__i386__) || defined(__x86_64__) |
// Runs the cpuid instruction with eax = 'info_type' and ecx = 0, and stores
// the resulting eax, ebx, ecx and edx values in cpu_info[0], cpu_info[1],
// cpu_info[2] and cpu_info[3] respectively.
// In this variant all four registers are bound directly as asm outputs.
static WEBP_INLINE void GetCPUInfo(int cpu_info[4], int info_type) {
  __asm__ volatile (
    "cpuid\n"
    : "=a" (cpu_info[0]), "=b" (cpu_info[1]), "=c" (cpu_info[2]), "=d" (cpu_info[3])
    : "a" (info_type), "c" (0));
}
58 | #elif defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86)) |
59 | |
// MSVC: implement GetCPUInfo() with a compiler intrinsic and define
// WEBP_HAVE_MSC_CPUID so that the x86 detection code further down is enabled.
#if defined(_MSC_FULL_VER) && _MSC_FULL_VER >= 150030729  // >= VS2008 SP1
#include <intrin.h>
// Function-like macro: always passes 0 as the third argument of __cpuidex.
#define GetCPUInfo(info, type) __cpuidex(info, type, 0)  // set ecx=0
#define WEBP_HAVE_MSC_CPUID
#elif _MSC_VER > 1310
#include <intrin.h>
// Older compilers: GetCPUInfo maps straight onto the two-argument __cpuid.
// NOTE(review): ecx is not set explicitly on this path -- confirm that this
// is acceptable for the cpuid leaves queried by the callers.
#define GetCPUInfo __cpuid
#define WEBP_HAVE_MSC_CPUID
#endif
69 | |
70 | #endif |
71 | |
72 | // NaCl has no support for xgetbv or the raw opcode. |
73 | #if !defined(__native_client__) && (defined(__i386__) || defined(__x86_64__)) |
// Executes the xgetbv instruction with ecx = 0 and returns its result as a
// 64-bit value: edx supplies the upper 32 bits and eax the lower 32 bits.
static WEBP_INLINE uint64_t xgetbv(void) {
  const uint32_t ecx = 0;
  uint32_t eax, edx;
  // Use the raw opcode for xgetbv for compatibility with older toolchains.
  __asm__ volatile (
    ".byte 0x0f, 0x01, 0xd0\n"
    : "=a" (eax), "=d" (edx) : "c" (ecx));
  return ((uint64_t)edx << 32) | eax;
}
83 | #elif (defined(_M_X64) || defined(_M_IX86)) && \ |
84 | defined(_MSC_FULL_VER) && _MSC_FULL_VER >= 160040219 // >= VS2010 SP1 |
// Recent MSVC: map xgetbv() onto the _xgetbv compiler intrinsic. The argument
// 0 matches the ecx = 0 used by the inline-assembly variants of xgetbv().
#include <immintrin.h>
#define xgetbv() _xgetbv(0)
87 | #elif defined(_MSC_VER) && defined(_M_IX86) |
// 32-bit MSVC variant using inline assembly: executes xgetbv with ecx = 0 and
// returns its result as a 64-bit value (edx in the upper 32 bits, eax in the
// lower 32 bits).
static WEBP_INLINE uint64_t xgetbv(void) {
  uint32_t eax_, edx_;
  __asm {
    xor ecx, ecx  // ecx = 0
    // Use the raw opcode for xgetbv for compatibility with older toolchains.
    __asm _emit 0x0f __asm _emit 0x01 __asm _emit 0xd0
    // Copy the result registers into the C locals.
    mov eax_, eax
    mov edx_, edx
  }
  return ((uint64_t)edx_ << 32) | eax_;
}
99 | #else |
100 | #define xgetbv() 0U // no AVX for older x64 or unrecognized toolchains. |
101 | #endif |
102 | |
103 | #if defined(__i386__) || defined(__x86_64__) || defined(WEBP_HAVE_MSC_CPUID) |
104 | |
// Run-time helper for spotting platforms where SSSE3 is slow.
// 'info' is the eax value returned by cpuid leaf 1. Returns 1 when it encodes
// family 0x06 together with one of the models listed below, 0 otherwise.
//
// The listed display models have longer latencies for the bsr instruction
// (i.e. 2 cycles vs 10/16 cycles) and for some SSSE3 instructions like pshufb.
// Refer to Intel 64 and IA-32 Architectures Optimization Reference Manual.
static int CheckSlowModel(int info) {
  const uint32_t family_id = (info >> 8) & 0xf;
  // Extended model (bits 16..19) forms the high nibble, model (bits 4..7) the
  // low nibble.
  const uint32_t model_id = ((info & 0xf0000) >> 12) | ((info >> 4) & 0xf);

  if (family_id != 0x06) return 0;

  switch (model_id) {
    // Silvermont Microarchitecture
    case 0x37:
    case 0x4a:
    case 0x4d:
    // Atom Microarchitecture
    case 0x1c:
    case 0x26:
    case 0x27:
      return 1;
    default:
      return 0;
  }
}
124 | |
125 | static int x86CPUInfo(CPUFeature feature) { |
126 | int max_cpuid_value; |
127 | int cpu_info[4]; |
128 | int is_intel = 0; |
129 | |
130 | // get the highest feature value cpuid supports |
131 | GetCPUInfo(cpu_info, 0); |
132 | max_cpuid_value = cpu_info[0]; |
133 | if (max_cpuid_value < 1) { |
134 | return 0; |
135 | } else { |
136 | const int VENDOR_ID_INTEL_EBX = 0x756e6547; // uneG |
137 | const int VENDOR_ID_INTEL_EDX = 0x49656e69; // Ieni |
138 | const int VENDOR_ID_INTEL_ECX = 0x6c65746e; // letn |
139 | is_intel = (cpu_info[1] == VENDOR_ID_INTEL_EBX && |
140 | cpu_info[2] == VENDOR_ID_INTEL_ECX && |
141 | cpu_info[3] == VENDOR_ID_INTEL_EDX); // genuine Intel? |
142 | } |
143 | |
144 | GetCPUInfo(cpu_info, 1); |
145 | if (feature == kSSE2) { |
146 | return !!(cpu_info[3] & (1 << 26)); |
147 | } |
148 | if (feature == kSSE3) { |
149 | return !!(cpu_info[2] & (1 << 0)); |
150 | } |
151 | if (feature == kSlowSSSE3) { |
152 | if (is_intel && (cpu_info[2] & (1 << 9))) { // SSSE3? |
153 | return CheckSlowModel(cpu_info[0]); |
154 | } |
155 | return 0; |
156 | } |
157 | |
158 | if (feature == kSSE4_1) { |
159 | return !!(cpu_info[2] & (1 << 19)); |
160 | } |
161 | if (feature == kAVX) { |
162 | // bits 27 (OSXSAVE) & 28 (256-bit AVX) |
163 | if ((cpu_info[2] & 0x18000000) == 0x18000000) { |
164 | // XMM state and YMM state enabled by the OS. |
165 | return (xgetbv() & 0x6) == 0x6; |
166 | } |
167 | } |
168 | if (feature == kAVX2) { |
169 | if (x86CPUInfo(kAVX) && max_cpuid_value >= 7) { |
170 | GetCPUInfo(cpu_info, 7); |
171 | return !!(cpu_info[1] & (1 << 5)); |
172 | } |
173 | } |
174 | return 0; |
175 | } |
176 | WEBP_EXTERN VP8CPUInfo VP8GetCPUInfo; |
177 | VP8CPUInfo VP8GetCPUInfo = x86CPUInfo; |
178 | #elif defined(WEBP_ANDROID_NEON) // NB: needs to be before generic NEON test. |
179 | static int AndroidCPUInfo(CPUFeature feature) { |
180 | const AndroidCpuFamily cpu_family = android_getCpuFamily(); |
181 | const uint64_t cpu_features = android_getCpuFeatures(); |
182 | if (feature == kNEON) { |
183 | return cpu_family == ANDROID_CPU_FAMILY_ARM && |
184 | (cpu_features & ANDROID_CPU_ARM_FEATURE_NEON) != 0; |
185 | } |
186 | return 0; |
187 | } |
188 | WEBP_EXTERN VP8CPUInfo VP8GetCPUInfo; |
189 | VP8CPUInfo VP8GetCPUInfo = AndroidCPUInfo; |
190 | #elif defined(EMSCRIPTEN) // also needs to be before generic NEON test |
191 | // Use compile flags as an indicator of SIMD support instead of a runtime check. |
192 | static int wasmCPUInfo(CPUFeature feature) { |
193 | switch (feature) { |
194 | #ifdef WEBP_HAVE_SSE2 |
195 | case kSSE2: |
196 | return 1; |
197 | #endif |
198 | #ifdef WEBP_HAVE_SSE41 |
199 | case kSSE3: |
200 | case kSlowSSSE3: |
201 | case kSSE4_1: |
202 | return 1; |
203 | #endif |
204 | #ifdef WEBP_HAVE_NEON |
205 | case kNEON: |
206 | return 1; |
207 | #endif |
208 | default: |
209 | break; |
210 | } |
211 | return 0; |
212 | } |
213 | WEBP_EXTERN VP8CPUInfo VP8GetCPUInfo; |
214 | VP8CPUInfo VP8GetCPUInfo = wasmCPUInfo; |
215 | #elif defined(WEBP_HAVE_NEON) |
216 | // In most cases this function doesn't check for NEON support (it's assumed by |
217 | // the configuration), but enables turning off NEON at runtime, for testing |
218 | // purposes, by setting VP8GetCPUInfo = NULL. |
219 | static int armCPUInfo(CPUFeature feature) { |
220 | if (feature != kNEON) return 0; |
221 | #if defined(__linux__) && defined(WEBP_HAVE_NEON_RTCD) |
222 | { |
223 | int has_neon = 0; |
224 | char line[200]; |
225 | FILE* const cpuinfo = fopen("/proc/cpuinfo" , "r" ); |
226 | if (cpuinfo == NULL) return 0; |
227 | while (fgets(line, sizeof(line), cpuinfo)) { |
228 | if (!strncmp(line, "Features" , 8)) { |
229 | if (strstr(line, " neon " ) != NULL) { |
230 | has_neon = 1; |
231 | break; |
232 | } |
233 | } |
234 | } |
235 | fclose(cpuinfo); |
236 | return has_neon; |
237 | } |
238 | #else |
239 | return 1; |
240 | #endif |
241 | } |
242 | WEBP_EXTERN VP8CPUInfo VP8GetCPUInfo; |
243 | VP8CPUInfo VP8GetCPUInfo = armCPUInfo; |
244 | #elif defined(WEBP_USE_MIPS32) || defined(WEBP_USE_MIPS_DSP_R2) || \ |
245 | defined(WEBP_USE_MSA) |
246 | static int mipsCPUInfo(CPUFeature feature) { |
247 | if ((feature == kMIPS32) || (feature == kMIPSdspR2) || (feature == kMSA)) { |
248 | return 1; |
249 | } else { |
250 | return 0; |
251 | } |
252 | |
253 | } |
254 | WEBP_EXTERN VP8CPUInfo VP8GetCPUInfo; |
255 | VP8CPUInfo VP8GetCPUInfo = mipsCPUInfo; |
256 | #else |
// No detection function is available for this configuration: the entry point
// is left NULL.
// NOTE(review): callers presumably treat a NULL VP8GetCPUInfo as "no optional
// CPU feature available" -- confirm at the call sites.
WEBP_EXTERN VP8CPUInfo VP8GetCPUInfo;
VP8CPUInfo VP8GetCPUInfo = NULL;
259 | #endif |
260 | |