| 1 | // Copyright 2011 Google Inc. All Rights Reserved. | 
|---|
| 2 | // | 
|---|
| 3 | // Use of this source code is governed by a BSD-style license | 
|---|
| 4 | // that can be found in the COPYING file in the root of the source | 
|---|
| 5 | // tree. An additional intellectual property rights grant can be found | 
|---|
| 6 | // in the file PATENTS. All contributing project authors may | 
|---|
| 7 | // be found in the AUTHORS file in the root of the source tree. | 
|---|
| 8 | // ----------------------------------------------------------------------------- | 
|---|
| 9 | // | 
|---|
| 10 | // CPU detection | 
|---|
| 11 | // | 
|---|
| 12 | // Author: Christian Duvivier (cduvivier@google.com) | 
|---|
| 13 |  | 
|---|
| 14 | #include "./dsp.h" | 
|---|
| 15 |  | 
|---|
| 16 | #if defined(WEBP_HAVE_NEON_RTCD) | 
|---|
| 17 | #include <stdio.h> | 
|---|
| 18 | #include <string.h> | 
|---|
| 19 | #endif | 
|---|
| 20 |  | 
|---|
| 21 | #if defined(WEBP_ANDROID_NEON) | 
|---|
| 22 | #include <cpu-features.h> | 
|---|
| 23 | #endif | 
|---|
| 24 |  | 
|---|
| 25 | //------------------------------------------------------------------------------ | 
|---|
| 26 | // SSE2 detection. | 
|---|
| 27 | // | 
|---|
| 28 |  | 
|---|
| 29 | // apple/darwin gcc-4.0.1 defines __PIC__, but not __pic__ with -fPIC. | 
|---|
| 30 | #if (defined(__pic__) || defined(__PIC__)) && defined(__i386__) | 
|---|
// Executes the cpuid instruction with eax = info_type and ecx = 0, and stores
// the resulting eax, ebx, ecx and edx values in cpu_info[0..3] respectively.
// This variant is selected for 32-bit position-independent builds: ebx is
// copied to edi before cpuid and swapped back afterwards, so the caller's ebx
// is left intact and cpuid's ebx result is read out of edi.
// NOTE(review): presumably needed because ebx is reserved under -fPIC on
// i386 -- confirm against the toolchain documentation.
static WEBP_INLINE void GetCPUInfo(int cpu_info[4], int info_type) {
  __asm__ volatile (
    "mov %%ebx, %%edi\n"   // save the incoming ebx in edi
    "cpuid\n"
    "xchg %%edi, %%ebx\n"  // edi <- cpuid's ebx, ebx <- saved value
    : "=a"(cpu_info[0]), "=D"(cpu_info[1]), "=c"(cpu_info[2]), "=d"(cpu_info[3])
    : "a"(info_type), "c"(0));
}
|---|
| 39 | #elif defined(__x86_64__) && \ | 
|---|
| 40 | (defined(__code_model_medium__) || defined(__code_model_large__)) && \ | 
|---|
| 41 | defined(__PIC__) | 
|---|
// Executes the cpuid instruction with eax = info_type and ecx = 0, and stores
// the resulting eax, ebx, ecx and edx values in cpu_info[0..3] respectively.
// This variant is selected for x86-64 position-independent builds using the
// medium or large code model: rbx is exchanged with the register allocated for
// cpu_info[1] around cpuid, so rbx holds its original value again afterwards
// and cpuid's ebx result ends up in that register.
static WEBP_INLINE void GetCPUInfo(int cpu_info[4], int info_type) {
  __asm__ volatile (
    "xchg{q}\t{%%rbx}, %q1\n"  // park rbx in operand 1
    "cpuid\n"
    "xchg{q}\t{%%rbx}, %q1\n"  // operand 1 <- cpuid's ebx, rbx restored
    // "=&r": operand 1 is written before the inputs are consumed, so it must
    // not share a register with them.
    : "=a"(cpu_info[0]), "=&r"(cpu_info[1]), "=c"(cpu_info[2]),
      "=d"(cpu_info[3])
    : "a"(info_type), "c"(0));
}
|---|
| 51 | #elif defined(__i386__) || defined(__x86_64__) | 
|---|
// Executes the cpuid instruction with eax = info_type and ecx = 0, and stores
// the resulting eax, ebx, ecx and edx values in cpu_info[0..3] respectively.
// Generic x86 / x86-64 variant: all four registers are bound directly through
// the asm output constraints.
static WEBP_INLINE void GetCPUInfo(int cpu_info[4], int info_type) {
  __asm__ volatile (
    "cpuid\n"
    : "=a"(cpu_info[0]), "=b"(cpu_info[1]), "=c"(cpu_info[2]), "=d"(cpu_info[3])
    : "a"(info_type), "c"(0));
}
|---|
| 58 | #elif (defined(_M_X64) || defined(_M_IX86)) && \ | 
|---|
| 59 | defined(_MSC_FULL_VER) && _MSC_FULL_VER >= 150030729  // >= VS2008 SP1 | 
|---|
| 60 | #include <intrin.h> | 
|---|
| 61 | #define GetCPUInfo(info, type) __cpuidex(info, type, 0)  // set ecx=0 | 
|---|
| 62 | #elif defined(WEBP_MSC_SSE2) | 
|---|
| 63 | #define GetCPUInfo __cpuid | 
|---|
| 64 | #endif | 
|---|
| 65 |  | 
|---|
| 66 | // NaCl has no support for xgetbv or the raw opcode. | 
|---|
| 67 | #if !defined(__native_client__) && (defined(__i386__) || defined(__x86_64__)) | 
|---|
// Executes xgetbv with ecx = 0 and returns the 64-bit result, assembled from
// edx (upper 32 bits) and eax (lower 32 bits).
static WEBP_INLINE uint64_t xgetbv(void) {
  const uint32_t ecx = 0;
  uint32_t eax, edx;
  // Use the raw opcode for xgetbv for compatibility with older toolchains.
  __asm__ volatile (
    ".byte 0x0f, 0x01, 0xd0\n"
    : "=a"(eax), "=d"(edx) : "c"(ecx));
  return ((uint64_t)edx << 32) | eax;
}
|---|
| 77 | #elif (defined(_M_X64) || defined(_M_IX86)) && \ | 
|---|
| 78 | defined(_MSC_FULL_VER) && _MSC_FULL_VER >= 160040219  // >= VS2010 SP1 | 
|---|
| 79 | #include <immintrin.h> | 
|---|
| 80 | #define xgetbv() _xgetbv(0) | 
|---|
| 81 | #elif defined(_MSC_VER) && defined(_M_IX86) | 
|---|
// Inline-assembly fallback used when _MSC_VER and _M_IX86 are defined and the
// earlier branches did not apply: executes xgetbv with ecx = 0 and returns
// the 64-bit result, assembled from edx (upper 32 bits) and eax (lower 32
// bits).
static WEBP_INLINE uint64_t xgetbv(void) {
  uint32_t eax_, edx_;
  __asm {
    xor ecx, ecx  // ecx = 0
    // Use the raw opcode for xgetbv for compatibility with older toolchains.
    __asm _emit 0x0f __asm _emit 0x01 __asm _emit 0xd0
    // copy the instruction's outputs into the C locals
    mov eax_, eax
    mov edx_, edx
  }
  return ((uint64_t)edx_ << 32) | eax_;
}
|---|
| 93 | #else | 
|---|
| 94 | #define xgetbv() 0U  // no AVX for older x64 or unrecognized toolchains. | 
|---|
| 95 | #endif | 
|---|
| 96 |  | 
|---|
| 97 | #if defined(__i386__) || defined(__x86_64__) || defined(WEBP_MSC_SSE2) | 
|---|
| 98 |  | 
|---|
// Helper for the run-time detection of platforms with slow SSSE3.
// 'info' is the eax value returned by cpuid leaf 1 (the caller passes
// cpu_info[0]). Returns 1 if the family is 6 and the model is one of the
// entries listed below, 0 otherwise.
static int CheckSlowModel(int info) {
  // Models with longer latencies for the bsr instruction (ie 2 cycles vs
  // 10/16 cycles) and for some SSSE3 instructions like pshufb.
  // Refer to Intel 64 and IA-32 Architectures Optimization Reference Manual.
  static const uint8_t kSlowModels[] = {
    0x37, 0x4a, 0x4d,  // Silvermont Microarchitecture
    0x1c, 0x26, 0x27   // Atom Microarchitecture
  };
  const size_t num_models = sizeof(kSlowModels) / sizeof(kSlowModels[0]);
  const uint32_t family = (info >> 8) & 0xf;
  // Bits 16..19 supply the upper nibble of the model, bits 4..7 the lower.
  const uint32_t model_hi = (info & 0xf0000) >> 12;
  const uint32_t model_lo = (info >> 4) & 0xf;
  const uint32_t model = model_hi | model_lo;
  size_t idx = 0;

  if (family != 0x06) return 0;
  while (idx < num_models) {
    if (kSlowModels[idx] == model) return 1;
    ++idx;
  }
  return 0;
}
|---|
| 118 |  | 
|---|
| 119 | static int x86CPUInfo(CPUFeature feature) { | 
|---|
| 120 | int max_cpuid_value; | 
|---|
| 121 | int cpu_info[4]; | 
|---|
| 122 | int is_intel = 0; | 
|---|
| 123 |  | 
|---|
| 124 | // get the highest feature value cpuid supports | 
|---|
| 125 | GetCPUInfo(cpu_info, 0); | 
|---|
| 126 | max_cpuid_value = cpu_info[0]; | 
|---|
| 127 | if (max_cpuid_value < 1) { | 
|---|
| 128 | return 0; | 
|---|
| 129 | } else { | 
|---|
| 130 | const int VENDOR_ID_INTEL_EBX = 0x756e6547;  // uneG | 
|---|
| 131 | const int VENDOR_ID_INTEL_EDX = 0x49656e69;  // Ieni | 
|---|
| 132 | const int VENDOR_ID_INTEL_ECX = 0x6c65746e;  // letn | 
|---|
| 133 | is_intel = (cpu_info[1] == VENDOR_ID_INTEL_EBX && | 
|---|
| 134 | cpu_info[2] == VENDOR_ID_INTEL_ECX && | 
|---|
| 135 | cpu_info[3] == VENDOR_ID_INTEL_EDX);    // genuine Intel? | 
|---|
| 136 | } | 
|---|
| 137 |  | 
|---|
| 138 | GetCPUInfo(cpu_info, 1); | 
|---|
| 139 | if (feature == kSSE2) { | 
|---|
| 140 | return !!(cpu_info[3] & (1 << 26)); | 
|---|
| 141 | } | 
|---|
| 142 | if (feature == kSSE3) { | 
|---|
| 143 | return !!(cpu_info[2] & (1 << 0)); | 
|---|
| 144 | } | 
|---|
| 145 | if (feature == kSlowSSSE3) { | 
|---|
| 146 | if (is_intel && (cpu_info[2] & (1 << 0))) {   // SSSE3? | 
|---|
| 147 | return CheckSlowModel(cpu_info[0]); | 
|---|
| 148 | } | 
|---|
| 149 | return 0; | 
|---|
| 150 | } | 
|---|
| 151 |  | 
|---|
| 152 | if (feature == kSSE4_1) { | 
|---|
| 153 | return !!(cpu_info[2] & (1 << 19)); | 
|---|
| 154 | } | 
|---|
| 155 | if (feature == kAVX) { | 
|---|
| 156 | // bits 27 (OSXSAVE) & 28 (256-bit AVX) | 
|---|
| 157 | if ((cpu_info[2] & 0x18000000) == 0x18000000) { | 
|---|
| 158 | // XMM state and YMM state enabled by the OS. | 
|---|
| 159 | return (xgetbv() & 0x6) == 0x6; | 
|---|
| 160 | } | 
|---|
| 161 | } | 
|---|
| 162 | if (feature == kAVX2) { | 
|---|
| 163 | if (x86CPUInfo(kAVX) && max_cpuid_value >= 7) { | 
|---|
| 164 | GetCPUInfo(cpu_info, 7); | 
|---|
| 165 | return !!(cpu_info[1] & (1 << 5)); | 
|---|
| 166 | } | 
|---|
| 167 | } | 
|---|
| 168 | return 0; | 
|---|
| 169 | } | 
|---|
| 170 | VP8CPUInfo VP8GetCPUInfo = x86CPUInfo; | 
|---|
| 171 | #elif defined(WEBP_ANDROID_NEON)  // NB: needs to be before generic NEON test. | 
|---|
| 172 | static int AndroidCPUInfo(CPUFeature feature) { | 
|---|
| 173 | const AndroidCpuFamily cpu_family = android_getCpuFamily(); | 
|---|
| 174 | const uint64_t cpu_features = android_getCpuFeatures(); | 
|---|
| 175 | if (feature == kNEON) { | 
|---|
| 176 | return (cpu_family == ANDROID_CPU_FAMILY_ARM && | 
|---|
| 177 | 0 != (cpu_features & ANDROID_CPU_ARM_FEATURE_NEON)); | 
|---|
| 178 | } | 
|---|
| 179 | return 0; | 
|---|
| 180 | } | 
|---|
| 181 | VP8CPUInfo VP8GetCPUInfo = AndroidCPUInfo; | 
|---|
| 182 | #elif defined(WEBP_USE_NEON) | 
|---|
| 183 | // define a dummy function to enable turning off NEON at runtime by setting | 
|---|
| 184 | // VP8DecGetCPUInfo = NULL | 
|---|
| 185 | static int armCPUInfo(CPUFeature feature) { | 
|---|
| 186 | if (feature != kNEON) return 0; | 
|---|
| 187 | #if defined(__linux__) && defined(WEBP_HAVE_NEON_RTCD) | 
|---|
| 188 | { | 
|---|
| 189 | int has_neon = 0; | 
|---|
| 190 | char line[200]; | 
|---|
| 191 | FILE* const cpuinfo = fopen( "/proc/cpuinfo", "r"); | 
|---|
| 192 | if (cpuinfo == NULL) return 0; | 
|---|
| 193 | while (fgets(line, sizeof(line), cpuinfo)) { | 
|---|
| 194 | if (!strncmp(line, "Features", 8)) { | 
|---|
| 195 | if (strstr(line, " neon ") != NULL) { | 
|---|
| 196 | has_neon = 1; | 
|---|
| 197 | break; | 
|---|
| 198 | } | 
|---|
| 199 | } | 
|---|
| 200 | } | 
|---|
| 201 | fclose(cpuinfo); | 
|---|
| 202 | return has_neon; | 
|---|
| 203 | } | 
|---|
| 204 | #else | 
|---|
| 205 | return 1; | 
|---|
| 206 | #endif | 
|---|
| 207 | } | 
|---|
| 208 | VP8CPUInfo VP8GetCPUInfo = armCPUInfo; | 
|---|
| 209 | #elif defined(WEBP_USE_MIPS32) || defined(WEBP_USE_MIPS_DSP_R2) || \ | 
|---|
| 210 | defined(WEBP_USE_MSA) | 
|---|
| 211 | static int mipsCPUInfo(CPUFeature feature) { | 
|---|
| 212 | if ((feature == kMIPS32) || (feature == kMIPSdspR2) || (feature == kMSA)) { | 
|---|
| 213 | return 1; | 
|---|
| 214 | } else { | 
|---|
| 215 | return 0; | 
|---|
| 216 | } | 
|---|
| 217 |  | 
|---|
| 218 | } | 
|---|
| 219 | VP8CPUInfo VP8GetCPUInfo = mipsCPUInfo; | 
|---|
| 220 | #else | 
|---|
| 221 | VP8CPUInfo VP8GetCPUInfo = NULL; | 
|---|
| 222 | #endif | 
|---|
| 223 |  | 
|---|