| 1 | // basisu_kernels_sse.cpp |
| 2 | // Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved. |
| 3 | // |
| 4 | // Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | // you may not use this file except in compliance with the License. |
| 6 | // You may obtain a copy of the License at |
| 7 | // |
| 8 | // http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | // |
| 10 | // Unless required by applicable law or agreed to in writing, software |
| 11 | // distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | // See the License for the specific language governing permissions and |
| 14 | // limitations under the License. |
| 15 | #include "basisu_enc.h" |
| 16 | |
| 17 | #if BASISU_SUPPORT_SSE |
| 18 | |
// Set to 0, so the #else branch of the CPPSPMD_SSE2 check below is the active one.
// NOTE(review): presumably also consumed by cppspmd_sse.h to choose between an
// SSE2-only and an SSE4.1 code path - confirm against that header.
#define CPPSPMD_SSE2 0

// MSVC only: pulls in the compiler intrinsics header (the __cpuid/__cpuidex calls
// further down in this file are made only in _MSC_VER builds).
#ifdef _MSC_VER
#include <intrin.h>
#endif

// Compile-time sanity checks on the instruction-set macros. They are skipped
// entirely for MSVC and only run for other compilers.
#if !defined(_MSC_VER)
// Refuse to build if any AVX-family option is enabled for this translation unit.
// NOTE(review): presumably because this file must contain nothing newer than
// SSE4.1 so it can be gated by the runtime check - confirm.
#if __AVX__ || __AVX2__ || __AVX512F__
#error Please check your compiler options
#endif

#if CPPSPMD_SSE2
// SSE2-only configuration: none of SSE3/SSSE3/SSE4.1/SSE4.2 may be enabled.
#if __SSE4_1__ || __SSE3__ || __SSE4_2__ || __SSSE3__
#error SSE4.1/SSE3/SSE4.2/SSSE3 cannot be enabled to use this file
#endif
#else
// Default configuration: SSE3, SSSE3 and SSE4.1 must all be enabled.
#if !__SSE4_1__ || !__SSE3__ || !__SSSE3__
#error Please check your compiler options
#endif
#endif
#endif
| 40 | |
| 41 | #include "cppspmd_sse.h" |
| 42 | |
| 43 | #include "cppspmd_type_aliases.h" |
| 44 | |
| 45 | using namespace basisu; |
| 46 | |
| 47 | #include "basisu_kernels_declares.h" |
| 48 | #include "basisu_kernels_imp.h" |
| 49 | |
| 50 | namespace basisu |
| 51 | { |
| 52 | |
// Set of x86 feature flags filled in from CPUID results.
// Every flag starts out false; the extract_* helpers set them.
struct cpu_info
{
	// Initializes every flag to false (listed in declaration order).
	cpu_info() :
		m_has_fpu(false),
		m_has_mmx(false),
		m_has_sse(false),
		m_has_sse2(false),
		m_has_sse3(false),
		m_has_ssse3(false),
		m_has_sse41(false),
		m_has_sse42(false),
		m_has_avx(false),
		m_has_avx2(false),
		m_has_pclmulqdq(false)
	{
	}

	bool m_has_fpu;
	bool m_has_mmx;
	bool m_has_sse;
	bool m_has_sse2;
	bool m_has_sse3;
	bool m_has_ssse3;
	bool m_has_sse41;
	bool m_has_sse42;
	bool m_has_avx;
	bool m_has_avx2;
	bool m_has_pclmulqdq;
};
| 69 | |
| 70 | static void extract_x86_flags(cpu_info &info, uint32_t ecx, uint32_t edx) |
| 71 | { |
| 72 | info.m_has_fpu = (edx & (1 << 0)) != 0; |
| 73 | info.m_has_mmx = (edx & (1 << 23)) != 0; |
| 74 | info.m_has_sse = (edx & (1 << 25)) != 0; |
| 75 | info.m_has_sse2 = (edx & (1 << 26)) != 0; |
| 76 | info.m_has_sse3 = (ecx & (1 << 0)) != 0; |
| 77 | info.m_has_ssse3 = (ecx & (1 << 9)) != 0; |
| 78 | info.m_has_sse41 = (ecx & (1 << 19)) != 0; |
| 79 | info.m_has_sse42 = (ecx & (1 << 20)) != 0; |
| 80 | info.m_has_pclmulqdq = (ecx & (1 << 1)) != 0; |
| 81 | info.m_has_avx = (ecx & (1 << 28)) != 0; |
| 82 | } |
| 83 | |
| 84 | static void extract_x86_extended_flags(cpu_info &info, uint32_t ebx) |
| 85 | { |
| 86 | info.m_has_avx2 = (ebx & (1 << 5)) != 0; |
| 87 | } |
| 88 | |
| 89 | #ifndef _MSC_VER |
// Executes the CPUID instruction with the given EAX (leaf) and ECX (subleaf)
// inputs and stores the resulting EAX, EBX, ECX, EDX in regs[0..3], in that order.
// regs must point to at least four writable uint32_t values.
static void do_cpuid(uint32_t eax, uint32_t ecx, uint32_t* regs)
{
	uint32_t ebx_out = 0;
	uint32_t edx_out = 0;

#if defined(__PIC__) && defined(__i386__)
	// EBX is copied to EDI before CPUID and the two are swapped back afterwards,
	// so EBX ends up with its original value and the CPUID EBX result is read
	// from EDI. NOTE(review): presumably needed because EBX is reserved in
	// 32-bit position-independent code - confirm.
	__asm__("movl %%ebx, %%edi;"
		"cpuid;"
		"xchgl %%ebx, %%edi;"
		: "=D" (ebx_out), "+a" (eax), "+c" (ecx), "=d" (edx_out));
#else
	__asm__("cpuid;" : "+b" (ebx_out), "+a" (eax), "+c" (ecx), "=d" (edx_out));
#endif

	regs[0] = eax;
	regs[1] = ebx_out;
	regs[2] = ecx;
	regs[3] = edx_out;
}
| 105 | #endif |
| 106 | |
| 107 | static void get_cpuinfo(cpu_info &info) |
| 108 | { |
| 109 | int regs[4]; |
| 110 | |
| 111 | #ifdef _MSC_VER |
| 112 | __cpuid(regs, 0); |
| 113 | #else |
| 114 | do_cpuid(0, 0, (uint32_t *)regs); |
| 115 | #endif |
| 116 | |
| 117 | const uint32_t max_eax = regs[0]; |
| 118 | |
| 119 | if (max_eax >= 1U) |
| 120 | { |
| 121 | #ifdef _MSC_VER |
| 122 | __cpuid(regs, 1); |
| 123 | #else |
| 124 | do_cpuid(1, 0, (uint32_t*)regs); |
| 125 | #endif |
| 126 | extract_x86_flags(info, regs[2], regs[3]); |
| 127 | } |
| 128 | |
| 129 | if (max_eax >= 7U) |
| 130 | { |
| 131 | #ifdef _MSC_VER |
| 132 | __cpuidex(regs, 7, 0); |
| 133 | #else |
| 134 | do_cpuid(7, 0, (uint32_t*)regs); |
| 135 | #endif |
| 136 | |
| 137 | extract_x86_extended_flags(info, regs[1]); |
| 138 | } |
| 139 | } |
| 140 | |
| 141 | void detect_sse41() |
| 142 | { |
| 143 | cpu_info info; |
| 144 | get_cpuinfo(info); |
| 145 | |
| 146 | // Check for everything from SSE to SSE 4.1 |
| 147 | g_cpu_supports_sse41 = info.m_has_sse && info.m_has_sse2 && info.m_has_sse3 && info.m_has_ssse3 && info.m_has_sse41; |
| 148 | } |
| 149 | |
| 150 | } // namespace basisu |
| 151 | #else // #if BASISU_SUPPORT_SSE |
| 152 | namespace basisu |
| 153 | { |
| 154 | |
// Variant compiled when BASISU_SUPPORT_SSE is off: performs no detection and
// modifies nothing.
void detect_sse41()
{
}
| 158 | |
| 159 | } // namespace basisu |
| 160 | #endif // #if BASISU_SUPPORT_SSE |
| 161 | |
| 162 | |