1// basisu_kernels_sse.cpp
2// Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved.
3//
4// Licensed under the Apache License, Version 2.0 (the "License");
5// you may not use this file except in compliance with the License.
6// You may obtain a copy of the License at
7//
8// http://www.apache.org/licenses/LICENSE-2.0
9//
10// Unless required by applicable law or agreed to in writing, software
11// distributed under the License is distributed on an "AS IS" BASIS,
12// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13// See the License for the specific language governing permissions and
14// limitations under the License.
15#include "basisu_enc.h"
16
17#if BASISU_SUPPORT_SSE
18
19#define CPPSPMD_SSE2 0
20
21#ifdef _MSC_VER
22#include <intrin.h>
23#endif
24
25#if !defined(_MSC_VER)
26 #if __AVX__ || __AVX2__ || __AVX512F__
27 #error Please check your compiler options
28 #endif
29
30 #if CPPSPMD_SSE2
31 #if __SSE4_1__ || __SSE3__ || __SSE4_2__ || __SSSE3__
32 #error SSE4.1/SSE3/SSE4.2/SSSE3 cannot be enabled to use this file
33 #endif
34 #else
35 #if !__SSE4_1__ || !__SSE3__ || !__SSSE3__
36 #error Please check your compiler options
37 #endif
38 #endif
39#endif
40
41#include "cppspmd_sse.h"
42
43#include "cppspmd_type_aliases.h"
44
45using namespace basisu;
46
47#include "basisu_kernels_declares.h"
48#include "basisu_kernels_imp.h"
49
50namespace basisu
51{
52
// x86 feature flags decoded from CPUID results.
// Every flag starts out false (via the default member initializers) and is
// later filled in by extract_x86_flags() / extract_x86_extended_flags().
struct cpu_info
{
	bool m_has_fpu = false;
	bool m_has_mmx = false;
	bool m_has_sse = false;
	bool m_has_sse2 = false;
	bool m_has_sse3 = false;
	bool m_has_ssse3 = false;
	bool m_has_sse41 = false;
	bool m_has_sse42 = false;
	bool m_has_avx = false;
	bool m_has_avx2 = false;
	bool m_has_pclmulqdq = false;
};
69
70static void extract_x86_flags(cpu_info &info, uint32_t ecx, uint32_t edx)
71{
72 info.m_has_fpu = (edx & (1 << 0)) != 0;
73 info.m_has_mmx = (edx & (1 << 23)) != 0;
74 info.m_has_sse = (edx & (1 << 25)) != 0;
75 info.m_has_sse2 = (edx & (1 << 26)) != 0;
76 info.m_has_sse3 = (ecx & (1 << 0)) != 0;
77 info.m_has_ssse3 = (ecx & (1 << 9)) != 0;
78 info.m_has_sse41 = (ecx & (1 << 19)) != 0;
79 info.m_has_sse42 = (ecx & (1 << 20)) != 0;
80 info.m_has_pclmulqdq = (ecx & (1 << 1)) != 0;
81 info.m_has_avx = (ecx & (1 << 28)) != 0;
82}
83
84static void extract_x86_extended_flags(cpu_info &info, uint32_t ebx)
85{
86 info.m_has_avx2 = (ebx & (1 << 5)) != 0;
87}
88
89#ifndef _MSC_VER
// Executes the CPUID instruction with EAX = eax and ECX = ecx, then stores the
// resulting EAX, EBX, ECX and EDX values in regs[0..3] (in that order).
// regs must point to at least four writable uint32_t values.
static void do_cpuid(uint32_t eax, uint32_t ecx, uint32_t* regs)
{
	uint32_t out_b = 0;
	uint32_t out_d = 0;

#if defined(__PIC__) && defined(__i386__)
	// EBX is stashed in EDI before cpuid and swapped back afterwards, so the
	// EBX result is delivered through EDI and EBX itself is left unchanged.
	// NOTE(review): presumably because EBX is reserved in 32-bit PIC builds - confirm.
	__asm__("movl %%ebx, %%edi;"
		"cpuid;"
		"xchgl %%ebx, %%edi;"
		: "=D"(out_b), "+a"(eax), "+c"(ecx), "=d"(out_d));
#else
	__asm__("cpuid;" : "+b"(out_b), "+a"(eax), "+c"(ecx), "=d"(out_d));
#endif

	regs[0] = eax;
	regs[1] = out_b;
	regs[2] = ecx;
	regs[3] = out_d;
}
105#endif
106
107static void get_cpuinfo(cpu_info &info)
108{
109 int regs[4];
110
111#ifdef _MSC_VER
112 __cpuid(regs, 0);
113#else
114 do_cpuid(0, 0, (uint32_t *)regs);
115#endif
116
117 const uint32_t max_eax = regs[0];
118
119 if (max_eax >= 1U)
120 {
121#ifdef _MSC_VER
122 __cpuid(regs, 1);
123#else
124 do_cpuid(1, 0, (uint32_t*)regs);
125#endif
126 extract_x86_flags(info, regs[2], regs[3]);
127 }
128
129 if (max_eax >= 7U)
130 {
131#ifdef _MSC_VER
132 __cpuidex(regs, 7, 0);
133#else
134 do_cpuid(7, 0, (uint32_t*)regs);
135#endif
136
137 extract_x86_extended_flags(info, regs[1]);
138 }
139}
140
141void detect_sse41()
142{
143 cpu_info info;
144 get_cpuinfo(info);
145
146 // Check for everything from SSE to SSE 4.1
147 g_cpu_supports_sse41 = info.m_has_sse && info.m_has_sse2 && info.m_has_sse3 && info.m_has_ssse3 && info.m_has_sse41;
148}
149
150} // namespace basisu
151#else // #if BASISU_SUPPORT_SSE
152namespace basisu
153{
154
// Stub used when BASISU_SUPPORT_SSE is disabled: performs no detection.
void detect_sse41()
{
	// Intentionally empty.
}
158
159} // namespace basisu
160#endif // #if BASISU_SUPPORT_SSE
161
162