1#include <stdbool.h>
2#include <stdint.h>
3#include <stddef.h>
4#include <stdint.h>
5
6#include "../include/libbase64.h"
7#include "codecs.h"
8
// Detect x86 targets and whether any x86 SIMD codec was enabled at build
// time.
//
// BASE64_X86 is defined when compiling for 32-bit or 64-bit x86.
// GCC and Clang announce these targets with __i386__ / __x86_64__; MSVC
// announces them with _M_IX86 / _M_X64. (_M_X86 is not an MSVC macro, but it
// is still accepted here so that build setups that define it by hand keep
// working.)
//
// BASE64_X86_SIMD is additionally defined when at least one of the HAVE_*
// switches for an x86 SIMD codec is nonzero. The HAVE_* switches are tested
// by value, not by definedness, so `#define HAVE_AVX2 0` disables the codec.
#if (defined(__x86_64__) || defined(__i386__) || defined(_M_IX86) || defined(_M_X86) || defined(_M_X64))
	#define BASE64_X86
	#if (HAVE_SSSE3 || HAVE_SSE41 || HAVE_SSE42 || HAVE_AVX || HAVE_AVX2)
		#define BASE64_X86_SIMD
	#endif
#endif
15
// Support definitions for x86 runtime feature detection. Nothing in this
// section is compiled unless BASE64_X86 is defined.
#ifdef BASE64_X86
#ifdef _MSC_VER
	#include <intrin.h>

	// The detection code is written against the __cpuid_count() and
	// __cpuid() macros that the non-MSVC branch gets from <cpuid.h>.
	// Provide equivalents here on top of the __cpuidex() intrinsic.
	//
	// __cpuid_count(level, count, eax, ebx, ecx, edx) queries CPUID leaf
	// `level`, subleaf `count`, and stores the four result registers in
	// the given lvalues. The body is wrapped in do { } while (0) so that
	// the macro behaves as a single statement (safe in an unbraced
	// if/else), and the scratch array has a name that cannot shadow a
	// caller's local.
	#define __cpuid_count(__level, __count, __eax, __ebx, __ecx, __edx)	\
		do {								\
			int base64_cpuid_regs_[4];				\
			__cpuidex(base64_cpuid_regs_, (__level), (__count));	\
			(__eax) = base64_cpuid_regs_[0];			\
			(__ebx) = base64_cpuid_regs_[1];			\
			(__ecx) = base64_cpuid_regs_[2];			\
			(__edx) = base64_cpuid_regs_[3];			\
		} while (0)

	// __cpuid(level, ...) is __cpuid_count() with subleaf 0.
	#define __cpuid(__level, __eax, __ebx, __ecx, __edx)	\
		__cpuid_count(__level, 0, __eax, __ebx, __ecx, __edx)
#else
	#include <cpuid.h>
	#if HAVE_AVX2 || HAVE_AVX
		#if ((__GNUC__ > 4 || __GNUC__ == 4 && __GNUC_MINOR__ >= 2) || (__clang_major__ >= 3))
		// Execute XGETBV for the extended control register selected by
		// `index` and return its value, combining EDX (high 32 bits)
		// and EAX (low 32 bits) into one 64-bit result.
		static inline uint64_t _xgetbv (uint32_t index)
		{
			uint32_t eax, edx;
			__asm__ __volatile__("xgetbv" : "=a"(eax), "=d"(edx) : "c"(index));
			return ((uint64_t)edx << 32) | eax;
		}
		#else
		#error "Platform not supported"
		#endif
	#endif
#endif

// CPUID feature bits. Each one is only defined here if the headers included
// above did not already define it. bit_AVX2 is tested against EBX of leaf 7;
// the others are tested against ECX of leaf 1.
#ifndef bit_AVX2
#define bit_AVX2 (1 << 5)
#endif
#ifndef bit_SSSE3
#define bit_SSSE3 (1 << 9)
#endif
#ifndef bit_SSE41
#define bit_SSE41 (1 << 19)
#endif
#ifndef bit_SSE42
#define bit_SSE42 (1 << 20)
#endif
#ifndef bit_AVX
#define bit_AVX (1 << 28)
#endif

// Leaf 1, ECX bit 27: the OS has enabled XSAVE/XRSTOR, which is what makes
// XGETBV usable. (Intel's manuals call this bit OSXSAVE.)
#define bit_XSAVE_XRSTORE (1 << 27)

// Index of the extended control register that is passed to _xgetbv().
#ifndef _XCR_XFEATURE_ENABLED_MASK
#define _XCR_XFEATURE_ENABLED_MASK 0
#endif

// Bits 1 and 2 of that register: XMM state and YMM state are saved and
// restored by the OS. AVX needs both bits set.
#define _XCR_XMM_AND_YMM_STATE_ENABLED_BY_OS 0x6
#endif
70
// Function declarations.
// BASE64_CODEC_FUNCS(arch) expands to the encoder and the decoder declaration
// for one codec, using the BASE64_ENC_FUNCTION / BASE64_DEC_FUNCTION macros
// (presumably supplied by codecs.h). One line per codec this file can select.
#define BASE64_CODEC_FUNCS(arch)	\
	BASE64_ENC_FUNCTION(arch);	\
	BASE64_DEC_FUNCTION(arch);

// Portable fallback:
BASE64_CODEC_FUNCS(plain)

// x86 SIMD codecs:
BASE64_CODEC_FUNCS(ssse3)
BASE64_CODEC_FUNCS(sse41)
BASE64_CODEC_FUNCS(sse42)
BASE64_CODEC_FUNCS(avx)
BASE64_CODEC_FUNCS(avx2)

// ARM NEON codecs:
BASE64_CODEC_FUNCS(neon32)
BASE64_CODEC_FUNCS(neon64)
84
85static bool
86codec_choose_forced (struct codec *codec, int flags)
87{
88 // If the user wants to use a certain codec,
89 // always allow it, even if the codec is a no-op.
90 // For testing purposes.
91
92 if (!(flags & 0xFF)) {
93 return false;
94 }
95 if (flags & BASE64_FORCE_AVX2) {
96 codec->enc = base64_stream_encode_avx2;
97 codec->dec = base64_stream_decode_avx2;
98 return true;
99 }
100 if (flags & BASE64_FORCE_NEON32) {
101 codec->enc = base64_stream_encode_neon32;
102 codec->dec = base64_stream_decode_neon32;
103 return true;
104 }
105 if (flags & BASE64_FORCE_NEON64) {
106 codec->enc = base64_stream_encode_neon64;
107 codec->dec = base64_stream_decode_neon64;
108 return true;
109 }
110 if (flags & BASE64_FORCE_PLAIN) {
111 codec->enc = base64_stream_encode_plain;
112 codec->dec = base64_stream_decode_plain;
113 return true;
114 }
115 if (flags & BASE64_FORCE_SSSE3) {
116 codec->enc = base64_stream_encode_ssse3;
117 codec->dec = base64_stream_decode_ssse3;
118 return true;
119 }
120 if (flags & BASE64_FORCE_SSE41) {
121 codec->enc = base64_stream_encode_sse41;
122 codec->dec = base64_stream_decode_sse41;
123 return true;
124 }
125 if (flags & BASE64_FORCE_SSE42) {
126 codec->enc = base64_stream_encode_sse42;
127 codec->dec = base64_stream_decode_sse42;
128 return true;
129 }
130 if (flags & BASE64_FORCE_AVX) {
131 codec->enc = base64_stream_encode_avx;
132 codec->dec = base64_stream_decode_avx;
133 return true;
134 }
135 return false;
136}
137
// Install a NEON codec if one was enabled in the build configuration.
//
// Userland has no portable runtime probe for NEON comparable to cpuid on
// x86, so the decision is made entirely by the preprocessor: the compiler
// must announce NEON (__ARM_NEON__ or __ARM_NEON), and the matching
// HAVE_NEON64 / HAVE_NEON32 switch must be nonzero. The 64-bit codec is
// preferred on AArch64; otherwise the 32-bit codec is used.
//
// Returns true if a codec was installed into `codec`, false otherwise (in
// which case `codec` is left untouched).
static bool
codec_choose_arm (struct codec *codec)
{
#if (defined(__ARM_NEON__) || defined(__ARM_NEON)) && defined(__aarch64__) && HAVE_NEON64

	codec->enc = base64_stream_encode_neon64;
	codec->dec = base64_stream_decode_neon64;
	return true;

#elif (defined(__ARM_NEON__) || defined(__ARM_NEON)) && HAVE_NEON32

	codec->enc = base64_stream_encode_neon32;
	codec->dec = base64_stream_decode_neon32;
	return true;

#else

	// No NEON codec in this build:
	(void) codec;
	return false;

#endif
}
162
// Install the best x86 SIMD codec that was both compiled in (HAVE_* switch)
// and is reported as usable by the CPU and OS at runtime.
//
// Codecs are tried in the order AVX2, AVX, SSE4.2, SSE4.1, SSSE3; the first
// one that passes its checks is installed.
//
// Returns true if a codec was installed into `codec`, false otherwise (in
// which case `codec` is left untouched). When no x86 SIMD codec is compiled
// in (BASE64_X86_SIMD undefined), always returns false.
static bool
codec_choose_x86 (struct codec *codec)
{
#ifdef BASE64_X86_SIMD

	unsigned int eax, ebx = 0, ecx = 0, edx;
	unsigned int max_level;
	unsigned int leaf1_ecx;

	// Highest CPUID leaf that the CPU supports:
	#ifdef _MSC_VER
	int info[4];
	__cpuidex(info, 0, 0);
	max_level = info[0];
	#else
	max_level = __get_cpuid_max(0, NULL);
	#endif

	// Every check below needs the feature flags from leaf 1; without
	// that leaf nothing can be detected:
	if (max_level < 1) {
		return false;
	}

	// Query leaf 1 once and keep its ECX word, because the leaf 7 query
	// for AVX2 overwrites the scratch registers:
	__cpuid(1, eax, ebx, ecx, edx);
	leaf1_ecx = ecx;

	#if HAVE_AVX2 || HAVE_AVX
	// Check for AVX/AVX2 support:
	// Checking for AVX requires 3 things:
	// 1) CPUID indicates that the OS uses XSAVE and XRSTORE instructions
	//    (allowing saving YMM registers on context switch)
	// 2) CPUID indicates support for AVX
	// 3) XGETBV indicates the AVX registers will be saved and restored on
	//    context switch
	//
	// XGETBV must only be executed after check 1 has passed.
	if (leaf1_ecx & bit_XSAVE_XRSTORE) {
		uint64_t xcr_mask = _xgetbv(_XCR_XFEATURE_ENABLED_MASK);

		// Both the XMM and the YMM state bit must be set. Testing
		// with a plain `&` would also accept an OS that enabled only
		// one of the two, and then AVX instructions would fault:
		if ((xcr_mask & _XCR_XMM_AND_YMM_STATE_ENABLED_BY_OS) == _XCR_XMM_AND_YMM_STATE_ENABLED_BY_OS) {
			#if HAVE_AVX2
			if (max_level >= 7) {
				__cpuid_count(7, 0, eax, ebx, ecx, edx);
				if (ebx & bit_AVX2) {
					codec->enc = base64_stream_encode_avx2;
					codec->dec = base64_stream_decode_avx2;
					return true;
				}
			}
			#endif
			#if HAVE_AVX
			if (leaf1_ecx & bit_AVX) {
				codec->enc = base64_stream_encode_avx;
				codec->dec = base64_stream_decode_avx;
				return true;
			}
			#endif
		}
	}
	#endif

	#if HAVE_SSE42
	// Check for SSE42 support:
	if (leaf1_ecx & bit_SSE42) {
		codec->enc = base64_stream_encode_sse42;
		codec->dec = base64_stream_decode_sse42;
		return true;
	}
	#endif

	#if HAVE_SSE41
	// Check for SSE41 support:
	if (leaf1_ecx & bit_SSE41) {
		codec->enc = base64_stream_encode_sse41;
		codec->dec = base64_stream_decode_sse41;
		return true;
	}
	#endif

	#if HAVE_SSSE3
	// Check for SSSE3 support:
	if (leaf1_ecx & bit_SSSE3) {
		codec->enc = base64_stream_encode_ssse3;
		codec->dec = base64_stream_decode_ssse3;
		return true;
	}
	#endif

#else
	(void)codec;
#endif

	return false;
}
261
262void
263codec_choose (struct codec *codec, int flags)
264{
265 // User forced a codec:
266 if (codec_choose_forced(codec, flags)) {
267 return;
268 }
269
270 // Runtime feature detection:
271 if (codec_choose_arm(codec)) {
272 return;
273 }
274 if (codec_choose_x86(codec)) {
275 return;
276 }
277 codec->enc = base64_stream_encode_plain;
278 codec->dec = base64_stream_decode_plain;
279}
280