1 | /* Copyright (C) 2013 Povilas Kanapickas <povilas@radix.lt> |
2 | |
3 | Distributed under the Boost Software License, Version 1.0. |
4 | (See accompanying file LICENSE_1_0.txt or copy at |
5 | http://www.boost.org/LICENSE_1_0.txt) |
6 | */ |
7 | |
8 | #ifndef LIBSIMDPP_SIMD_SETUP_ARCH_H |
9 | #define LIBSIMDPP_SIMD_SETUP_ARCH_H |
10 | |
11 | #include <simdpp/detail/preprocessor.h> |
12 | #include <simdpp/detail/preprocessor/stringize.hpp> |
13 | |
// Set up macros for the current architecture. Note that this file may be
// included multiple times; more information on the caveats is within the file.
16 | #include <simdpp/detail/preprocess_single_arch.h> |
17 | |
// Main feature macros. Every SIMDPP_USE_<X> below is always defined: as 1 when
// the matching SIMDPP_ARCH_PP_USE_<X> evaluates to non-zero, as 0 otherwise
// (including when SIMDPP_ARCH_PP_USE_<X> is not defined at all).

#if SIMDPP_ARCH_PP_USE_NULL
#   define SIMDPP_USE_NULL 1
#else
#   define SIMDPP_USE_NULL 0
#endif

// SSE family
#if SIMDPP_ARCH_PP_USE_SSE2
#   define SIMDPP_USE_SSE2 1
#else
#   define SIMDPP_USE_SSE2 0
#endif
#if SIMDPP_ARCH_PP_USE_SSE3
#   define SIMDPP_USE_SSE3 1
#else
#   define SIMDPP_USE_SSE3 0
#endif
#if SIMDPP_ARCH_PP_USE_SSSE3
#   define SIMDPP_USE_SSSE3 1
#else
#   define SIMDPP_USE_SSSE3 0
#endif
#if SIMDPP_ARCH_PP_USE_SSE4_1
#   define SIMDPP_USE_SSE4_1 1
#else
#   define SIMDPP_USE_SSE4_1 0
#endif
#if SIMDPP_ARCH_PP_USE_X86_POPCNT_INSN
#   define SIMDPP_USE_X86_POPCNT_INSN 1
#else
#   define SIMDPP_USE_X86_POPCNT_INSN 0
#endif

// AVX, FMA and XOP
#if SIMDPP_ARCH_PP_USE_AVX
#   define SIMDPP_USE_AVX 1
#else
#   define SIMDPP_USE_AVX 0
#endif
#if SIMDPP_ARCH_PP_USE_AVX2
#   define SIMDPP_USE_AVX2 1
#else
#   define SIMDPP_USE_AVX2 0
#endif
#if SIMDPP_ARCH_PP_USE_FMA3
#   define SIMDPP_USE_FMA3 1
#else
#   define SIMDPP_USE_FMA3 0
#endif
#if SIMDPP_ARCH_PP_USE_FMA4
#   define SIMDPP_USE_FMA4 1
#else
#   define SIMDPP_USE_FMA4 0
#endif
#if SIMDPP_ARCH_PP_USE_XOP
#   define SIMDPP_USE_XOP 1
#else
#   define SIMDPP_USE_XOP 0
#endif

// AVX-512 subsets
#if SIMDPP_ARCH_PP_USE_AVX512F
#   define SIMDPP_USE_AVX512F 1
#else
#   define SIMDPP_USE_AVX512F 0
#endif
#if SIMDPP_ARCH_PP_USE_AVX512BW
#   define SIMDPP_USE_AVX512BW 1
#else
#   define SIMDPP_USE_AVX512BW 0
#endif
#if SIMDPP_ARCH_PP_USE_AVX512DQ
#   define SIMDPP_USE_AVX512DQ 1
#else
#   define SIMDPP_USE_AVX512DQ 0
#endif
#if SIMDPP_ARCH_PP_USE_AVX512VL
#   define SIMDPP_USE_AVX512VL 1
#else
#   define SIMDPP_USE_AVX512VL 0
#endif

// NEON
#if SIMDPP_ARCH_PP_USE_NEON
#   define SIMDPP_USE_NEON 1
#else
#   define SIMDPP_USE_NEON 0
#endif
#if SIMDPP_ARCH_PP_USE_NEON_FLT_SP
#   define SIMDPP_USE_NEON_FLT_SP 1
#else
#   define SIMDPP_USE_NEON_FLT_SP 0
#endif

// Altivec and VSX
#if SIMDPP_ARCH_PP_USE_ALTIVEC
#   define SIMDPP_USE_ALTIVEC 1
#else
#   define SIMDPP_USE_ALTIVEC 0
#endif
#if SIMDPP_ARCH_PP_USE_VSX_206
#   define SIMDPP_USE_VSX_206 1
#else
#   define SIMDPP_USE_VSX_206 0
#endif
#if SIMDPP_ARCH_PP_USE_VSX_207
#   define SIMDPP_USE_VSX_207 1
#else
#   define SIMDPP_USE_VSX_207 0
#endif

// MSA
#if SIMDPP_ARCH_PP_USE_MSA
#   define SIMDPP_USE_MSA 1
#else
#   define SIMDPP_USE_MSA 0
#endif
124 | |
// Building blocks of SIMDPP_ARCH_NAMESPACE, a human-readable identifier that
// depends on the enabled instruction sets. Every SIMDPP_NS_ID_<X> is always
// defined: it expands to SIMDPP_INSN_ID_<X> when SIMDPP_ARCH_PP_NS_USE_<X>
// evaluates to non-zero and to nothing otherwise.

#if SIMDPP_ARCH_PP_NS_USE_NULL
#   define SIMDPP_NS_ID_NULL SIMDPP_INSN_ID_NULL
#else
#   define SIMDPP_NS_ID_NULL
#endif

// SSE family
#if SIMDPP_ARCH_PP_NS_USE_SSE2
#   define SIMDPP_NS_ID_SSE2 SIMDPP_INSN_ID_SSE2
#else
#   define SIMDPP_NS_ID_SSE2
#endif
#if SIMDPP_ARCH_PP_NS_USE_SSE3
#   define SIMDPP_NS_ID_SSE3 SIMDPP_INSN_ID_SSE3
#else
#   define SIMDPP_NS_ID_SSE3
#endif
#if SIMDPP_ARCH_PP_NS_USE_SSSE3
#   define SIMDPP_NS_ID_SSSE3 SIMDPP_INSN_ID_SSSE3
#else
#   define SIMDPP_NS_ID_SSSE3
#endif
#if SIMDPP_ARCH_PP_NS_USE_SSE4_1
#   define SIMDPP_NS_ID_SSE4_1 SIMDPP_INSN_ID_SSE4_1
#else
#   define SIMDPP_NS_ID_SSE4_1
#endif
#if SIMDPP_ARCH_PP_NS_USE_POPCNT_INSN
#   define SIMDPP_NS_ID_POPCNT_INSN SIMDPP_INSN_ID_POPCNT_INSN
#else
#   define SIMDPP_NS_ID_POPCNT_INSN
#endif

// AVX, FMA and XOP
#if SIMDPP_ARCH_PP_NS_USE_AVX
#   define SIMDPP_NS_ID_AVX SIMDPP_INSN_ID_AVX
#else
#   define SIMDPP_NS_ID_AVX
#endif
#if SIMDPP_ARCH_PP_NS_USE_AVX2
#   define SIMDPP_NS_ID_AVX2 SIMDPP_INSN_ID_AVX2
#else
#   define SIMDPP_NS_ID_AVX2
#endif
#if SIMDPP_ARCH_PP_NS_USE_FMA3
#   define SIMDPP_NS_ID_FMA3 SIMDPP_INSN_ID_FMA3
#else
#   define SIMDPP_NS_ID_FMA3
#endif
#if SIMDPP_ARCH_PP_NS_USE_FMA4
#   define SIMDPP_NS_ID_FMA4 SIMDPP_INSN_ID_FMA4
#else
#   define SIMDPP_NS_ID_FMA4
#endif
#if SIMDPP_ARCH_PP_NS_USE_XOP
#   define SIMDPP_NS_ID_XOP SIMDPP_INSN_ID_XOP
#else
#   define SIMDPP_NS_ID_XOP
#endif

// AVX-512 subsets
#if SIMDPP_ARCH_PP_NS_USE_AVX512F
#   define SIMDPP_NS_ID_AVX512F SIMDPP_INSN_ID_AVX512F
#else
#   define SIMDPP_NS_ID_AVX512F
#endif
#if SIMDPP_ARCH_PP_NS_USE_AVX512BW
#   define SIMDPP_NS_ID_AVX512BW SIMDPP_INSN_ID_AVX512BW
#else
#   define SIMDPP_NS_ID_AVX512BW
#endif
#if SIMDPP_ARCH_PP_NS_USE_AVX512DQ
#   define SIMDPP_NS_ID_AVX512DQ SIMDPP_INSN_ID_AVX512DQ
#else
#   define SIMDPP_NS_ID_AVX512DQ
#endif
#if SIMDPP_ARCH_PP_NS_USE_AVX512VL
#   define SIMDPP_NS_ID_AVX512VL SIMDPP_INSN_ID_AVX512VL
#else
#   define SIMDPP_NS_ID_AVX512VL
#endif

// NEON
#if SIMDPP_ARCH_PP_NS_USE_NEON
#   define SIMDPP_NS_ID_NEON SIMDPP_INSN_ID_NEON
#else
#   define SIMDPP_NS_ID_NEON
#endif
#if SIMDPP_ARCH_PP_NS_USE_NEON_FLT_SP
#   define SIMDPP_NS_ID_NEON_FLT_SP SIMDPP_INSN_ID_NEON_FLT_SP
#else
#   define SIMDPP_NS_ID_NEON_FLT_SP
#endif

// Altivec and VSX
#if SIMDPP_ARCH_PP_NS_USE_ALTIVEC
#   define SIMDPP_NS_ID_ALTIVEC SIMDPP_INSN_ID_ALTIVEC
#else
#   define SIMDPP_NS_ID_ALTIVEC
#endif
#if SIMDPP_ARCH_PP_NS_USE_VSX_206
#   define SIMDPP_NS_ID_VSX_206 SIMDPP_INSN_ID_VSX_206
#else
#   define SIMDPP_NS_ID_VSX_206
#endif
#if SIMDPP_ARCH_PP_NS_USE_VSX_207
#   define SIMDPP_NS_ID_VSX_207 SIMDPP_INSN_ID_VSX_207
#else
#   define SIMDPP_NS_ID_VSX_207
#endif

// MSA
#if SIMDPP_ARCH_PP_NS_USE_MSA
#   define SIMDPP_NS_ID_MSA SIMDPP_INSN_ID_MSA
#else
#   define SIMDPP_NS_ID_MSA
#endif
232 | |
// Name of the architecture-specific namespace: the token `arch` followed by the
// 21 SIMDPP_NS_ID_* fragments, handed to SIMDPP_PP_PASTE22 (defined elsewhere;
// presumably it pastes its 22 arguments into a single identifier).
// The argument order determines the resulting name, so it must not change.
#define SIMDPP_ARCH_NAMESPACE                                                  \
    SIMDPP_PP_PASTE22(                                                         \
        arch,                                                                  \
        SIMDPP_NS_ID_NULL,                                                     \
        SIMDPP_NS_ID_SSE2, SIMDPP_NS_ID_SSE3, SIMDPP_NS_ID_SSSE3,              \
        SIMDPP_NS_ID_SSE4_1, SIMDPP_NS_ID_POPCNT_INSN,                         \
        SIMDPP_NS_ID_AVX, SIMDPP_NS_ID_AVX2,                                   \
        SIMDPP_NS_ID_AVX512F, SIMDPP_NS_ID_AVX512BW,                           \
        SIMDPP_NS_ID_AVX512DQ, SIMDPP_NS_ID_AVX512VL,                          \
        SIMDPP_NS_ID_FMA3, SIMDPP_NS_ID_FMA4, SIMDPP_NS_ID_XOP,                \
        SIMDPP_NS_ID_NEON, SIMDPP_NS_ID_NEON_FLT_SP,                           \
        SIMDPP_NS_ID_MSA,                                                      \
        SIMDPP_NS_ID_ALTIVEC, SIMDPP_NS_ID_VSX_206, SIMDPP_NS_ID_VSX_207)
255 | |
256 | #define SIMDPP_ARCH_PP_THIS_COMPILE_ARCH_NAMESPACE SIMDPP_ARCH_NAMESPACE |
257 | #define SIMDPP_ARCH_PP_THIS_COMPILE_ARCH_FOR_DISPATCH 0 |
258 | #include <simdpp/dispatch/preprocess_single_compile_arch.h> |
259 | #undef SIMDPP_ARCH_PP_THIS_COMPILE_ARCH_FOR_DISPATCH |
260 | #undef SIMDPP_ARCH_PP_THIS_COMPILE_ARCH_NAMESPACE |
261 | |
// Pull in the intrinsics headers for every enabled instruction set.

// SSE family
#if SIMDPP_USE_SSE2
#   include <xmmintrin.h>
#   include <emmintrin.h>
#endif
#if SIMDPP_USE_SSE3
#   include <pmmintrin.h>
#endif
#if SIMDPP_USE_SSSE3
#   include <tmmintrin.h>
#endif
#if SIMDPP_USE_SSE4_1
#   include <smmintrin.h>
#endif

// AVX, AVX2 and FMA3 all come from the same header
#if SIMDPP_USE_AVX || SIMDPP_USE_AVX2 || SIMDPP_USE_FMA3
#   include <immintrin.h>
#endif

#if SIMDPP_USE_FMA4
#   include <x86intrin.h>
    // The two FMA flavours are mutually exclusive in this library.
#   if SIMDPP_USE_FMA3
#       error "X86_FMA3 and X86_FMA4 can't be used together"
#   endif
#endif

#if SIMDPP_USE_XOP
#   include <x86intrin.h>
#endif

#if SIMDPP_USE_AVX512F || SIMDPP_USE_AVX512BW
#   include <immintrin.h>
#endif

// NEON
#if SIMDPP_USE_NEON || SIMDPP_USE_NEON_FLT_SP
#   include <arm_neon.h>
#endif

// Altivec
#if SIMDPP_USE_ALTIVEC
#   include <altivec.h>
    // Remove the `vector`, `pixel` and `bool` macros in case <altivec.h>
    // defined them, so that these names stay usable as ordinary identifiers.
#   undef vector
#   undef pixel
#   undef bool
#endif

// MSA
#if SIMDPP_USE_MSA
#   include <msa.h>
#endif
321 | |
// helper macros

// Target bitness. Exactly one of SIMDPP_64_BITS / SIMDPP_32_BITS is 1 and the
// other is 0. A target is treated as 64-bit when any of the listed compiler
// macros is set; everything else is treated as 32-bit.
//
// _M_AMD64/_M_X64 and _M_ARM64 are the MSVC spellings for x86-64 and AArch64;
// the remaining macros are the GCC/Clang spellings.
#if __amd64__ || __x86_64__ || _M_AMD64 || _M_X64 || \
    __aarch64__ || _M_ARM64 || __powerpc64__
#   define SIMDPP_64_BITS 1
#   define SIMDPP_32_BITS 0
#else
#   define SIMDPP_32_BITS 1
#   define SIMDPP_64_BITS 0
#endif
330 | |
// With NEON enabled on a 64-bit target, SIMDPP_USE_NEON_FLT_SP is forced to 1,
// overriding whatever value it was given earlier.
#if SIMDPP_USE_NEON
#   if SIMDPP_64_BITS
#       undef SIMDPP_USE_NEON_FLT_SP
#       define SIMDPP_USE_NEON_FLT_SP 1
#   endif
#endif
335 | |
// Byte order, determined only when Altivec is enabled. Relies on the
// compiler-provided __BYTE_ORDER__ macro; compilation stops if it is missing or
// matches neither __ORDER_LITTLE_ENDIAN__ nor __ORDER_BIG_ENDIAN__.
#if SIMDPP_USE_ALTIVEC
#   if !defined(__BYTE_ORDER__)
#       error "Could not determine byte order"
#   elif __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
#       define SIMDPP_LITTLE_ENDIAN 1
#       define SIMDPP_BIG_ENDIAN 0
#   elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
#       define SIMDPP_LITTLE_ENDIAN 0
#       define SIMDPP_BIG_ENDIAN 1
#   else
#       error "Could not determine byte order"
#   endif
#endif
349 | |
// Derived NEON selectors. Each one expands to an expression over the feature
// and bitness macros, so it is evaluated at the point of use with whatever
// values those macros have there.
//   NEON32         - NEON on a 32-bit target
//   NEON64         - NEON on a 64-bit target
//   NEON32_FLT_SP  - NEON_FLT_SP on a 32-bit target
//   NEON_NO_FLT_SP - NEON without NEON_FLT_SP
#define SIMDPP_USE_NEON32         (SIMDPP_USE_NEON && SIMDPP_32_BITS)
#define SIMDPP_USE_NEON64         (SIMDPP_USE_NEON && SIMDPP_64_BITS)
#define SIMDPP_USE_NEON32_FLT_SP  (SIMDPP_USE_NEON_FLT_SP && SIMDPP_32_BITS)
#define SIMDPP_USE_NEON_NO_FLT_SP (SIMDPP_USE_NEON && !SIMDPP_USE_NEON_FLT_SP)
354 | |
// CPU family of the compilation target. At most one of SIMDPP_X86, SIMDPP_ARM,
// SIMDPP_PPC and SIMDPP_MIPS is defined (as 1); none is defined when the target
// is not recognised.
//
// The x86 test checks __x86_64__ (previously misspelled as __x64_64__, which no
// compiler defines). _M_IX86, _M_AMD64, _M_X64, _M_ARM and _M_ARM64 are the
// MSVC spellings; the remaining macros are the GCC/Clang spellings.
#if __i386__ || __i386 || _M_IX86 || __amd64__ || __x86_64__ || _M_AMD64 || _M_X64
#   define SIMDPP_X86 1
#elif _M_ARM || _M_ARM64 || __arm__ || __aarch64__
#   define SIMDPP_ARM 1
#elif __powerpc__ || __powerpc64__
#   define SIMDPP_PPC 1
#elif __mips__
#   define SIMDPP_MIPS 1
#endif
364 | |
/** @def SIMDPP_ARCH_NAME
    SIMDPP_ARCH_NAMESPACE passed through SIMDPP_PP_STRINGIZE.
    Usable in contexts where a string is required.
*/
#define SIMDPP_ARCH_NAME SIMDPP_PP_STRINGIZE(SIMDPP_ARCH_NAMESPACE)
369 | |
// misc macros

// SIMDPP_INL: function specifier that asks for inlining as strongly as the
// compiler permits. Uses the always_inline attribute with GNU-compatible
// compilers, __forceinline with MSVC and plain `inline` everywhere else.
#if __GNUC__
#   define SIMDPP_INL __attribute__((__always_inline__)) inline
#elif _MSC_VER
#   define SIMDPP_INL __forceinline
#else
#   define SIMDPP_INL inline
#endif
378 | |
// SIMDPP_DEPRECATED(msg): marks a declaration as deprecated with the given
// message on GCC and Clang; expands to nothing on every other compiler.
#if !defined(__GNUC__) && !defined(__clang__)
#   define SIMDPP_DEPRECATED(msg)
#else
#   define SIMDPP_DEPRECATED(msg) __attribute__ ((deprecated(msg)))
#endif
384 | |
// SIMDPP_ALIGN(X): requests X-byte alignment for the declaration it is applied
// to. Only GNU-compatible compilers and MSVC are supported; any other compiler
// is rejected at this point.
#if !__GNUC__ && !_MSC_VER
#   error "Unsupported compiler"
#elif __GNUC__
#   define SIMDPP_ALIGN(X) __attribute__((__aligned__(X)))
#else
#   define SIMDPP_ALIGN(X) __declspec(align(X))
#endif
392 | |
// Library flavour selectors: the CXX11 flag is set and the CXX98 flag is
// cleared.
#define SIMDPP_LIBRARY_VERSION_CXX98 0
#define SIMDPP_LIBRARY_VERSION_CXX11 1
395 | |
396 | #include <simdpp/detail/workarounds.h> |
397 | #include <simdpp/deprecations.h> |
398 | |
399 | // #define SIMDPP_EXPR_DEBUG 1 |
400 | |
401 | // FIXME: unused (workarounds for AMD CPUs) |
402 | // #define SIMDPP_USE_AMD |
403 | |
404 | #endif |
405 | |