1/* Copyright (C) 2013 Povilas Kanapickas <povilas@radix.lt>
2
3 Distributed under the Boost Software License, Version 1.0.
4 (See accompanying file LICENSE_1_0.txt or copy at
5 http://www.boost.org/LICENSE_1_0.txt)
6*/
7
8#ifndef LIBSIMDPP_SIMD_SETUP_ARCH_H
9#define LIBSIMDPP_SIMD_SETUP_ARCH_H
10
11#include <simdpp/detail/preprocessor.h>
12#include <simdpp/detail/preprocessor/stringize.hpp>
13
// Set up macros for the current architecture. Note that this file may be
// included multiple times; more information on the caveats is within the file.
16#include <simdpp/detail/preprocess_single_arch.h>
17
18// Set up main feature macros
// Normalize the SIMDPP_ARCH_PP_USE_NULL selector into SIMDPP_USE_NULL, which
// is always defined afterwards and is exactly 0 or 1.
#if SIMDPP_ARCH_PP_USE_NULL
    #define SIMDPP_USE_NULL 1
#else
    #define SIMDPP_USE_NULL 0
#endif
// x86 feature macros. Every SIMDPP_USE_<X> below mirrors the truth value of
// its SIMDPP_ARCH_PP_USE_<X> selector and is always defined as exactly 0 or 1,
// so later code can test it with a plain #if.

// --- SSE family ---
#if SIMDPP_ARCH_PP_USE_SSE2
    #define SIMDPP_USE_SSE2 1
#else
    #define SIMDPP_USE_SSE2 0
#endif
#if SIMDPP_ARCH_PP_USE_SSE3
    #define SIMDPP_USE_SSE3 1
#else
    #define SIMDPP_USE_SSE3 0
#endif
#if SIMDPP_ARCH_PP_USE_SSSE3
    #define SIMDPP_USE_SSSE3 1
#else
    #define SIMDPP_USE_SSSE3 0
#endif
#if SIMDPP_ARCH_PP_USE_SSE4_1
    #define SIMDPP_USE_SSE4_1 1
#else
    #define SIMDPP_USE_SSE4_1 0
#endif

// NOTE(review): this selector is spelled ..._USE_X86_POPCNT_INSN, while the
// namespace-ID section of this file tests SIMDPP_ARCH_PP_NS_USE_POPCNT_INSN
// (no X86_ infix). Confirm both spellings match preprocess_single_arch.h.
#if SIMDPP_ARCH_PP_USE_X86_POPCNT_INSN
    #define SIMDPP_USE_X86_POPCNT_INSN 1
#else
    #define SIMDPP_USE_X86_POPCNT_INSN 0
#endif

// --- AVX / AVX2 ---
#if SIMDPP_ARCH_PP_USE_AVX
    #define SIMDPP_USE_AVX 1
#else
    #define SIMDPP_USE_AVX 0
#endif
#if SIMDPP_ARCH_PP_USE_AVX2
    #define SIMDPP_USE_AVX2 1
#else
    #define SIMDPP_USE_AVX2 0
#endif

// --- FMA3 / FMA4 / XOP ---
#if SIMDPP_ARCH_PP_USE_FMA3
    #define SIMDPP_USE_FMA3 1
#else
    #define SIMDPP_USE_FMA3 0
#endif
#if SIMDPP_ARCH_PP_USE_FMA4
    #define SIMDPP_USE_FMA4 1
#else
    #define SIMDPP_USE_FMA4 0
#endif
#if SIMDPP_ARCH_PP_USE_XOP
    #define SIMDPP_USE_XOP 1
#else
    #define SIMDPP_USE_XOP 0
#endif

// --- AVX-512 subsets ---
#if SIMDPP_ARCH_PP_USE_AVX512F
    #define SIMDPP_USE_AVX512F 1
#else
    #define SIMDPP_USE_AVX512F 0
#endif
#if SIMDPP_ARCH_PP_USE_AVX512BW
    #define SIMDPP_USE_AVX512BW 1
#else
    #define SIMDPP_USE_AVX512BW 0
#endif
#if SIMDPP_ARCH_PP_USE_AVX512DQ
    #define SIMDPP_USE_AVX512DQ 1
#else
    #define SIMDPP_USE_AVX512DQ 0
#endif
#if SIMDPP_ARCH_PP_USE_AVX512VL
    #define SIMDPP_USE_AVX512VL 1
#else
    #define SIMDPP_USE_AVX512VL 0
#endif
// NEON feature macros: each is always defined as exactly 0 or 1, mirroring
// the truth value of the corresponding SIMDPP_ARCH_PP_USE_* selector.
#if SIMDPP_ARCH_PP_USE_NEON
    #define SIMDPP_USE_NEON 1
#else
    #define SIMDPP_USE_NEON 0
#endif
#if SIMDPP_ARCH_PP_USE_NEON_FLT_SP
    #define SIMDPP_USE_NEON_FLT_SP 1
#else
    #define SIMDPP_USE_NEON_FLT_SP 0
#endif
// Altivec / VSX feature macros: each is always defined as exactly 0 or 1,
// mirroring the truth value of the corresponding SIMDPP_ARCH_PP_USE_* selector.
#if SIMDPP_ARCH_PP_USE_ALTIVEC
    #define SIMDPP_USE_ALTIVEC 1
#else
    #define SIMDPP_USE_ALTIVEC 0
#endif
#if SIMDPP_ARCH_PP_USE_VSX_206
    #define SIMDPP_USE_VSX_206 1
#else
    #define SIMDPP_USE_VSX_206 0
#endif
#if SIMDPP_ARCH_PP_USE_VSX_207
    #define SIMDPP_USE_VSX_207 1
#else
    #define SIMDPP_USE_VSX_207 0
#endif
// MSA feature macro: always defined as exactly 0 or 1, mirroring the truth
// value of SIMDPP_ARCH_PP_USE_MSA.
#if SIMDPP_ARCH_PP_USE_MSA
    #define SIMDPP_USE_MSA 1
#else
    #define SIMDPP_USE_MSA 0
#endif
124
125// Generate SIMDPP_ARCH_NAMESPACE. It's a human-readable identifier depending
126// on the enabled instruction sets
// Namespace fragment for the NULL instruction set: expands to
// SIMDPP_INSN_ID_NULL when SIMDPP_ARCH_PP_NS_USE_NULL is true, and to nothing
// otherwise.
#if SIMDPP_ARCH_PP_NS_USE_NULL
    #define SIMDPP_NS_ID_NULL SIMDPP_INSN_ID_NULL
#else
    #define SIMDPP_NS_ID_NULL
#endif
// x86 namespace fragments. Each SIMDPP_NS_ID_<X> expands to the matching
// SIMDPP_INSN_ID_<X> token when SIMDPP_ARCH_PP_NS_USE_<X> is true, and to
// nothing otherwise.

// --- SSE family ---
#if SIMDPP_ARCH_PP_NS_USE_SSE2
    #define SIMDPP_NS_ID_SSE2 SIMDPP_INSN_ID_SSE2
#else
    #define SIMDPP_NS_ID_SSE2
#endif
#if SIMDPP_ARCH_PP_NS_USE_SSE3
    #define SIMDPP_NS_ID_SSE3 SIMDPP_INSN_ID_SSE3
#else
    #define SIMDPP_NS_ID_SSE3
#endif
#if SIMDPP_ARCH_PP_NS_USE_SSSE3
    #define SIMDPP_NS_ID_SSSE3 SIMDPP_INSN_ID_SSSE3
#else
    #define SIMDPP_NS_ID_SSSE3
#endif
#if SIMDPP_ARCH_PP_NS_USE_SSE4_1
    #define SIMDPP_NS_ID_SSE4_1 SIMDPP_INSN_ID_SSE4_1
#else
    #define SIMDPP_NS_ID_SSE4_1
#endif

// NOTE(review): the feature-macro section of this file spells the popcnt
// selector SIMDPP_ARCH_PP_USE_X86_POPCNT_INSN (with an X86_ infix); the
// selector here has none. Confirm against preprocess_single_arch.h.
#if SIMDPP_ARCH_PP_NS_USE_POPCNT_INSN
    #define SIMDPP_NS_ID_POPCNT_INSN SIMDPP_INSN_ID_POPCNT_INSN
#else
    #define SIMDPP_NS_ID_POPCNT_INSN
#endif

// --- AVX / AVX2 ---
#if SIMDPP_ARCH_PP_NS_USE_AVX
    #define SIMDPP_NS_ID_AVX SIMDPP_INSN_ID_AVX
#else
    #define SIMDPP_NS_ID_AVX
#endif
#if SIMDPP_ARCH_PP_NS_USE_AVX2
    #define SIMDPP_NS_ID_AVX2 SIMDPP_INSN_ID_AVX2
#else
    #define SIMDPP_NS_ID_AVX2
#endif

// --- FMA3 / FMA4 / XOP ---
#if SIMDPP_ARCH_PP_NS_USE_FMA3
    #define SIMDPP_NS_ID_FMA3 SIMDPP_INSN_ID_FMA3
#else
    #define SIMDPP_NS_ID_FMA3
#endif
#if SIMDPP_ARCH_PP_NS_USE_FMA4
    #define SIMDPP_NS_ID_FMA4 SIMDPP_INSN_ID_FMA4
#else
    #define SIMDPP_NS_ID_FMA4
#endif
#if SIMDPP_ARCH_PP_NS_USE_XOP
    #define SIMDPP_NS_ID_XOP SIMDPP_INSN_ID_XOP
#else
    #define SIMDPP_NS_ID_XOP
#endif

// --- AVX-512 subsets ---
#if SIMDPP_ARCH_PP_NS_USE_AVX512F
    #define SIMDPP_NS_ID_AVX512F SIMDPP_INSN_ID_AVX512F
#else
    #define SIMDPP_NS_ID_AVX512F
#endif
#if SIMDPP_ARCH_PP_NS_USE_AVX512BW
    #define SIMDPP_NS_ID_AVX512BW SIMDPP_INSN_ID_AVX512BW
#else
    #define SIMDPP_NS_ID_AVX512BW
#endif
#if SIMDPP_ARCH_PP_NS_USE_AVX512DQ
    #define SIMDPP_NS_ID_AVX512DQ SIMDPP_INSN_ID_AVX512DQ
#else
    #define SIMDPP_NS_ID_AVX512DQ
#endif
#if SIMDPP_ARCH_PP_NS_USE_AVX512VL
    #define SIMDPP_NS_ID_AVX512VL SIMDPP_INSN_ID_AVX512VL
#else
    #define SIMDPP_NS_ID_AVX512VL
#endif
// NEON namespace fragments: each expands to its SIMDPP_INSN_ID_* token when
// the SIMDPP_ARCH_PP_NS_USE_* selector is true, and to nothing otherwise.
#if SIMDPP_ARCH_PP_NS_USE_NEON
    #define SIMDPP_NS_ID_NEON SIMDPP_INSN_ID_NEON
#else
    #define SIMDPP_NS_ID_NEON
#endif
#if SIMDPP_ARCH_PP_NS_USE_NEON_FLT_SP
    #define SIMDPP_NS_ID_NEON_FLT_SP SIMDPP_INSN_ID_NEON_FLT_SP
#else
    #define SIMDPP_NS_ID_NEON_FLT_SP
#endif
// Altivec / VSX namespace fragments: each expands to its SIMDPP_INSN_ID_*
// token when the SIMDPP_ARCH_PP_NS_USE_* selector is true, and to nothing
// otherwise.
#if SIMDPP_ARCH_PP_NS_USE_ALTIVEC
    #define SIMDPP_NS_ID_ALTIVEC SIMDPP_INSN_ID_ALTIVEC
#else
    #define SIMDPP_NS_ID_ALTIVEC
#endif
#if SIMDPP_ARCH_PP_NS_USE_VSX_206
    #define SIMDPP_NS_ID_VSX_206 SIMDPP_INSN_ID_VSX_206
#else
    #define SIMDPP_NS_ID_VSX_206
#endif
#if SIMDPP_ARCH_PP_NS_USE_VSX_207
    #define SIMDPP_NS_ID_VSX_207 SIMDPP_INSN_ID_VSX_207
#else
    #define SIMDPP_NS_ID_VSX_207
#endif
// MSA namespace fragment: expands to SIMDPP_INSN_ID_MSA when
// SIMDPP_ARCH_PP_NS_USE_MSA is true, and to nothing otherwise.
#if SIMDPP_ARCH_PP_NS_USE_MSA
    #define SIMDPP_NS_ID_MSA SIMDPP_INSN_ID_MSA
#else
    #define SIMDPP_NS_ID_MSA
#endif
232
// SIMDPP_ARCH_NAMESPACE hands the token `arch` plus the 21 SIMDPP_NS_ID_*
// fragments (22 arguments in total) to SIMDPP_PP_PASTE22. Fragments whose
// selector is off are defined as empty, so only the selected ones contribute.
// NOTE(review): SIMDPP_PP_PASTE22 is defined outside this file; presumably it
// token-pastes its arguments in the order given -- confirm there. The argument
// order is kept exactly as it was (note the AVX-512 fragments precede
// FMA3/FMA4/XOP); only the line layout groups them by family.
#define SIMDPP_ARCH_NAMESPACE SIMDPP_PP_PASTE22(arch, \
    SIMDPP_NS_ID_NULL, \
    SIMDPP_NS_ID_SSE2, SIMDPP_NS_ID_SSE3, SIMDPP_NS_ID_SSSE3, \
    SIMDPP_NS_ID_SSE4_1, SIMDPP_NS_ID_POPCNT_INSN, \
    SIMDPP_NS_ID_AVX, SIMDPP_NS_ID_AVX2, \
    SIMDPP_NS_ID_AVX512F, SIMDPP_NS_ID_AVX512BW, \
    SIMDPP_NS_ID_AVX512DQ, SIMDPP_NS_ID_AVX512VL, \
    SIMDPP_NS_ID_FMA3, SIMDPP_NS_ID_FMA4, SIMDPP_NS_ID_XOP, \
    SIMDPP_NS_ID_NEON, SIMDPP_NS_ID_NEON_FLT_SP, \
    SIMDPP_NS_ID_MSA, \
    SIMDPP_NS_ID_ALTIVEC, SIMDPP_NS_ID_VSX_206, SIMDPP_NS_ID_VSX_207)
255
256#define SIMDPP_ARCH_PP_THIS_COMPILE_ARCH_NAMESPACE SIMDPP_ARCH_NAMESPACE
257#define SIMDPP_ARCH_PP_THIS_COMPILE_ARCH_FOR_DISPATCH 0
258#include <simdpp/dispatch/preprocess_single_compile_arch.h>
259#undef SIMDPP_ARCH_PP_THIS_COMPILE_ARCH_FOR_DISPATCH
260#undef SIMDPP_ARCH_PP_THIS_COMPILE_ARCH_NAMESPACE
261
262// Include headers relevant for the enabled instruction sets.
// x86 intrinsic headers. Conditions that pulled in the same header are merged;
// this relies on the system headers being safe to include more than once. The
// relative order of the distinct headers is unchanged.
#if SIMDPP_USE_SSE2
    #include <xmmintrin.h>
    #include <emmintrin.h>
#endif
#if SIMDPP_USE_SSE3
    #include <pmmintrin.h>
#endif
#if SIMDPP_USE_SSSE3
    #include <tmmintrin.h>
#endif
#if SIMDPP_USE_SSE4_1
    #include <smmintrin.h>
#endif

// AVX, AVX2 and FMA3 all request the same header.
#if SIMDPP_USE_AVX || SIMDPP_USE_AVX2 || SIMDPP_USE_FMA3
    #include <immintrin.h>
#endif

// FMA4 and XOP both request x86intrin.h.
#if SIMDPP_USE_FMA4 || SIMDPP_USE_XOP
    #include <x86intrin.h>
#endif

// FMA3 and FMA4 must not be enabled at the same time.
#if SIMDPP_USE_FMA4 && SIMDPP_USE_FMA3
    #error "X86_FMA3 and X86_FMA4 can't be used together"
#endif

#if SIMDPP_USE_AVX512F || SIMDPP_USE_AVX512BW
    #include <immintrin.h>
#endif
306
// ARM: either NEON selector pulls in the NEON header.
#if SIMDPP_USE_NEON_FLT_SP || SIMDPP_USE_NEON
    #include <arm_neon.h>
#endif

// PowerPC: the three #undefs run immediately after the include.
// NOTE(review): presumably altivec.h leaves `vector`, `pixel` and `bool`
// defined as macros that would clash with ordinary identifiers -- confirm
// against the compiler's altivec.h.
#if SIMDPP_USE_ALTIVEC
    #include <altivec.h>
    #undef bool
    #undef pixel
    #undef vector
#endif

// MIPS MSA
#if SIMDPP_USE_MSA
    #include <msa.h>
#endif
321
322// helper macros
// Target width flags. Exactly one of SIMDPP_64_BITS / SIMDPP_32_BITS is 1 and
// the other is 0. Any target not matched by the 64-bit list below is treated
// as 32-bit.
#if __x86_64__ || __amd64__ || _M_AMD64 || __aarch64__ || __powerpc64__
    #define SIMDPP_32_BITS 0
    #define SIMDPP_64_BITS 1
#else
    #define SIMDPP_64_BITS 0
    #define SIMDPP_32_BITS 1
#endif
330
// On 64-bit targets with NEON enabled, SIMDPP_USE_NEON_FLT_SP is forced to 1
// regardless of its earlier value. NOTE(review): presumably because 64-bit
// NEON always provides single-precision float support -- confirm.
#if SIMDPP_64_BITS && SIMDPP_USE_NEON
    #undef SIMDPP_USE_NEON_FLT_SP
    #define SIMDPP_USE_NEON_FLT_SP 1
#endif
335
// Byte-order flags. They are only defined when Altivec is enabled; the build
// is rejected if __BYTE_ORDER__ is missing or matches neither known order.
// The defined() guard matters: without it, two undefined macros would compare
// equal (0 == 0) inside #if.
#if SIMDPP_USE_ALTIVEC
    #if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
        #define SIMDPP_LITTLE_ENDIAN 1
        #define SIMDPP_BIG_ENDIAN 0
    #elif defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
        #define SIMDPP_LITTLE_ENDIAN 0
        #define SIMDPP_BIG_ENDIAN 1
    #else
        #error "Could not determine byte order"
    #endif
#endif
349
// Derived NEON selectors. Each expands to a parenthesised expression over the
// 0/1 macros defined earlier in this file, so it can be used directly in #if.
//   SIMDPP_USE_NEON32         - NEON on a 32-bit target
//   SIMDPP_USE_NEON64         - NEON on a 64-bit target
//   SIMDPP_USE_NEON32_FLT_SP  - SIMDPP_USE_NEON_FLT_SP on a 32-bit target
//   SIMDPP_USE_NEON_NO_FLT_SP - NEON with SIMDPP_USE_NEON_FLT_SP off
#define SIMDPP_USE_NEON32         (SIMDPP_USE_NEON && SIMDPP_32_BITS)
#define SIMDPP_USE_NEON64         (SIMDPP_USE_NEON && SIMDPP_64_BITS)
#define SIMDPP_USE_NEON32_FLT_SP  (SIMDPP_USE_NEON_FLT_SP && SIMDPP_32_BITS)
#define SIMDPP_USE_NEON_NO_FLT_SP (SIMDPP_USE_NEON && !SIMDPP_USE_NEON_FLT_SP)
354
// Target CPU family. At most one of SIMDPP_X86 / SIMDPP_ARM / SIMDPP_PPC /
// SIMDPP_MIPS is defined (as 1); the others are left undefined rather than
// set to 0. If no branch matches, none of them is defined.
//
// The x86 test uses __x86_64__; the previous spelling __x64_64__ is not a
// macro any compiler predefines, so that operand could never be true.
#if __i386__ || __i386 || _M_IX86 || __amd64__ || __x86_64__ || _M_AMD64 || _M_X64
#define SIMDPP_X86 1
#elif _M_ARM || __arm__ || __aarch64__
#define SIMDPP_ARM 1
#elif __powerpc__ || __powerpc64__
#define SIMDPP_PPC 1
#elif __mips__
#define SIMDPP_MIPS 1
#endif
364
/** @def SIMDPP_ARCH_NAME
    SIMDPP_ARCH_NAMESPACE passed through SIMDPP_PP_STRINGIZE, for use in
    contexts where a string is required rather than an identifier.
    NOTE(review): SIMDPP_PP_STRINGIZE is defined outside this file; presumably
    it expands its argument before stringizing -- confirm there.
*/
#define SIMDPP_ARCH_NAME SIMDPP_PP_STRINGIZE(SIMDPP_ARCH_NAMESPACE)
369
370// misc macros
// SIMDPP_INL: function specifier that requests forced inlining where the
// compiler offers a way to ask for it, and falls back to plain `inline`
// otherwise. The __GNUC__ test comes first, so it wins when both __GNUC__ and
// _MSC_VER are set.
#if __GNUC__
    #define SIMDPP_INL __attribute__((__always_inline__)) inline
#elif _MSC_VER
    #define SIMDPP_INL __forceinline
#else
    #define SIMDPP_INL inline
#endif
378
// SIMDPP_DEPRECATED(msg): marks a declaration as deprecated with the given
// message when __GNUC__ or __clang__ is defined; on every other compiler it
// expands to nothing.
#if !defined(__GNUC__) && !defined(__clang__)
    #define SIMDPP_DEPRECATED(msg)
#else
    #define SIMDPP_DEPRECATED(msg) __attribute__ ((deprecated(msg)))
#endif
384
// SIMDPP_ALIGN(X): alignment specifier requesting X-byte alignment for the
// declaration it is attached to. Unlike SIMDPP_INL there is no portable
// fallback, so any compiler that sets neither __GNUC__ nor _MSC_VER is
// rejected.
#if __GNUC__
    #define SIMDPP_ALIGN(X) __attribute__((__aligned__(X)))
#elif _MSC_VER
    #define SIMDPP_ALIGN(X) __declspec(align(X))
#else
    #error "Unsupported compiler"
#endif
392
// Library flavour flags: in this header the CXX11 flag is 1 and the CXX98
// flag is 0. NOTE(review): presumably these let client code tell the C++11
// variant of the library from a C++98 one -- confirm.
#define SIMDPP_LIBRARY_VERSION_CXX98 0
#define SIMDPP_LIBRARY_VERSION_CXX11 1
395
396#include <simdpp/detail/workarounds.h>
397#include <simdpp/deprecations.h>
398
399// #define SIMDPP_EXPR_DEBUG 1
400
401// FIXME: unused (workarounds for AMD CPUs)
402// #define SIMDPP_USE_AMD
403
404#endif
405