1 | /* Copyright (C) 2011-2012 Povilas Kanapickas <povilas@radix.lt> |
2 | |
3 | Distributed under the Boost Software License, Version 1.0. |
4 | (See accompanying file LICENSE_1_0.txt or copy at |
5 | http://www.boost.org/LICENSE_1_0.txt) |
6 | */ |
7 | |
8 | #ifndef LIBSIMDPP_SIMDPP_DETAIL_WORKAROUNDS_H |
9 | #define LIBSIMDPP_SIMDPP_DETAIL_WORKAROUNDS_H |
10 | |
11 | // this file contains workarounds for common compiler problems |
12 | |
/*  XOP `com` workaround switch.

    The implementation of XOP's com instruction is buggy in clang 3.4 and 3.5.
    SIMDPP_WORKAROUND_XOP_COM is defined to 1 for every clang 3.x release
    whose minor version is below 6; it is left undefined everywhere else.
*/
#if defined(__clang__)
#if (__clang_major__ == 3) && (__clang_minor__ <= 5)
#define SIMDPP_WORKAROUND_XOP_COM 1
#endif
#endif
17 | |
/*  aarch64 NEON: checks that apply only to genuine GCC 4.x (clang and the
    Intel compiler also define __GNUC__, so they are excluded explicitly).
*/
#if SIMDPP_USE_NEON64 && !defined(__clang__) && !defined(__INTEL_COMPILER)
#if __GNUC__ == 4

#if __GNUC_MINOR__ <= 8
/*  GCC 4.8 and older lack several intrinsics:
     - the vdupq_laneq_* family
     - the vreinterpretq_f64_* family
     - the vreinterpretq_*_f64 family
    so these releases are rejected outright.
*/
#error "The first supported GCC version for aarch64 NEON is 4.9"
#endif

#if __GNUC_MINOR__ <= 9
// Replacement for vmul_f64 built from a plain vector multiplication.
// NOTE(review): presumably needed because GCC 4.9 does not provide the
// intrinsic itself -- confirm against that release's arm_neon.h.
#define vmul_f64(x, y) ((float64x1_t)( ((float64x1_t)(x)) * ((float64x1_t)(y)) ))
#endif

#endif // __GNUC__ == 4
#endif // SIMDPP_USE_NEON64, real GCC only
32 | |
/*  AVX512F: reject compilers that are known not to work. */
#if SIMDPP_USE_AVX512F

/*  Genuine GCC older than 6.0 is rejected because of:
     - https://gcc.gnu.org/bugzilla/show_bug.cgi?id=70059:
       _mm512_inserti64x4(x, y, 0) and related intrinsics produce wrong code.
       GCC 4.9 has no _mm512_castsi256_si512 either, so there is no other way
       to convert between 256-bit and 512-bit vectors.
     - "Error: invalid register operand for `vpsrlw'" when compiling shift
       code with old binutils.
*/
#if defined(__GNUC__) && !defined(__clang__) && !defined(__INTEL_COMPILER) && (__GNUC__ < 6)
#error "The first supported GCC version for AVX512F is 6.0"
#endif

/*  Clang hits internal compiler errors when it tries to select an instruction
    for certain shuffle combinations. The shuffle detection is clever enough
    that the problem cannot be worked around. Affected major versions:
    9 when __APPLE__ is defined, 4 and 5 otherwise.
*/
#if defined(__APPLE__)
#if (__clang_major__ == 9)
#error Clang 4.x-5.x is not supported on AVX512F due to compiler bugs.
#endif
#elif (__clang_major__ == 4) || (__clang_major__ == 5)
#error Clang 4.x-5.x is not supported on AVX512F due to compiler bugs.
#endif

#endif // SIMDPP_USE_AVX512F
54 | |
/*  AVX512VL: clang 3.9 and older miscompile reduce_{min,max} for int32 and
    uint32, so every clang 3.x is rejected. The check is skipped when
    __APPLE__ is defined.
*/
#if SIMDPP_USE_AVX512VL && !defined(__APPLE__) && (__clang_major__ == 3)
#error Clang 3.9 and older is not supported on AVX512VL due to compiler bugs.
#endif
61 | |
/*  AVX / AVX2 on clang 3.6.

    See https://llvm.org/bugs/show_bug.cgi?id=23441. This clang release
    generates incorrect floating-point code for basic 256-bit operations such
    as those produced by _mm256_set_ps and _mm256_load_ps. Because the affected
    operations are so fundamental, a reliable workaround is practically
    impossible and the configuration is rejected.
*/
#if (SIMDPP_USE_AVX || SIMDPP_USE_AVX2) && (__clang_major__ == 3) && (__clang_minor__ == 6)
#error AVX and AVX2 are not supported on clang 3.6 due to compiler bugs
#endif
73 | |
/*  AVX on genuine GCC 4.4 (clang and the Intel compiler are excluded).

    This GCC release fails with "Error: operand size mismatch for `vmovq'"
    when compiling 256-bit shuffling intrinsics, and no workaround is known.
*/
#if SIMDPP_USE_AVX && !defined(__clang__) && !defined(__INTEL_COMPILER)
#if (__GNUC__ == 4) && (__GNUC_MINOR__ == 4)
#error AVX is not supported in GCC 4.4 due to compiler bugs
#endif
#endif
82 | |
/*  AVX2 shift-intrinsic workaround switch.

    Clang 3.4 and older may crash when any of the following intrinsics is
    called with arguments known at compile time:
      _mm256_sll_epi{16,32,64}, _mm256_srl_epi{16,32,64}, _mm256_sra_epi{16,32}
    SIMDPP_WORKAROUND_AVX2_SHIFT_INTRINSICS is defined to 1 on those compilers.
*/
#if (__clang_major__ == 3) && (__clang_minor__ < 5)
#define SIMDPP_WORKAROUND_AVX2_SHIFT_INTRINSICS 1
#endif
90 | |
/*  Altivec on little-endian PowerPC: genuine GCC older than 6.0 (clang and the
    Intel compiler are excluded) produces internal compiler errors or wrong
    behaviour on various SIMD memory operations, so it is rejected.

    The byte-order macros are checked with defined() before being compared.
    Compilers that predate __BYTE_ORDER__ / __ORDER_LITTLE_ENDIAN__ leave both
    undefined; in an #if expression both would then evaluate to 0, the equality
    test would succeed, and the little-endian error would fire regardless of
    the target's actual byte order.
*/
#if SIMDPP_USE_ALTIVEC
#if defined(__GNUC__) && (__GNUC__ < 6) && \
    defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && \
    (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) && \
    !defined(__INTEL_COMPILER) && !defined(__clang__)
#error GCC 5.x and older not supported on PPC little-endian due to compiler bugs.
#endif
#endif
98 | |
/*  VSX on big-endian PowerPC: genuine GCC older than 6.0 (clang and the Intel
    compiler are excluded) produces internal compiler errors or wrong behaviour
    on various SIMD memory operations, so it is rejected.

    NOTE: if a compiler defines neither __BYTE_ORDER__ nor
    __ORDER_BIG_ENDIAN__, both evaluate to 0 in the comparison below and the
    check is treated as matching.
*/
#if SIMDPP_USE_VSX_206 && defined(__GNUC__) && !defined(__clang__) && !defined(__INTEL_COMPILER)
#if (__GNUC__ < 6) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
#error GCC 5.x and older not supported on VSX big-endian due to compiler bugs.
#endif
#endif
106 | |
/*  Fallback _MM_CMPINT_* constants.

    Clang does not have the _MM_CMPINT_* definitions up to Clang 4.0, so for
    clang 3.x with AVX512F enabled each name is supplied as a macro unless a
    macro of that name already exists.
    NOTE(review): the values are presumably the predicate encodings of Intel's
    _MM_CMPINT_ENUM -- verify against the Intel Intrinsics Guide.
*/
#if SIMDPP_USE_AVX512F
#if __clang_major__ == 3

#if !defined(_MM_CMPINT_EQ)
#define _MM_CMPINT_EQ 0
#endif

#if !defined(_MM_CMPINT_LT)
#define _MM_CMPINT_LT 1
#endif

#if !defined(_MM_CMPINT_LE)
#define _MM_CMPINT_LE 2
#endif

#if !defined(_MM_CMPINT_FALSE)
#define _MM_CMPINT_FALSE 3
#endif

#if !defined(_MM_CMPINT_NEQ)
#define _MM_CMPINT_NEQ 4
#endif

#if !defined(_MM_CMPINT_NLT)
#define _MM_CMPINT_NLT 5
#endif

#if !defined(_MM_CMPINT_NLE)
#define _MM_CMPINT_NLE 6
#endif

#if !defined(_MM_CMPINT_TRUE)
#define _MM_CMPINT_TRUE 7
#endif

#endif // __clang_major__ == 3
#endif // SIMDPP_USE_AVX512F
134 | |
namespace simdpp {
namespace SIMDPP_ARCH_NAMESPACE {
namespace detail {

/** Wraps an unsigned compile-time value as a static member constant.

    @tparam V  the value to expose
    `make_constexpr<V>::value` is an integral constant expression equal to V.
*/
template<unsigned V>
struct make_constexpr {
    static const unsigned value = V;
};

// Out-of-class definition of the static member. The in-class initializer only
// declares it; without this definition any ODR-use of `value` (for example
// binding it to a `const unsigned&` parameter) fails at link time.
template<unsigned V>
const unsigned make_constexpr<V>::value;

} // namespace detail
} // namespace SIMDPP_ARCH_NAMESPACE
} // namespace simdpp
144 | |
/*  SIMDPP_WORKAROUND_MAKE_CONSTEXPR(X)

    When __GNUC__ is non-zero, X is routed through detail::make_constexpr so
    that the result is a static member constant; otherwise X is passed through
    in parentheses. The `detail::` qualifier is unqualified, so the macro must
    be expanded in a scope where `detail` names the namespace that contains
    make_constexpr.
    NOTE(review): presumably needed where GCC-compatible compilers insist on a
    syntactic constant (e.g. intrinsic immediates) -- confirm at the call sites.
*/
#if !__GNUC__
#define SIMDPP_WORKAROUND_MAKE_CONSTEXPR(X) (X)
#else
#define SIMDPP_WORKAROUND_MAKE_CONSTEXPR(X) detail::make_constexpr<(X)>::value
#endif
150 | |
151 | #endif |
152 | |