/*  Copyright (C) 2011-2012  Povilas Kanapickas <povilas@radix.lt>

    Distributed under the Boost Software License, Version 1.0.
        (See accompanying file LICENSE_1_0.txt or copy at
            http://www.boost.org/LICENSE_1_0.txt)
*/

#ifndef LIBSIMDPP_SIMDPP_DETAIL_WORKAROUNDS_H
#define LIBSIMDPP_SIMDPP_DETAIL_WORKAROUNDS_H

// This file contains workarounds for common compiler problems.
//
// Each section below either
//   - defines a SIMDPP_WORKAROUND_* flag or a replacement macro, or
//   - stops the build with #error when a compiler/instruction-set combination
//     is known to be broken.
//
// The SIMDPP_USE_* and SIMDPP_ARCH_NAMESPACE macros tested or used here are
// not defined in this file. Identifiers that are not defined as macros
// evaluate to 0 inside #if, so tests such as (__clang_major__ == 3) are simply
// false on compilers that do not define them.

// The implementation of XOP's com instruction is buggy in clang 3.5 and 3.4.
// The flag is defined for every clang 3.x release older than 3.6.
#if defined(__clang__) && (__clang_major__ == 3) && (__clang_minor__ < 6)
#define SIMDPP_WORKAROUND_XOP_COM 1
#endif

// 64-bit NEON (aarch64): GCC-only checks, Intel and clang are excluded.
#if SIMDPP_USE_NEON64
#if (__GNUC__ == 4) && (__GNUC_MINOR__ <= 8) && !defined(__INTEL_COMPILER) && !defined(__clang__)
/*  GCC 4.8 and older misses various functions:
     - vdupq_laneq_* family of functions
     - vreinterpretq_f64_* family of functions
     - vreinterpretq_*_f64 family of functions
*/
#error "The first supported GCC version for aarch64 NEON is 4.9"
#endif

// GCC 4.9 and older: provide vmul_f64 as a macro that casts both operands to
// float64x1_t and multiplies them with operator *.
// NOTE(review): presumably the intrinsic is missing in these releases - confirm.
#if (__GNUC__ == 4) && (__GNUC_MINOR__ <= 9) && !defined(__INTEL_COMPILER) && !defined(__clang__)
#define vmul_f64(x, y) ((float64x1_t)( ((float64x1_t)(x)) * ((float64x1_t)(y)) ))
#endif
#endif

// AVX512F: reject compilers that are known to miscompile or crash.
#if SIMDPP_USE_AVX512F
#if defined(__GNUC__) && (__GNUC__ < 6) && !defined(__INTEL_COMPILER) && !defined(__clang__)
/*  GCC 5.x and older have the following bugs:
     - https://gcc.gnu.org/bugzilla/show_bug.cgi?id=70059.
       _mm512_inserti64x4(x, y, 0) and related intrinsics result in wrong code.
       _mm512_castsi256_si512 is not available in GCC 4.9, thus there's no way
       to convert between 256-bit and 512-bit vectors.
     - Error: invalid register operand for `vpsrlw' when compiling shift code
       on old binutils
*/
#error "The first supported GCC version for AVX512F is 6.0"
#endif

// Matches clang major versions 4 and 5 when __APPLE__ is not defined, and
// clang major version 9 when it is.
// NOTE(review): presumably Apple's version numbering differs from upstream
// clang and 9 is the affected Apple release - confirm.
#if (!defined(__APPLE__) && ((__clang_major__ == 4) || (__clang_major__ == 5))) || \
    (defined(__APPLE__) && (__clang_major__ == 9))
// Internal compiler errors when trying to select wrong instruction for specific
// combination of shuffles. Not possible to work around as shuffle detection is
// quite clever.
#error Clang 4.x-5.x is not supported on AVX512F due to compiler bugs.
#endif
#endif

// AVX512VL: every clang 3.x release is rejected when __APPLE__ is not defined.
#if SIMDPP_USE_AVX512VL
#if !defined(__APPLE__) && (__clang_major__ == 3)
// clang 3.9 and older incorrectly compile reduce_{min,max} for int32 and uint32
#error Clang 3.9 and older is not supported on AVX512VL due to compiler bugs.
#endif
#endif

// AVX / AVX2: clang 3.6 is rejected.
#if SIMDPP_USE_AVX || SIMDPP_USE_AVX2
#if (__clang_major__ == 3) && (__clang_minor__ == 6)
/*  See https://llvm.org/bugs/show_bug.cgi?id=23441. Clang does not generate
    correct floating-point code for basic 256-bit floating-point operations,
    such as those resulting from _mm256_set_ps, _mm256_load_ps. Due to the
    nature of affected operations, the bug is almost impossible to work around
    reliably.
*/
#error AVX and AVX2 are not supported on clang 3.6 due to compiler bugs
#endif
#endif

// AVX: GCC 4.4 is rejected (Intel and clang are excluded from the check).
#if SIMDPP_USE_AVX
#if (__GNUC__ == 4) && (__GNUC_MINOR__ == 4) && !defined(__INTEL_COMPILER) && !defined(__clang__)
/*  GCC emits "Error: operand size mismatch for `vmovq'" when compiling
    256-bit shuffling intrinsics. No workaround has been found yet
*/
#error AVX is not supported in GCC 4.4 due to compiler bugs
#endif
#endif

/*  Clang 3.4 and older may crash when the following intrinsics are used with
    arguments that are known at compile time: _mm256_sll_epi{16,32,64},
    _mm256_srl_epi{16,32,64}, _mm256_sra_epi{16,32}

    The flag below is defined for clang 3.0 through 3.4 regardless of the
    selected instruction set.
*/
#if (__clang_major__ == 3) && (__clang_minor__ <= 4)
#define SIMDPP_WORKAROUND_AVX2_SHIFT_INTRINSICS 1
#endif

// Altivec: GCC older than 6 is rejected on little-endian targets.
#if SIMDPP_USE_ALTIVEC
#if defined(__GNUC__) && (__GNUC__ < 6) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ && \
    !defined(__INTEL_COMPILER) && !defined(__clang__)
// Internal compiler errors or wrong behaviour on various SIMD memory operations
#error GCC 5.x and older not supported on PPC little-endian due to compiler bugs.
#endif
#endif

// VSX 2.06: GCC older than 6 is rejected on big-endian targets.
#if SIMDPP_USE_VSX_206
#if defined(__GNUC__) && (__GNUC__ < 6) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ && \
    !defined(__INTEL_COMPILER) && !defined(__clang__)
// Internal compiler errors or wrong behaviour on various SIMD memory operations
#error GCC 5.x and older not supported on VSX big-endian due to compiler bugs.
#endif
#endif

#if SIMDPP_USE_AVX512F && (__clang_major__ == 3)
// Clang does not have _MM_CMPINT_* definitions up to Clang 4.0.
// Each fallback is wrapped in #ifndef so that a macro of the same name that is
// already defined is left untouched.
#ifndef _MM_CMPINT_EQ
#define _MM_CMPINT_EQ 0
#endif
#ifndef _MM_CMPINT_LT
#define _MM_CMPINT_LT 1
#endif
#ifndef _MM_CMPINT_LE
#define _MM_CMPINT_LE 2
#endif
#ifndef _MM_CMPINT_FALSE
#define _MM_CMPINT_FALSE 3
#endif
#ifndef _MM_CMPINT_NEQ
#define _MM_CMPINT_NEQ 4
#endif
#ifndef _MM_CMPINT_NLT
#define _MM_CMPINT_NLT 5
#endif
#ifndef _MM_CMPINT_NLE
#define _MM_CMPINT_NLE 6
#endif
#ifndef _MM_CMPINT_TRUE
#define _MM_CMPINT_TRUE 7
#endif
#endif

namespace simdpp {
namespace SIMDPP_ARCH_NAMESPACE {
namespace detail {

/*  Exposes the unsigned template argument V as the static constant `value`.
    Used by SIMDPP_WORKAROUND_MAKE_CONSTEXPR below to route an expression
    through a template argument.
*/
template<unsigned V> struct make_constexpr { static const unsigned value = V; };

// Namespace-scope definition of the member. Without it any odr-use of `value`
// (for example binding it to a const reference) would leave an undefined
// symbol at link time, because the in-class initializer is only a declaration.
template<unsigned V> const unsigned make_constexpr<V>::value;

} // namespace detail
} // namespace SIMDPP_ARCH_NAMESPACE
} // namespace simdpp

/*  SIMDPP_WORKAROUND_MAKE_CONSTEXPR(X)

    On compilers that define __GNUC__ the macro expands to
    detail::make_constexpr<(X)>::value, so X must be usable as an unsigned
    template argument and the macro must be used in a scope where `detail`
    names simdpp::SIMDPP_ARCH_NAMESPACE::detail. On all other compilers it
    expands to (X) unchanged.
*/
#if __GNUC__
#define SIMDPP_WORKAROUND_MAKE_CONSTEXPR(X) detail::make_constexpr<(X)>::value
#else
#define SIMDPP_WORKAROUND_MAKE_CONSTEXPR(X) (X)
#endif

#endif
| 152 | |