| 1 | /* Copyright (C) 2013-2017 Povilas Kanapickas <povilas@radix.lt> |
| 2 | |
| 3 | Distributed under the Boost Software License, Version 1.0. |
| 4 | (See accompanying file LICENSE_1_0.txt or copy at |
| 5 | http://www.boost.org/LICENSE_1_0.txt) |
| 6 | */ |
| 7 | |
| 8 | #include <simdpp/detail/preprocessor/stringize.hpp> |
| 9 | #include <simdpp/dispatch/arch.h> |
| 10 | #include <simdpp/dispatch/dispatcher.h> |
| 11 | |
/*  This file sets up the this_compile_arch() and create_fn_version() functions
    for a particular architecture.
| 14 | |
| 15 | The file may be included into other files multiple times, it has been |
| 16 | extracted to reduce code duplication. |
| 17 | |
| 18 | The file depends on preprocess_single_arch.h being included beforehand and |
| 19 | its output macros not being overridden yet. Additionally, it depends on |
| 20 | SIMDPP_ARCH_PP_THIS_COMPILE_ARCH_NAMESPACE being defined to appropriate |
| 21 | namespace for the architecture. |
| 22 | |
| 23 | The file also depends on the definition of |
| 24 | SIMDPP_ARCH_PP_THIS_COMPILE_ARCH_FOR_DISPATCH. If it is defined to 0, only |
| 25 | this_compile_arch() is available as |
| 26 | simdpp::SIMDPP_ARCH_PP_THIS_COMPILE_ARCH_NAMESPACE::this_compile_arch(). |
| 27 | If the macro is defined to nonzero, then both this_compile_arch() and |
| 28 | create_fn_version() are available at |
| 29 | simdpp::SIMDPP_ARCH_PP_THIS_COMPILE_ARCH_NAMESPACE::this_compile_arch() and |
| 30 | simdpp::SIMDPP_ARCH_PP_THIS_COMPILE_ARCH_NAMESPACE::create_fn_version(). |
| 31 | |
    This separation is required because the current architecture and the
    set of dispatched architectures (if enabled) are defined separately, and
    it's hard to figure out whether there's any overlap. If there is, then we
    need to make sure that all functions are defined only once in each namespace.
| 36 | */ |
| 37 | namespace simdpp { |
| 38 | namespace SIMDPP_ARCH_PP_THIS_COMPILE_ARCH_NAMESPACE { |
| 39 | |
| 40 | #if SIMDPP_ARCH_PP_THIS_COMPILE_ARCH_FOR_DISPATCH |
| 41 | namespace detail { |
| 42 | #endif |
| 43 | |
| 44 | /** Returns the instruction set flags that will be required by the currently |
| 45 | compiled code |
| 46 | */ |
| 47 | static inline Arch this_compile_arch() |
| 48 | { |
| 49 | Arch res = Arch::NONE_NULL; |
| 50 | #if SIMDPP_ARCH_PP_USE_SSE2 |
| 51 | res |= Arch::X86_SSE2; |
| 52 | #endif |
| 53 | #if SIMDPP_ARCH_PP_USE_SSE3 |
| 54 | res |= Arch::X86_SSE3; |
| 55 | #endif |
| 56 | #if SIMDPP_ARCH_PP_USE_SSSE3 |
| 57 | res |= Arch::X86_SSSE3; |
| 58 | #endif |
| 59 | #if SIMDPP_ARCH_PP_USE_SSE4_1 |
| 60 | res |= Arch::X86_SSE4_1; |
| 61 | #endif |
| 62 | #if SIMDPP_ARCH_PP_USE_X86_POPCNT_INSN |
| 63 | res |= Arch::X86_POPCNT_INSN; |
| 64 | #endif |
| 65 | #if SIMDPP_ARCH_PP_USE_AVX |
| 66 | res |= Arch::X86_AVX; |
| 67 | #endif |
| 68 | #if SIMDPP_ARCH_PP_USE_AVX2 |
| 69 | res |= Arch::X86_AVX2; |
| 70 | #endif |
| 71 | #if SIMDPP_ARCH_PP_USE_FMA3 |
| 72 | res |= Arch::X86_FMA3; |
| 73 | #endif |
| 74 | #if SIMDPP_ARCH_PP_USE_FMA4 |
| 75 | res |= Arch::X86_FMA4; |
| 76 | #endif |
| 77 | #if SIMDPP_ARCH_PP_USE_XOP |
| 78 | res |= Arch::X86_XOP; |
| 79 | #endif |
| 80 | #if SIMDPP_ARCH_PP_USE_AVX512F |
| 81 | res |= Arch::X86_AVX512F; |
| 82 | #endif |
| 83 | #if SIMDPP_ARCH_PP_USE_AVX512BW |
| 84 | res |= Arch::X86_AVX512BW; |
| 85 | #endif |
| 86 | #if SIMDPP_ARCH_PP_USE_AVX512DQ |
| 87 | res |= Arch::X86_AVX512DQ; |
| 88 | #endif |
| 89 | #if SIMDPP_ARCH_PP_USE_AVX512VL |
| 90 | res |= Arch::X86_AVX512VL; |
| 91 | #endif |
| 92 | #if SIMDPP_ARCH_PP_USE_NEON |
| 93 | res |= Arch::ARM_NEON; |
| 94 | #endif |
| 95 | #if SIMDPP_ARCH_PP_USE_NEON_FLT_SP |
| 96 | res |= Arch::ARM_NEON_FLT_SP; |
| 97 | #endif |
| 98 | #if SIMDPP_ARCH_PP_USE_ALTIVEC |
| 99 | res |= Arch::POWER_ALTIVEC; |
| 100 | #endif |
| 101 | #if SIMDPP_ARCH_PP_USE_VSX_206 |
| 102 | res |= Arch::POWER_VSX_206; |
| 103 | #endif |
| 104 | #if SIMDPP_ARCH_PP_USE_VSX_207 |
| 105 | res |= Arch::POWER_VSX_207; |
| 106 | #endif |
| 107 | #if SIMDPP_ARCH_PP_USE_MSA |
| 108 | res |= Arch::MIPS_MSA; |
| 109 | #endif |
| 110 | return res; |
| 111 | } |
| 112 | |
| 113 | #if SIMDPP_ARCH_PP_THIS_COMPILE_ARCH_FOR_DISPATCH |
| 114 | |
| 115 | template<class FunPtr> |
| 116 | static inline simdpp::detail::FnVersion create_fn_version(FunPtr fun_ptr) |
| 117 | { |
| 118 | simdpp::detail::FnVersion res; |
| 119 | res.needed_arch = simdpp::SIMDPP_ARCH_PP_THIS_COMPILE_ARCH_NAMESPACE::detail::this_compile_arch(); |
| 120 | res.fun_ptr = reinterpret_cast<simdpp::detail::VoidFunPtr>(fun_ptr); |
| 121 | res.arch_name = SIMDPP_PP_STRINGIZE(SIMDPP_ARCH_PP_THIS_COMPILE_ARCH_NAMESPACE); |
| 122 | return res; |
| 123 | } |
| 124 | |
| 125 | } // namespace detail |
| 126 | #endif |
| 127 | |
| 128 | } // namespace SIMDPP_ARCH_PP_THIS_COMPILE_ARCH_NAMESPACE |
| 129 | } // namespace simdpp |
| 130 | |
| 131 | // this file may be included several times; thus no include guard present |
| 132 | |