1/* Copyright (C) 2013-2017 Povilas Kanapickas <povilas@radix.lt>
2
3 Distributed under the Boost Software License, Version 1.0.
4 (See accompanying file LICENSE_1_0.txt or copy at
5 http://www.boost.org/LICENSE_1_0.txt)
6*/
7
8#include <simdpp/detail/preprocessor/stringize.hpp>
9#include <simdpp/dispatch/arch.h>
10#include <simdpp/dispatch/dispatcher.h>
11
/*  This file sets up the this_compile_arch() and create_fn_version()
    functions for a particular architecture.
14
15 The file may be included into other files multiple times, it has been
16 extracted to reduce code duplication.
17
18 The file depends on preprocess_single_arch.h being included beforehand and
19 its output macros not being overridden yet. Additionally, it depends on
20 SIMDPP_ARCH_PP_THIS_COMPILE_ARCH_NAMESPACE being defined to appropriate
21 namespace for the architecture.
22
23 The file also depends on the definition of
24 SIMDPP_ARCH_PP_THIS_COMPILE_ARCH_FOR_DISPATCH. If it is defined to 0, only
25 this_compile_arch() is available as
26 simdpp::SIMDPP_ARCH_PP_THIS_COMPILE_ARCH_NAMESPACE::this_compile_arch().
27 If the macro is defined to nonzero, then both this_compile_arch() and
28 create_fn_version() are available at
29 simdpp::SIMDPP_ARCH_PP_THIS_COMPILE_ARCH_NAMESPACE::this_compile_arch() and
30 simdpp::SIMDPP_ARCH_PP_THIS_COMPILE_ARCH_NAMESPACE::create_fn_version().
31
    This separation is required because the current architecture and the
    set of dispatched architectures (if enabled) are defined separately, and
    it's hard to figure out whether there's any overlap. If there is, then we
    need to make sure that all functions are defined only once in each
    namespace.
36*/
37namespace simdpp {
38namespace SIMDPP_ARCH_PP_THIS_COMPILE_ARCH_NAMESPACE {
39
40#if SIMDPP_ARCH_PP_THIS_COMPILE_ARCH_FOR_DISPATCH
41namespace detail {
42#endif
43
44/** Returns the instruction set flags that will be required by the currently
45 compiled code
46*/
47static inline Arch this_compile_arch()
48{
49 Arch res = Arch::NONE_NULL;
50#if SIMDPP_ARCH_PP_USE_SSE2
51 res |= Arch::X86_SSE2;
52#endif
53#if SIMDPP_ARCH_PP_USE_SSE3
54 res |= Arch::X86_SSE3;
55#endif
56#if SIMDPP_ARCH_PP_USE_SSSE3
57 res |= Arch::X86_SSSE3;
58#endif
59#if SIMDPP_ARCH_PP_USE_SSE4_1
60 res |= Arch::X86_SSE4_1;
61#endif
62#if SIMDPP_ARCH_PP_USE_X86_POPCNT_INSN
63 res |= Arch::X86_POPCNT_INSN;
64#endif
65#if SIMDPP_ARCH_PP_USE_AVX
66 res |= Arch::X86_AVX;
67#endif
68#if SIMDPP_ARCH_PP_USE_AVX2
69 res |= Arch::X86_AVX2;
70#endif
71#if SIMDPP_ARCH_PP_USE_FMA3
72 res |= Arch::X86_FMA3;
73#endif
74#if SIMDPP_ARCH_PP_USE_FMA4
75 res |= Arch::X86_FMA4;
76#endif
77#if SIMDPP_ARCH_PP_USE_XOP
78 res |= Arch::X86_XOP;
79#endif
80#if SIMDPP_ARCH_PP_USE_AVX512F
81 res |= Arch::X86_AVX512F;
82#endif
83#if SIMDPP_ARCH_PP_USE_AVX512BW
84 res |= Arch::X86_AVX512BW;
85#endif
86#if SIMDPP_ARCH_PP_USE_AVX512DQ
87 res |= Arch::X86_AVX512DQ;
88#endif
89#if SIMDPP_ARCH_PP_USE_AVX512VL
90 res |= Arch::X86_AVX512VL;
91#endif
92#if SIMDPP_ARCH_PP_USE_NEON
93 res |= Arch::ARM_NEON;
94#endif
95#if SIMDPP_ARCH_PP_USE_NEON_FLT_SP
96 res |= Arch::ARM_NEON_FLT_SP;
97#endif
98#if SIMDPP_ARCH_PP_USE_ALTIVEC
99 res |= Arch::POWER_ALTIVEC;
100#endif
101#if SIMDPP_ARCH_PP_USE_VSX_206
102 res |= Arch::POWER_VSX_206;
103#endif
104#if SIMDPP_ARCH_PP_USE_VSX_207
105 res |= Arch::POWER_VSX_207;
106#endif
107#if SIMDPP_ARCH_PP_USE_MSA
108 res |= Arch::MIPS_MSA;
109#endif
110 return res;
111}
112
113#if SIMDPP_ARCH_PP_THIS_COMPILE_ARCH_FOR_DISPATCH
114
115template<class FunPtr>
116static inline simdpp::detail::FnVersion create_fn_version(FunPtr fun_ptr)
117{
118 simdpp::detail::FnVersion res;
119 res.needed_arch = simdpp::SIMDPP_ARCH_PP_THIS_COMPILE_ARCH_NAMESPACE::detail::this_compile_arch();
120 res.fun_ptr = reinterpret_cast<simdpp::detail::VoidFunPtr>(fun_ptr);
121 res.arch_name = SIMDPP_PP_STRINGIZE(SIMDPP_ARCH_PP_THIS_COMPILE_ARCH_NAMESPACE);
122 return res;
123}
124
125} // namespace detail
126#endif
127
128} // namespace SIMDPP_ARCH_PP_THIS_COMPILE_ARCH_NAMESPACE
129} // namespace simdpp
130
131// this file may be included several times; thus no include guard present
132