1/***************************************************************************
2 * Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and *
3 * Martin Renou *
4 * Copyright (c) QuantStack *
5 * Copyright (c) Serge Guelton *
6 * *
7 * Distributed under the terms of the BSD 3-Clause License. *
8 * *
9 * The full license is in the file LICENSE, distributed with this software. *
10 ****************************************************************************/
11
12#ifndef XSIMD_CPUID_HPP
13#define XSIMD_CPUID_HPP
14
15#include <algorithm>
16#include <cstring>
17
18#if defined(__linux__) && (defined(__ARM_NEON) || defined(_M_ARM))
19#include <asm/hwcap.h>
20#include <sys/auxv.h>
21#endif
22
23#if defined(_MSC_VER)
24// Contains the definition of __cpuidex
25#include <intrin.h>
26#endif
27
28#include "../types/xsimd_all_registers.hpp"
29
30namespace xsimd
31{
32 namespace detail
33 {
34 struct supported_arch
35 {
36 unsigned sse2 : 1;
37 unsigned sse3 : 1;
38 unsigned ssse3 : 1;
39 unsigned sse4_1 : 1;
40 unsigned sse4_2 : 1;
41 unsigned sse4a : 1;
42 unsigned fma3_sse : 1;
43 unsigned fma4 : 1;
44 unsigned xop : 1;
45 unsigned avx : 1;
46 unsigned fma3_avx : 1;
47 unsigned avx2 : 1;
48 unsigned fma3_avx2 : 1;
49 unsigned avx512f : 1;
50 unsigned avx512cd : 1;
51 unsigned avx512dq : 1;
52 unsigned avx512bw : 1;
53 unsigned neon : 1;
54 unsigned neon64 : 1;
55
56 // version number of the best arch available
57 unsigned best;
58
59 supported_arch() noexcept
60 {
61 memset(s: this, c: 0, n: sizeof(supported_arch));
62
63#if defined(__aarch64__) || defined(_M_ARM64)
64 neon = 1;
65 neon64 = 1;
66 best = neon64::version();
67#elif defined(__ARM_NEON) || defined(_M_ARM)
68#if defined(__linux__)
69 neon = bool(getauxval(AT_HWCAP) & HWCAP_NEON);
70#else
71 // that's very conservative :-/
72 neon = 0;
73#endif
74 neon64 = 0;
75 best = neon::version() * neon;
76
77#elif defined(__x86_64__) || defined(__i386__) || defined(_M_AMD64) || defined(_M_IX86)
78 auto get_cpuid = [](int reg[4], int func_id) noexcept
79 {
80
81#if defined(_MSC_VER)
82 __cpuidex(reg, func_id, 0);
83
84#elif defined(__INTEL_COMPILER)
85 __cpuid(reg, func_id);
86
87#elif defined(__GNUC__) || defined(__clang__)
88
89#if defined(__i386__) && defined(__PIC__)
90 // %ebx may be the PIC register
91 __asm__("xchg{l}\t{%%}ebx, %1\n\t"
92 "cpuid\n\t"
93 "xchg{l}\t{%%}ebx, %1\n\t"
94 : "=a"(reg[0]), "=r"(reg[1]), "=c"(reg[2]),
95 "=d"(reg[3])
96 : "a"(func_id), "c"(0));
97
98#else
99 __asm__("cpuid\n\t"
100 : "=a"(reg[0]), "=b"(reg[1]), "=c"(reg[2]),
101 "=d"(reg[3])
102 : "a"(func_id), "c"(0));
103#endif
104
105#else
106#error "Unsupported configuration"
107#endif
108 };
109
110 int regs[4];
111
112 get_cpuid(regs, 0x1);
113
114 sse2 = regs[3] >> 26 & 1;
115 best = std::max(best, sse2::version() * sse2);
116
117 sse3 = regs[2] >> 0 & 1;
118 best = std::max(best, sse3::version() * sse3);
119
120 ssse3 = regs[2] >> 9 & 1;
121 best = std::max(best, ssse3::version() * ssse3);
122
123 sse4_1 = regs[2] >> 19 & 1;
124 best = std::max(best, sse4_1::version() * sse4_1);
125
126 sse4_2 = regs[2] >> 20 & 1;
127 best = std::max(best, sse4_2::version() * sse4_2);
128
129 fma3_sse = regs[2] >> 12 & 1;
130 if (sse4_2)
131 best = std::max(best, fma3<xsimd::sse4_2>::version() * fma3_sse);
132
133 get_cpuid(regs, 0x80000001);
134 fma4 = regs[2] >> 16 & 1;
135 best = std::max(best, fma4::version() * fma4);
136
137 // sse4a = regs[2] >> 6 & 1;
138 // best = std::max(best, XSIMD_X86_AMD_SSE4A_VERSION * sse4a);
139
140 // xop = regs[2] >> 11 & 1;
141 // best = std::max(best, XSIMD_X86_AMD_XOP_VERSION * xop);
142
143 avx = regs[2] >> 28 & 1;
144 best = std::max(best, avx::version() * avx);
145
146 fma3_avx = avx && fma3_sse;
147 best = std::max(best, fma3<xsimd::avx>::version() * fma3_avx);
148
149 get_cpuid(regs, 0x7);
150 avx2 = regs[1] >> 5 & 1;
151 best = std::max(best, avx2::version() * avx2);
152
153 fma3_avx2 = avx2 && fma3_sse;
154 best = std::max(best, fma3<xsimd::avx2>::version() * fma3_avx2);
155
156 avx512f = regs[1] >> 16 & 1;
157 best = std::max(best, avx512f::version() * avx512f);
158
159 avx512cd = regs[1] >> 28 & 1;
160 best = std::max(best, avx512cd::version() * avx512cd * avx512f);
161
162 avx512dq = regs[1] >> 17 & 1;
163 best = std::max(best, avx512dq::version() * avx512dq * avx512cd * avx512f);
164
165 avx512bw = regs[1] >> 30 & 1;
166 best = std::max(best, avx512bw::version() * avx512bw * avx512dq * avx512cd * avx512f);
167
168#endif
169 }
170 };
171 }
172
173 inline detail::supported_arch available_architectures() noexcept
174 {
175 static detail::supported_arch supported;
176 return supported;
177 }
178}
179
180#endif
181