1/*
2 * Copyright (c) 2017, Intel Corporation
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are met:
6 *
7 * * Redistributions of source code must retain the above copyright notice,
8 * this list of conditions and the following disclaimer.
9 * * Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * * Neither the name of Intel Corporation nor the names of its contributors
13 * may be used to endorse or promote products derived from this software
14 * without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 * POSSIBILITY OF SUCH DAMAGE.
27 */
28
29#ifndef CPUID_INLINE_H_
30#define CPUID_INLINE_H_
31
32#include "ue2common.h"
33#include "cpuid_flags.h"
34
35#if !defined(_WIN32) && !defined(CPUID_H_)
36#include <cpuid.h>
37/* system header doesn't have a header guard */
38#define CPUID_H_
39#endif
40
41#ifdef __cplusplus
42extern "C"
43{
44#endif
45
/*
 * Execute the CPUID instruction and return all four result registers.
 *
 * op   - value loaded into EAX before the instruction (the CPUID leaf)
 * leaf - value loaded into ECX before the instruction (the sub-leaf)
 * eax, ebx, ecx, edx - receive the corresponding output registers; all four
 *                      pointers are written unconditionally.
 */
static inline
void cpuid(unsigned int op, unsigned int leaf, unsigned int *eax,
           unsigned int *ebx, unsigned int *ecx, unsigned int *edx) {
#if defined(_WIN32)
    /* __cpuidex fills the array in EAX, EBX, ECX, EDX order */
    int regs[4];
    __cpuidex(regs, op, leaf);
    *eax = regs[0];
    *ebx = regs[1];
    *ecx = regs[2];
    *edx = regs[3];
#else
    /* __cpuid_count is a macro that assigns to its last four arguments */
    unsigned int out_a, out_b, out_c, out_d;
    __cpuid_count(op, leaf, out_a, out_b, out_c, out_d);
    *eax = out_a;
    *ebx = out_b;
    *ecx = out_c;
    *edx = out_d;
#endif
}
60
/*
 * Bit masks for the registers returned by cpuid() and xgetbv().
 */

// CPUID leaf 1 (feature information), ECX bits
#define CPUID_SSE3      (1 << 0)
#define CPUID_SSSE3     (1 << 9)
#define CPUID_SSE4_1    (1 << 19)
#define CPUID_SSE4_2    (1 << 20)
#define CPUID_POPCNT    (1 << 23)
// NOTE: bit 27 is the OSXSAVE flag (OS has enabled XSAVE/XGETBV), not the
// processor XSAVE capability bit (bit 26). The name is kept for callers.
#define CPUID_XSAVE     (1 << 27)
#define CPUID_AVX       (1 << 28)

// CPUID leaf 1 (feature information), EDX bits
#define CPUID_FXSAVE    (1 << 24)
#define CPUID_SSE       (1 << 25)
#define CPUID_SSE2      (1 << 26)
#define CPUID_HTT       (1 << 28)

// Structured Extended Feature Flags Enumeration Leaf (leaf 7, sub-leaf 0),
// EBX bits
#define CPUID_BMI       (1 << 3)
#define CPUID_AVX2      (1 << 5)
#define CPUID_BMI2      (1 << 8)
#define CPUID_AVX512F   (1 << 16)
#define CPUID_AVX512BW  (1 << 30)

// Extended Control Register 0 (XCR0) values
#define CPUID_XCR0_SSE       (1 << 1)
#define CPUID_XCR0_AVX       (1 << 2)
#define CPUID_XCR0_OPMASK    (1 << 5) // k-regs
#define CPUID_XCR0_ZMM_Hi256 (1 << 6) // upper 256 bits of ZMM0-ZMM15
#define CPUID_XCR0_Hi16_ZMM  (1 << 7) // ZMM16-ZMM31

// All XCR0 state components that must be enabled before AVX-512 can be used
#define CPUID_XCR0_AVX512                                                      \
    (CPUID_XCR0_OPMASK | CPUID_XCR0_ZMM_Hi256 | CPUID_XCR0_Hi16_ZMM)
94
95static inline
96u64a xgetbv(u32 op) {
97#if defined(_WIN32) || defined(__INTEL_COMPILER)
98 return _xgetbv(op);
99#else
100 u32 a, d;
101 __asm__ volatile (
102 "xgetbv\n"
103 : "=a"(a),
104 "=d"(d)
105 : "c"(op));
106 return ((u64a)d << 32) + a;
107#endif
108}
109
110static inline
111int check_avx2(void) {
112#if defined(__INTEL_COMPILER)
113 return _may_i_use_cpu_feature(_FEATURE_AVX2);
114#else
115 unsigned int eax, ebx, ecx, edx;
116
117 cpuid(1, 0, &eax, &ebx, &ecx, &edx);
118
119 /* check AVX is supported and XGETBV is enabled by OS */
120 if ((ecx & (CPUID_AVX | CPUID_XSAVE)) != (CPUID_AVX | CPUID_XSAVE)) {
121 DEBUG_PRINTF("AVX and XSAVE not supported\n");
122 return 0;
123 }
124
125 /* check that SSE and AVX registers are enabled by OS */
126 u64a xcr0 = xgetbv(0);
127 if ((xcr0 & (CPUID_XCR0_SSE | CPUID_XCR0_AVX)) !=
128 (CPUID_XCR0_SSE | CPUID_XCR0_AVX)) {
129 DEBUG_PRINTF("SSE and AVX registers not enabled\n");
130 return 0;
131 }
132
133 /* ECX and EDX contain capability flags */
134 ecx = 0;
135 cpuid(7, 0, &eax, &ebx, &ecx, &edx);
136
137 if (ebx & CPUID_AVX2) {
138 DEBUG_PRINTF("AVX2 enabled\n");
139 return 1;
140 }
141
142 return 0;
143#endif
144}
145
146static inline
147int check_avx512(void) {
148 /*
149 * For our purposes, having avx512 really means "can we use AVX512BW?"
150 */
151#if defined(__INTEL_COMPILER)
152 return _may_i_use_cpu_feature(_FEATURE_AVX512BW | _FEATURE_AVX512VL);
153#else
154 unsigned int eax, ebx, ecx, edx;
155
156 cpuid(1, 0, &eax, &ebx, &ecx, &edx);
157
158 /* check XSAVE is enabled by OS */
159 if (!(ecx & CPUID_XSAVE)) {
160 DEBUG_PRINTF("AVX and XSAVE not supported\n");
161 return 0;
162 }
163
164 /* check that AVX 512 registers are enabled by OS */
165 u64a xcr0 = xgetbv(0);
166 if ((xcr0 & CPUID_XCR0_AVX512) != CPUID_XCR0_AVX512) {
167 DEBUG_PRINTF("AVX512 registers not enabled\n");
168 return 0;
169 }
170
171 /* ECX and EDX contain capability flags */
172 ecx = 0;
173 cpuid(7, 0, &eax, &ebx, &ecx, &edx);
174
175 if (!(ebx & CPUID_AVX512F)) {
176 DEBUG_PRINTF("AVX512F (AVX512 Foundation) instructions not enabled\n");
177 return 0;
178 }
179
180 if (ebx & CPUID_AVX512BW) {
181 DEBUG_PRINTF("AVX512BW instructions enabled\n");
182 return 1;
183 }
184
185 return 0;
186#endif
187}
188
189static inline
190int check_ssse3(void) {
191 unsigned int eax, ebx, ecx, edx;
192 cpuid(1, 0, &eax, &ebx, &ecx, &edx);
193 return !!(ecx & CPUID_SSSE3);
194}
195
196static inline
197int check_sse42(void) {
198 unsigned int eax, ebx, ecx, edx;
199 cpuid(1, 0, &eax, &ebx, &ecx, &edx);
200 return !!(ecx & CPUID_SSE4_2);
201}
202
203static inline
204int check_popcnt(void) {
205 unsigned int eax, ebx, ecx, edx;
206 cpuid(1, 0, &eax, &ebx, &ecx, &edx);
207 return !!(ecx & CPUID_POPCNT);
208}
209
210#ifdef __cplusplus
211} /* extern "C" */
212#endif
213
214#endif /* CPUID_INLINE_H_ */
215