1// Copyright 2016 The SwiftShader Authors. All Rights Reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#include "CPUID.hpp"
16
17#if defined(_WIN32)
18 #ifndef WIN32_LEAN_AND_MEAN
19 #define WIN32_LEAN_AND_MEAN
20 #endif
21 #include <windows.h>
22 #include <intrin.h>
23 #include <float.h>
24#else
25 #include <unistd.h>
26 #include <sched.h>
27 #include <sys/types.h>
28#endif
29
30namespace sw
31{
32 bool CPUID::MMX = detectMMX();
33 bool CPUID::CMOV = detectCMOV();
34 bool CPUID::SSE = detectSSE();
35 bool CPUID::SSE2 = detectSSE2();
36 bool CPUID::SSE3 = detectSSE3();
37 bool CPUID::SSSE3 = detectSSSE3();
38 bool CPUID::SSE4_1 = detectSSE4_1();
39 int CPUID::cores = detectCoreCount();
40 int CPUID::affinity = detectAffinity();
41
42 bool CPUID::enableMMX = true;
43 bool CPUID::enableCMOV = true;
44 bool CPUID::enableSSE = true;
45 bool CPUID::enableSSE2 = true;
46 bool CPUID::enableSSE3 = true;
47 bool CPUID::enableSSSE3 = true;
48 bool CPUID::enableSSE4_1 = true;
49
50 void CPUID::setEnableMMX(bool enable)
51 {
52 enableMMX = enable;
53
54 if(!enableMMX)
55 {
56 enableSSE = false;
57 enableSSE2 = false;
58 enableSSE3 = false;
59 enableSSSE3 = false;
60 enableSSE4_1 = false;
61 }
62 }
63
64 void CPUID::setEnableCMOV(bool enable)
65 {
66 enableCMOV = enable;
67
68 if(!CMOV)
69 {
70 enableSSE = false;
71 enableSSE2 = false;
72 enableSSE3 = false;
73 enableSSSE3 = false;
74 enableSSE4_1 = false;
75 }
76 }
77
78 void CPUID::setEnableSSE(bool enable)
79 {
80 enableSSE = enable;
81
82 if(enableSSE)
83 {
84 enableMMX = true;
85 enableCMOV = true;
86 }
87 else
88 {
89 enableSSE2 = false;
90 enableSSE3 = false;
91 enableSSSE3 = false;
92 enableSSE4_1 = false;
93 }
94 }
95
96 void CPUID::setEnableSSE2(bool enable)
97 {
98 enableSSE2 = enable;
99
100 if(enableSSE2)
101 {
102 enableMMX = true;
103 enableCMOV = true;
104 enableSSE = true;
105 }
106 else
107 {
108 enableSSE3 = false;
109 enableSSSE3 = false;
110 enableSSE4_1 = false;
111 }
112 }
113
114 void CPUID::setEnableSSE3(bool enable)
115 {
116 enableSSE3 = enable;
117
118 if(enableSSE3)
119 {
120 enableMMX = true;
121 enableCMOV = true;
122 enableSSE = true;
123 enableSSE2 = true;
124 }
125 else
126 {
127 enableSSSE3 = false;
128 enableSSE4_1 = false;
129 }
130 }
131
132 void CPUID::setEnableSSSE3(bool enable)
133 {
134 enableSSSE3 = enable;
135
136 if(enableSSSE3)
137 {
138 enableMMX = true;
139 enableCMOV = true;
140 enableSSE = true;
141 enableSSE2 = true;
142 enableSSE3 = true;
143 }
144 else
145 {
146 enableSSE4_1 = false;
147 }
148 }
149
150 void CPUID::setEnableSSE4_1(bool enable)
151 {
152 enableSSE4_1 = enable;
153
154 if(enableSSE4_1)
155 {
156 enableMMX = true;
157 enableCMOV = true;
158 enableSSE = true;
159 enableSSE2 = true;
160 enableSSE3 = true;
161 enableSSSE3 = true;
162 }
163 }
164
// Runs the x86 CPUID instruction for leaf 'info' and stores the results as
// registers[0..3] = EAX, EBX, ECX, EDX. On non-x86 targets all four entries
// are set to zero, so every feature test built on top reports "unsupported".
static void cpuid(int registers[4], int info)
{
	// MSVC does not predefine the GCC-style __i386__/__x86_64__ macros; it
	// uses _M_IX86/_M_X64 instead, so both spellings are accepted here.
	#if defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) || defined(_M_X64)
		#if defined(_WIN32)
			__cpuid(registers, info);
		#else
			// ECX (the sub-leaf selector) is pinned to zero so the result does
			// not depend on whatever happened to be in the register.
			__asm volatile("cpuid"
			               : "=a" (registers[0]), "=b" (registers[1]), "=c" (registers[2]), "=d" (registers[3])
			               : "a" (info), "c" (0));
		#endif
	#else
		registers[0] = 0;
		registers[1] = 0;
		registers[2] = 0;
		registers[3] = 0;
	#endif
}
180
181 bool CPUID::detectMMX()
182 {
183 int registers[4];
184 cpuid(registers, 1);
185 return MMX = (registers[3] & 0x00800000) != 0;
186 }
187
188 bool CPUID::detectCMOV()
189 {
190 int registers[4];
191 cpuid(registers, 1);
192 return CMOV = (registers[3] & 0x00008000) != 0;
193 }
194
195 bool CPUID::detectSSE()
196 {
197 int registers[4];
198 cpuid(registers, 1);
199 return SSE = (registers[3] & 0x02000000) != 0;
200 }
201
202 bool CPUID::detectSSE2()
203 {
204 int registers[4];
205 cpuid(registers, 1);
206 return SSE2 = (registers[3] & 0x04000000) != 0;
207 }
208
209 bool CPUID::detectSSE3()
210 {
211 int registers[4];
212 cpuid(registers, 1);
213 return SSE3 = (registers[2] & 0x00000001) != 0;
214 }
215
216 bool CPUID::detectSSSE3()
217 {
218 int registers[4];
219 cpuid(registers, 1);
220 return SSSE3 = (registers[2] & 0x00000200) != 0;
221 }
222
223 bool CPUID::detectSSE4_1()
224 {
225 int registers[4];
226 cpuid(registers, 1);
227 return SSE4_1 = (registers[2] & 0x00080000) != 0;
228 }
229
230 int CPUID::detectCoreCount()
231 {
232 int cores = 0;
233
234 #if defined(_WIN32)
235 DWORD_PTR processAffinityMask = 1;
236 DWORD_PTR systemAffinityMask = 1;
237
238 GetProcessAffinityMask(GetCurrentProcess(), &processAffinityMask, &systemAffinityMask);
239
240 while(systemAffinityMask)
241 {
242 if(systemAffinityMask & 1)
243 {
244 cores++;
245 }
246
247 systemAffinityMask >>= 1;
248 }
249 #else
250 cores = sysconf(_SC_NPROCESSORS_ONLN);
251 #endif
252
253 if(cores < 1) cores = 1;
254 if(cores > 16) cores = 16;
255
256 return cores; // FIXME: Number of physical cores
257 }
258
259 int CPUID::detectAffinity()
260 {
261 int cores = 0;
262
263 #if defined(_WIN32)
264 DWORD_PTR processAffinityMask = 1;
265 DWORD_PTR systemAffinityMask = 1;
266
267 GetProcessAffinityMask(GetCurrentProcess(), &processAffinityMask, &systemAffinityMask);
268
269 while(processAffinityMask)
270 {
271 if(processAffinityMask & 1)
272 {
273 cores++;
274 }
275
276 processAffinityMask >>= 1;
277 }
278 #else
279 return detectCoreCount(); // FIXME: Assumes no affinity limitation
280 #endif
281
282 if(cores < 1) cores = 1;
283 if(cores > 16) cores = 16;
284
285 return cores;
286 }
287
288 void CPUID::setFlushToZero(bool enable)
289 {
290 #if defined(_MSC_VER)
291 _controlfp(enable ? _DN_FLUSH : _DN_SAVE, _MCW_DN);
292 #else
293 // Unimplemented
294 #endif
295 }
296
297 void CPUID::setDenormalsAreZero(bool enable)
298 {
299 // Unimplemented
300 }
301}
302