1 | // Copyright 2009-2021 Intel Corporation |
2 | // SPDX-License-Identifier: Apache-2.0 |
3 | |
4 | #include "thread.h" |
5 | #include "sysinfo.h" |
6 | #include "string.h" |
7 | |
8 | #include <iostream> |
9 | #if defined(__ARM_NEON) |
10 | #include "../simd/arm/emulation.h" |
11 | #else |
12 | #include <xmmintrin.h> |
13 | #if defined(__EMSCRIPTEN__) |
14 | #include "../simd/wasm/emulation.h" |
15 | #endif |
16 | #endif |
17 | |
18 | #if defined(PTHREADS_WIN32) |
19 | #pragma comment (lib, "pthreadVC.lib") |
20 | #endif |
21 | |
22 | //////////////////////////////////////////////////////////////////////////////// |
23 | /// Windows Platform |
24 | //////////////////////////////////////////////////////////////////////////////// |
25 | |
26 | #if defined(__WIN32__) |
27 | |
28 | #define WIN32_LEAN_AND_MEAN |
29 | #include <windows.h> |
30 | |
31 | namespace embree |
32 | { |
/*! Pins the given thread to a single logical processor.
 *
 *  \param thread    handle of the thread to pin
 *  \param affinity  zero-based logical processor index, counted across all
 *                   active processor groups in group order
 *
 *  The processor-group API is resolved from Kernel32 at runtime; when any of
 *  its entry points is missing the single-group API is used instead.
 *  Failures of the affinity calls are reported through WARNING only. */
void setAffinity(HANDLE thread, ssize_t affinity)
{
  typedef WORD (WINAPI *GetActiveProcessorGroupCountFunc)();
  typedef DWORD (WINAPI *GetActiveProcessorCountFunc)(WORD);
  typedef BOOL (WINAPI *SetThreadGroupAffinityFunc)(HANDLE, const GROUP_AFFINITY *, PGROUP_AFFINITY);
  typedef BOOL (WINAPI *SetThreadIdealProcessorExFunc)(HANDLE, PPROCESSOR_NUMBER, PPROCESSOR_NUMBER);

  /* GetModuleHandle does not take a module reference, whereas the LoadLibrary
   * call used previously incremented the reference count on every invocation
   * without a matching FreeLibrary */
  HMODULE hlib = GetModuleHandle("Kernel32");

  GetActiveProcessorGroupCountFunc pGetActiveProcessorGroupCount = nullptr;
  GetActiveProcessorCountFunc pGetActiveProcessorCount = nullptr;
  SetThreadGroupAffinityFunc pSetThreadGroupAffinity = nullptr;
  SetThreadIdealProcessorExFunc pSetThreadIdealProcessorEx = nullptr;
  if (hlib)
  {
    pGetActiveProcessorGroupCount = (GetActiveProcessorGroupCountFunc)GetProcAddress(hlib, "GetActiveProcessorGroupCount");
    pGetActiveProcessorCount = (GetActiveProcessorCountFunc)GetProcAddress(hlib, "GetActiveProcessorCount");
    pSetThreadGroupAffinity = (SetThreadGroupAffinityFunc)GetProcAddress(hlib, "SetThreadGroupAffinity");
    pSetThreadIdealProcessorEx = (SetThreadIdealProcessorExFunc)GetProcAddress(hlib, "SetThreadIdealProcessorEx");
  }

  if (pGetActiveProcessorGroupCount && pGetActiveProcessorCount && pSetThreadGroupAffinity && pSetThreadIdealProcessorEx)
  {
    /* translate the global processor index into (group, number within group);
     * if the index lies beyond all groups, group 0 / number 0 is used */
    int groups = pGetActiveProcessorGroupCount();
    int totalProcessors = 0, group = 0, number = 0;
    for (int i = 0; i<groups; i++) {
      int processors = pGetActiveProcessorCount(i);
      if (totalProcessors + processors > affinity) {
        group = i;
        number = (int)affinity - totalProcessors;
        break;
      }
      totalProcessors += processors;
    }

    GROUP_AFFINITY groupAffinity;
    groupAffinity.Group = (WORD)group;
    groupAffinity.Mask = (KAFFINITY)(uint64_t(1) << number);
    groupAffinity.Reserved[0] = 0;
    groupAffinity.Reserved[1] = 0;
    groupAffinity.Reserved[2] = 0;
    if (!pSetThreadGroupAffinity(thread, &groupAffinity, nullptr))
      WARNING("SetThreadGroupAffinity failed"); // on purpose only a warning

    PROCESSOR_NUMBER processorNumber;
    processorNumber.Group = group;
    processorNumber.Number = number;
    processorNumber.Reserved = 0;
    if (!pSetThreadIdealProcessorEx(thread, &processorNumber, nullptr))
      WARNING("SetThreadIdealProcessorEx failed"); // on purpose only a warning
  }
  else
  {
    /* single-group API: the processor index is used directly as a bit position */
    if (!SetThreadAffinityMask(thread, DWORD_PTR(uint64_t(1) << affinity)))
      WARNING("SetThreadAffinityMask failed"); // on purpose only a warning
    if (SetThreadIdealProcessor(thread, (DWORD)affinity) == (DWORD)-1)
      WARNING("SetThreadIdealProcessor failed"); // on purpose only a warning
  }
}
83 | |
/*! Pins the calling thread to the logical processor with index \p affinity
 *  by forwarding the current thread handle to the handle-based overload. */
void setAffinity(ssize_t affinity)
{
  const HANDLE self = GetCurrentThread();
  setAffinity(self, affinity);
}
88 | |
89 | struct ThreadStartupData |
90 | { |
91 | public: |
92 | ThreadStartupData (thread_func f, void* arg) |
93 | : f(f), arg(arg) {} |
94 | public: |
95 | thread_func f; |
96 | void* arg; |
97 | }; |
98 | |
/*! Entry point handed to CreateThread. Sets the FTZ and DAZ bits of the
 *  MXCSR register, runs the user function and then frees the startup
 *  record it received through \p ptr. Always returns 0. */
DWORD WINAPI threadStartup(LPVOID ptr)
{
  ThreadStartupData* const data = (ThreadStartupData*) ptr;

  const int flushModes = /*FTZ:*/ (1<<15) | /*DAZ:*/ (1<<6);
  _mm_setcsr(_mm_getcsr() | flushModes);

  data->f(data->arg);
  delete data;
  return 0;
}
107 | |
108 | #if !defined(PTHREADS_WIN32) |
109 | |
/*! Creates a thread that runs \p f with \p arg.
 *
 *  \param f           function executed by the new thread
 *  \param arg         argument passed to \p f
 *  \param stack_size  stack size forwarded to CreateThread
 *  \param threadID    logical processor to pin the thread to; negative
 *                     values leave the affinity untouched
 *  \return handle of the created thread wrapped as thread_t
 *
 *  A failing CreateThread is reported through FATAL. */
thread_t createThread(thread_func f, void* arg, size_t stack_size, ssize_t threadID)
{
  ThreadStartupData* startup = new ThreadStartupData(f,arg);
  HANDLE thread = CreateThread(nullptr, stack_size, threadStartup, startup, 0, nullptr);
  if (thread == nullptr) {
    /* threadStartup never runs in this case, so nobody else would free the record */
    delete startup;
    FATAL("CreateThread failed");
  }
  if (threadID >= 0) setAffinity(thread, threadID);
  return thread_t(thread);
}
118 | |
/*! Gives up the remainder of the calling thread's time slice via
 *  SwitchToThread. */
void yield()
{
  SwitchToThread();
}
123 | |
/*! Blocks without timeout until the thread behind \p tid has terminated
 *  and then closes its handle. */
void join(thread_t tid)
{
  const HANDLE handle = HANDLE(tid);
  WaitForSingleObject(handle, INFINITE);
  CloseHandle(handle);
}
129 | |
/*! Forcibly terminates the thread behind \p tid with exit code 0 and
 *  closes its handle. */
void destroyThread(thread_t tid)
{
  const HANDLE handle = HANDLE(tid);
  TerminateThread(handle, 0);
  CloseHandle(handle);
}
135 | |
/*! Allocates a thread local storage slot.
 *
 *  \return the TLS index returned by TlsAlloc, wrapped as tls_t
 *
 *  Exhaustion of TLS indices is reported through FATAL, matching the
 *  pthread implementation of this function, instead of handing the
 *  TLS_OUT_OF_INDEXES sentinel to the caller as if it were a valid slot. */
tls_t createTls()
{
  const DWORD index = TlsAlloc();
  if (index == TLS_OUT_OF_INDEXES)
    FATAL("TlsAlloc failed");
  return tls_t(size_t(index));
}
140 | |
/*! Stores \p ptr in the calling thread's slot of the TLS index \p tls. */
void setTls(tls_t tls, void* const ptr)
{
  const DWORD index = DWORD(size_t(tls));
  TlsSetValue(index, ptr);
}
145 | |
/*! Returns the pointer stored in the calling thread's slot of the TLS
 *  index \p tls. */
void* getTls(tls_t tls)
{
  const DWORD index = DWORD(size_t(tls));
  return TlsGetValue(index);
}
150 | |
/*! Releases the TLS index \p tls. */
void destroyTls(tls_t tls)
{
  const DWORD index = DWORD(size_t(tls));
  TlsFree(index);
}
155 | #endif |
156 | } |
157 | |
158 | #endif |
159 | |
160 | //////////////////////////////////////////////////////////////////////////////// |
161 | /// Linux Platform |
162 | //////////////////////////////////////////////////////////////////////////////// |
163 | |
164 | #if defined(__LINUX__) && !defined(__ANDROID__) |
165 | |
166 | #include <fstream> |
167 | #include <sstream> |
168 | #include <algorithm> |
169 | |
170 | namespace embree |
171 | { |
172 | static MutexSys mutex; |
173 | static std::vector<size_t> threadIDs; |
174 | |
175 | /* changes thread ID mapping such that we first fill up all thread on one core */ |
176 | size_t mapThreadID(size_t threadID) |
177 | { |
178 | Lock<MutexSys> lock(mutex); |
179 | |
180 | if (threadIDs.size() == 0) |
181 | { |
182 | /* parse thread/CPU topology */ |
183 | for (size_t cpuID=0;;cpuID++) |
184 | { |
185 | std::fstream fs; |
186 | std::string cpu = std::string("/sys/devices/system/cpu/cpu" ) + std::to_string((long long)cpuID) + std::string("/topology/thread_siblings_list" ); |
187 | fs.open (cpu.c_str(), std::fstream::in); |
188 | if (fs.fail()) break; |
189 | |
190 | int i; |
191 | while (fs >> i) |
192 | { |
193 | if (std::none_of(threadIDs.begin(),threadIDs.end(),[&] (int id) { return id == i; })) |
194 | threadIDs.push_back(i); |
195 | if (fs.peek() == ',') |
196 | fs.ignore(); |
197 | } |
198 | fs.close(); |
199 | } |
200 | |
201 | #if 0 |
202 | for (size_t i=0;i<threadIDs.size();i++) |
203 | std::cout << i << " -> " << threadIDs[i] << std::endl; |
204 | #endif |
205 | |
206 | /* verify the mapping and do not use it if the mapping has errors */ |
207 | for (size_t i=0;i<threadIDs.size();i++) { |
208 | for (size_t j=0;j<threadIDs.size();j++) { |
209 | if (i != j && threadIDs[i] == threadIDs[j]) { |
210 | threadIDs.clear(); |
211 | } |
212 | } |
213 | } |
214 | } |
215 | |
216 | /* re-map threadIDs if mapping is available */ |
217 | size_t ID = threadID; |
218 | if (threadID < threadIDs.size()) |
219 | ID = threadIDs[threadID]; |
220 | |
221 | /* find correct thread to affinitize to */ |
222 | cpu_set_t set; |
223 | CPU_ZERO(&set); |
224 | |
225 | if (pthread_getaffinity_np(pthread_self(), sizeof(set), &set) == 0) |
226 | { |
227 | for (int i=0, j=0; i<CPU_SETSIZE; i++) |
228 | { |
229 | if (!CPU_ISSET(i,&set)) continue; |
230 | |
231 | if (j == ID) { |
232 | ID = i; |
233 | break; |
234 | } |
235 | j++; |
236 | } |
237 | } |
238 | |
239 | return ID; |
240 | } |
241 | |
/*! Restricts the calling thread to the single CPU with index \p affinity.
 *  The result of pthread_setaffinity_np is not checked. */
void setAffinity(ssize_t affinity)
{
  /* mapThreadID(affinity) is deliberately not applied here: this is not
   * working properly in LXC containers when some processors are disabled */
  const size_t cpu = affinity;

  cpu_set_t mask;
  CPU_ZERO(&mask);
  CPU_SET(cpu, &mask);

  pthread_setaffinity_np(pthread_self(), sizeof(mask), &mask);
}
253 | } |
254 | #endif |
255 | |
256 | //////////////////////////////////////////////////////////////////////////////// |
257 | /// Android Platform |
258 | //////////////////////////////////////////////////////////////////////////////// |
259 | |
260 | #if defined(__ANDROID__) |
261 | |
262 | namespace embree |
263 | { |
/*! Restricts the calling thread to the single CPU with index \p affinity
 *  using sched_setaffinity. The result of the call is not checked. */
void setAffinity(ssize_t affinity)
{
  cpu_set_t mask;
  CPU_ZERO(&mask);
  CPU_SET(affinity, &mask);

  const pid_t self = 0;
  sched_setaffinity(self, sizeof(mask), &mask);
}
273 | } |
274 | #endif |
275 | |
276 | //////////////////////////////////////////////////////////////////////////////// |
277 | /// FreeBSD Platform |
278 | //////////////////////////////////////////////////////////////////////////////// |
279 | |
280 | #if defined(__FreeBSD__) |
281 | |
282 | #include <pthread_np.h> |
283 | |
284 | namespace embree |
285 | { |
286 | /*! set affinity of the calling thread */ |
287 | void setAffinity(ssize_t affinity) |
288 | { |
289 | cpuset_t cset; |
290 | CPU_ZERO(&cset); |
291 | CPU_SET(affinity, &cset); |
292 | |
293 | pthread_setaffinity_np(pthread_self(), sizeof(cset), &cset); |
294 | } |
295 | } |
296 | #endif |
297 | |
298 | //////////////////////////////////////////////////////////////////////////////// |
299 | /// WebAssembly Platform |
300 | //////////////////////////////////////////////////////////////////////////////// |
301 | |
302 | #if defined(__EMSCRIPTEN__) |
303 | namespace embree |
304 | { |
/*! Affinity request for the calling thread; intentionally does nothing
 *  on this platform. */
void setAffinity(ssize_t affinity)
{
  // Setting thread affinity is not supported in WASM.
  (void)affinity;
}
310 | } |
311 | #endif |
312 | |
313 | //////////////////////////////////////////////////////////////////////////////// |
314 | /// MacOSX Platform |
315 | //////////////////////////////////////////////////////////////////////////////// |
316 | |
317 | #if defined(__MACOSX__) |
318 | |
319 | #include <mach/thread_act.h> |
320 | #include <mach/thread_policy.h> |
321 | #include <mach/mach_init.h> |
322 | |
323 | namespace embree |
324 | { |
325 | /*! set affinity of the calling thread */ |
326 | void setAffinity(ssize_t affinity) |
327 | { |
328 | #if !defined(__ARM_NEON) // affinity seems not supported on M1 chip |
329 | |
330 | thread_affinity_policy ap; |
331 | ap.affinity_tag = affinity; |
332 | if (thread_policy_set(mach_thread_self(),THREAD_AFFINITY_POLICY,(thread_policy_t)&ap,THREAD_AFFINITY_POLICY_COUNT) != KERN_SUCCESS) |
333 | WARNING("setting thread affinity failed" ); // on purpose only a warning |
334 | |
335 | #endif |
336 | } |
337 | } |
338 | #endif |
339 | |
340 | //////////////////////////////////////////////////////////////////////////////// |
341 | /// Unix Platform |
342 | //////////////////////////////////////////////////////////////////////////////// |
343 | |
344 | #if defined(__UNIX__) || defined(PTHREADS_WIN32) |
345 | |
346 | #include <pthread.h> |
347 | #include <sched.h> |
348 | |
349 | #if defined(__USE_NUMA__) |
350 | #include <numa.h> |
351 | #endif |
352 | |
353 | namespace embree |
354 | { |
355 | struct ThreadStartupData |
356 | { |
357 | public: |
358 | ThreadStartupData (thread_func f, void* arg, int affinity) |
359 | : f(f), arg(arg), affinity(affinity) {} |
360 | public: |
361 | thread_func f; |
362 | void* arg; |
363 | ssize_t affinity; |
364 | }; |
365 | |
366 | static void* threadStartup(ThreadStartupData* parg) |
367 | { |
368 | _mm_setcsr(_mm_getcsr() | /*FTZ:*/ (1<<15) | /*DAZ:*/ (1<<6)); |
369 | |
370 | /*! Mac OS X does not support setting affinity at thread creation time */ |
371 | #if defined(__MACOSX__) |
372 | if (parg->affinity >= 0) |
373 | setAffinity(parg->affinity); |
374 | #endif |
375 | |
376 | parg->f(parg->arg); |
377 | delete parg; |
378 | return nullptr; |
379 | } |
380 | |
381 | /*! creates a hardware thread running on specific core */ |
382 | thread_t createThread(thread_func f, void* arg, size_t stack_size, ssize_t threadID) |
383 | { |
384 | /* set stack size */ |
385 | pthread_attr_t attr; |
386 | pthread_attr_init(&attr); |
387 | if (stack_size > 0) pthread_attr_setstacksize (&attr, stack_size); |
388 | |
389 | /* create thread */ |
390 | pthread_t* tid = new pthread_t; |
391 | if (pthread_create(tid,&attr,(void*(*)(void*))threadStartup,new ThreadStartupData(f,arg,threadID)) != 0) { |
392 | pthread_attr_destroy(&attr); |
393 | delete tid; |
394 | FATAL("pthread_create failed" ); |
395 | } |
396 | pthread_attr_destroy(&attr); |
397 | |
398 | /* set affinity */ |
399 | #if defined(__LINUX__) && !defined(__ANDROID__) |
400 | if (threadID >= 0) { |
401 | cpu_set_t cset; |
402 | CPU_ZERO(&cset); |
403 | threadID = mapThreadID(threadID); |
404 | CPU_SET(threadID, &cset); |
405 | pthread_setaffinity_np(*tid, sizeof(cset), &cset); |
406 | } |
407 | #elif defined(__FreeBSD__) |
408 | if (threadID >= 0) { |
409 | cpuset_t cset; |
410 | CPU_ZERO(&cset); |
411 | CPU_SET(threadID, &cset); |
412 | pthread_setaffinity_np(*tid, sizeof(cset), &cset); |
413 | } |
414 | #elif defined(__ANDROID__) |
415 | if (threadID >= 0) { |
416 | cpu_set_t cset; |
417 | CPU_ZERO(&cset); |
418 | CPU_SET(threadID, &cset); |
419 | sched_setaffinity(pthread_gettid_np(*tid), sizeof(cset), &cset); |
420 | } |
421 | #endif |
422 | |
423 | return thread_t(tid); |
424 | } |
425 | |
/*! Lets the calling thread relinquish the processor via sched_yield. */
void yield()
{
  sched_yield();
}
430 | |
431 | /*! waits until the given thread has terminated */ |
432 | void join(thread_t tid) { |
433 | if (pthread_join(*(pthread_t*)tid, nullptr) != 0) |
434 | FATAL("pthread_join failed" ); |
435 | delete (pthread_t*)tid; |
436 | } |
437 | |
438 | /*! destroy a hardware thread by its handle */ |
439 | void destroyThread(thread_t tid) { |
440 | #if defined(__ANDROID__) |
441 | FATAL("Can't destroy threads on Android." ); // pthread_cancel not implemented. |
442 | #else |
443 | pthread_cancel(*(pthread_t*)tid); |
444 | delete (pthread_t*)tid; |
445 | #endif |
446 | } |
447 | |
448 | /*! creates thread local storage */ |
449 | tls_t createTls() |
450 | { |
451 | pthread_key_t* key = new pthread_key_t; |
452 | if (pthread_key_create(key,nullptr) != 0) { |
453 | delete key; |
454 | FATAL("pthread_key_create failed" ); |
455 | } |
456 | |
457 | return tls_t(key); |
458 | } |
459 | |
460 | /*! return the thread local storage pointer */ |
461 | void* getTls(tls_t tls) |
462 | { |
463 | assert(tls); |
464 | return pthread_getspecific(*(pthread_key_t*)tls); |
465 | } |
466 | |
467 | /*! set the thread local storage pointer */ |
468 | void setTls(tls_t tls, void* const ptr) |
469 | { |
470 | assert(tls); |
471 | if (pthread_setspecific(*(pthread_key_t*)tls, ptr) != 0) |
472 | FATAL("pthread_setspecific failed" ); |
473 | } |
474 | |
475 | /*! destroys thread local storage identifier */ |
476 | void destroyTls(tls_t tls) |
477 | { |
478 | assert(tls); |
479 | if (pthread_key_delete(*(pthread_key_t*)tls) != 0) |
480 | FATAL("pthread_key_delete failed" ); |
481 | delete (pthread_key_t*)tls; |
482 | } |
483 | } |
484 | |
485 | #endif |
486 | |