1// Copyright 2009-2021 Intel Corporation
2// SPDX-License-Identifier: Apache-2.0
3
4#include "thread.h"
5#include "sysinfo.h"
6#include "string.h"
7
8#include <iostream>
9#if defined(__ARM_NEON)
10#include "../simd/arm/emulation.h"
11#else
12#include <xmmintrin.h>
13#if defined(__EMSCRIPTEN__)
14#include "../simd/wasm/emulation.h"
15#endif
16#endif
17
18#if defined(PTHREADS_WIN32)
19#pragma comment (lib, "pthreadVC.lib")
20#endif
21
22////////////////////////////////////////////////////////////////////////////////
23/// Windows Platform
24////////////////////////////////////////////////////////////////////////////////
25
26#if defined(__WIN32__)
27
28#define WIN32_LEAN_AND_MEAN
29#include <windows.h>
30
31namespace embree
32{
33 /*! set the affinity of a given thread */
34 void setAffinity(HANDLE thread, ssize_t affinity)
35 {
36 typedef WORD (WINAPI *GetActiveProcessorGroupCountFunc)();
37 typedef DWORD (WINAPI *GetActiveProcessorCountFunc)(WORD);
38 typedef BOOL (WINAPI *SetThreadGroupAffinityFunc)(HANDLE, const GROUP_AFFINITY *, PGROUP_AFFINITY);
39 typedef BOOL (WINAPI *SetThreadIdealProcessorExFunc)(HANDLE, PPROCESSOR_NUMBER, PPROCESSOR_NUMBER);
40 HMODULE hlib = LoadLibrary("Kernel32");
41 GetActiveProcessorGroupCountFunc pGetActiveProcessorGroupCount = (GetActiveProcessorGroupCountFunc)GetProcAddress(hlib, "GetActiveProcessorGroupCount");
42 GetActiveProcessorCountFunc pGetActiveProcessorCount = (GetActiveProcessorCountFunc)GetProcAddress(hlib, "GetActiveProcessorCount");
43 SetThreadGroupAffinityFunc pSetThreadGroupAffinity = (SetThreadGroupAffinityFunc)GetProcAddress(hlib, "SetThreadGroupAffinity");
44 SetThreadIdealProcessorExFunc pSetThreadIdealProcessorEx = (SetThreadIdealProcessorExFunc)GetProcAddress(hlib, "SetThreadIdealProcessorEx");
45 if (pGetActiveProcessorGroupCount && pGetActiveProcessorCount && pSetThreadGroupAffinity && pSetThreadIdealProcessorEx)
46 {
47 int groups = pGetActiveProcessorGroupCount();
48 int totalProcessors = 0, group = 0, number = 0;
49 for (int i = 0; i<groups; i++) {
50 int processors = pGetActiveProcessorCount(i);
51 if (totalProcessors + processors > affinity) {
52 group = i;
53 number = (int)affinity - totalProcessors;
54 break;
55 }
56 totalProcessors += processors;
57 }
58
59 GROUP_AFFINITY groupAffinity;
60 groupAffinity.Group = (WORD)group;
61 groupAffinity.Mask = (KAFFINITY)(uint64_t(1) << number);
62 groupAffinity.Reserved[0] = 0;
63 groupAffinity.Reserved[1] = 0;
64 groupAffinity.Reserved[2] = 0;
65 if (!pSetThreadGroupAffinity(thread, &groupAffinity, nullptr))
66 WARNING("SetThreadGroupAffinity failed"); // on purpose only a warning
67
68 PROCESSOR_NUMBER processorNumber;
69 processorNumber.Group = group;
70 processorNumber.Number = number;
71 processorNumber.Reserved = 0;
72 if (!pSetThreadIdealProcessorEx(thread, &processorNumber, nullptr))
73 WARNING("SetThreadIdealProcessorEx failed"); // on purpose only a warning
74 }
75 else
76 {
77 if (!SetThreadAffinityMask(thread, DWORD_PTR(uint64_t(1) << affinity)))
78 WARNING("SetThreadAffinityMask failed"); // on purpose only a warning
79 if (SetThreadIdealProcessor(thread, (DWORD)affinity) == (DWORD)-1)
80 WARNING("SetThreadIdealProcessor failed"); // on purpose only a warning
81 }
82 }
83
84 /*! set affinity of the calling thread */
85 void setAffinity(ssize_t affinity) {
86 setAffinity(GetCurrentThread(), affinity);
87 }
88
89 struct ThreadStartupData
90 {
91 public:
92 ThreadStartupData (thread_func f, void* arg)
93 : f(f), arg(arg) {}
94 public:
95 thread_func f;
96 void* arg;
97 };
98
99 DWORD WINAPI threadStartup(LPVOID ptr)
100 {
101 ThreadStartupData* parg = (ThreadStartupData*) ptr;
102 _mm_setcsr(_mm_getcsr() | /*FTZ:*/ (1<<15) | /*DAZ:*/ (1<<6));
103 parg->f(parg->arg);
104 delete parg;
105 return 0;
106 }
107
108#if !defined(PTHREADS_WIN32)
109
110 /*! creates a hardware thread running on specific core */
111 thread_t createThread(thread_func f, void* arg, size_t stack_size, ssize_t threadID)
112 {
113 HANDLE thread = CreateThread(nullptr, stack_size, threadStartup, new ThreadStartupData(f,arg), 0, nullptr);
114 if (thread == nullptr) FATAL("CreateThread failed");
115 if (threadID >= 0) setAffinity(thread, threadID);
116 return thread_t(thread);
117 }
118
119 /*! the thread calling this function gets yielded */
120 void yield() {
121 SwitchToThread();
122 }
123
124 /*! waits until the given thread has terminated */
125 void join(thread_t tid) {
126 WaitForSingleObject(HANDLE(tid), INFINITE);
127 CloseHandle(HANDLE(tid));
128 }
129
130 /*! destroy a hardware thread by its handle */
131 void destroyThread(thread_t tid) {
132 TerminateThread(HANDLE(tid),0);
133 CloseHandle(HANDLE(tid));
134 }
135
136 /*! creates thread local storage */
137 tls_t createTls() {
138 return tls_t(size_t(TlsAlloc()));
139 }
140
141 /*! set the thread local storage pointer */
142 void setTls(tls_t tls, void* const ptr) {
143 TlsSetValue(DWORD(size_t(tls)), ptr);
144 }
145
146 /*! return the thread local storage pointer */
147 void* getTls(tls_t tls) {
148 return TlsGetValue(DWORD(size_t(tls)));
149 }
150
151 /*! destroys thread local storage identifier */
152 void destroyTls(tls_t tls) {
153 TlsFree(DWORD(size_t(tls)));
154 }
155#endif
156}
157
158#endif
159
160////////////////////////////////////////////////////////////////////////////////
161/// Linux Platform
162////////////////////////////////////////////////////////////////////////////////
163
164#if defined(__LINUX__) && !defined(__ANDROID__)
165
166#include <fstream>
167#include <sstream>
168#include <algorithm>
169
170namespace embree
171{
172 static MutexSys mutex;
173 static std::vector<size_t> threadIDs;
174
175 /* changes thread ID mapping such that we first fill up all thread on one core */
176 size_t mapThreadID(size_t threadID)
177 {
178 Lock<MutexSys> lock(mutex);
179
180 if (threadIDs.size() == 0)
181 {
182 /* parse thread/CPU topology */
183 for (size_t cpuID=0;;cpuID++)
184 {
185 std::fstream fs;
186 std::string cpu = std::string("/sys/devices/system/cpu/cpu") + std::to_string((long long)cpuID) + std::string("/topology/thread_siblings_list");
187 fs.open (cpu.c_str(), std::fstream::in);
188 if (fs.fail()) break;
189
190 int i;
191 while (fs >> i)
192 {
193 if (std::none_of(threadIDs.begin(),threadIDs.end(),[&] (int id) { return id == i; }))
194 threadIDs.push_back(i);
195 if (fs.peek() == ',')
196 fs.ignore();
197 }
198 fs.close();
199 }
200
201#if 0
202 for (size_t i=0;i<threadIDs.size();i++)
203 std::cout << i << " -> " << threadIDs[i] << std::endl;
204#endif
205
206 /* verify the mapping and do not use it if the mapping has errors */
207 for (size_t i=0;i<threadIDs.size();i++) {
208 for (size_t j=0;j<threadIDs.size();j++) {
209 if (i != j && threadIDs[i] == threadIDs[j]) {
210 threadIDs.clear();
211 }
212 }
213 }
214 }
215
216 /* re-map threadIDs if mapping is available */
217 size_t ID = threadID;
218 if (threadID < threadIDs.size())
219 ID = threadIDs[threadID];
220
221 /* find correct thread to affinitize to */
222 cpu_set_t set;
223 CPU_ZERO(&set);
224
225 if (pthread_getaffinity_np(pthread_self(), sizeof(set), &set) == 0)
226 {
227 for (int i=0, j=0; i<CPU_SETSIZE; i++)
228 {
229 if (!CPU_ISSET(i,&set)) continue;
230
231 if (j == ID) {
232 ID = i;
233 break;
234 }
235 j++;
236 }
237 }
238
239 return ID;
240 }
241
/*! Pins the calling thread to the CPU with index 'affinity'. */
void setAffinity(ssize_t affinity)
{
  /* mapThreadID(affinity) is deliberately not applied here: this is not working
   * properly in LXC containers when some processors are disabled */
  const size_t cpu = affinity;

  cpu_set_t mask;
  CPU_ZERO(&mask);
  CPU_SET(cpu, &mask);

  pthread_setaffinity_np(pthread_self(), sizeof(mask), &mask);
}
253}
254#endif
255
256////////////////////////////////////////////////////////////////////////////////
257/// Android Platform
258////////////////////////////////////////////////////////////////////////////////
259
260#if defined(__ANDROID__)
261
262namespace embree
263{
/*! Pins the calling thread to the CPU with index 'affinity'. */
void setAffinity(ssize_t affinity)
{
  cpu_set_t mask;
  CPU_ZERO(&mask);
  CPU_SET(affinity, &mask);

  /* the result of the call is not checked */
  sched_setaffinity(0, sizeof(mask), &mask);
}
273}
274#endif
275
276////////////////////////////////////////////////////////////////////////////////
277/// FreeBSD Platform
278////////////////////////////////////////////////////////////////////////////////
279
280#if defined(__FreeBSD__)
281
282#include <pthread_np.h>
283
284namespace embree
285{
286 /*! set affinity of the calling thread */
287 void setAffinity(ssize_t affinity)
288 {
289 cpuset_t cset;
290 CPU_ZERO(&cset);
291 CPU_SET(affinity, &cset);
292
293 pthread_setaffinity_np(pthread_self(), sizeof(cset), &cset);
294 }
295}
296#endif
297
298////////////////////////////////////////////////////////////////////////////////
299/// WebAssembly Platform
300////////////////////////////////////////////////////////////////////////////////
301
302#if defined(__EMSCRIPTEN__)
303namespace embree
304{
/*! Affinity request for the calling thread; a no-op on this platform. */
void setAffinity(ssize_t affinity)
{
  // Setting thread affinity is not supported in WASM.
  (void)affinity;
}
310}
311#endif
312
313////////////////////////////////////////////////////////////////////////////////
314/// MacOSX Platform
315////////////////////////////////////////////////////////////////////////////////
316
317#if defined(__MACOSX__)
318
319#include <mach/thread_act.h>
320#include <mach/thread_policy.h>
321#include <mach/mach_init.h>
322
323namespace embree
324{
325 /*! set affinity of the calling thread */
326 void setAffinity(ssize_t affinity)
327 {
328#if !defined(__ARM_NEON) // affinity seems not supported on M1 chip
329
330 thread_affinity_policy ap;
331 ap.affinity_tag = affinity;
332 if (thread_policy_set(mach_thread_self(),THREAD_AFFINITY_POLICY,(thread_policy_t)&ap,THREAD_AFFINITY_POLICY_COUNT) != KERN_SUCCESS)
333 WARNING("setting thread affinity failed"); // on purpose only a warning
334
335#endif
336 }
337}
338#endif
339
340////////////////////////////////////////////////////////////////////////////////
341/// Unix Platform
342////////////////////////////////////////////////////////////////////////////////
343
344#if defined(__UNIX__) || defined(PTHREADS_WIN32)
345
346#include <pthread.h>
347#include <sched.h>
348
349#if defined(__USE_NUMA__)
350#include <numa.h>
351#endif
352
353namespace embree
354{
355 struct ThreadStartupData
356 {
357 public:
358 ThreadStartupData (thread_func f, void* arg, int affinity)
359 : f(f), arg(arg), affinity(affinity) {}
360 public:
361 thread_func f;
362 void* arg;
363 ssize_t affinity;
364 };
365
366 static void* threadStartup(ThreadStartupData* parg)
367 {
368 _mm_setcsr(_mm_getcsr() | /*FTZ:*/ (1<<15) | /*DAZ:*/ (1<<6));
369
370 /*! Mac OS X does not support setting affinity at thread creation time */
371#if defined(__MACOSX__)
372 if (parg->affinity >= 0)
373 setAffinity(parg->affinity);
374#endif
375
376 parg->f(parg->arg);
377 delete parg;
378 return nullptr;
379 }
380
381 /*! creates a hardware thread running on specific core */
382 thread_t createThread(thread_func f, void* arg, size_t stack_size, ssize_t threadID)
383 {
384 /* set stack size */
385 pthread_attr_t attr;
386 pthread_attr_init(&attr);
387 if (stack_size > 0) pthread_attr_setstacksize (&attr, stack_size);
388
389 /* create thread */
390 pthread_t* tid = new pthread_t;
391 if (pthread_create(tid,&attr,(void*(*)(void*))threadStartup,new ThreadStartupData(f,arg,threadID)) != 0) {
392 pthread_attr_destroy(&attr);
393 delete tid;
394 FATAL("pthread_create failed");
395 }
396 pthread_attr_destroy(&attr);
397
398 /* set affinity */
399#if defined(__LINUX__) && !defined(__ANDROID__)
400 if (threadID >= 0) {
401 cpu_set_t cset;
402 CPU_ZERO(&cset);
403 threadID = mapThreadID(threadID);
404 CPU_SET(threadID, &cset);
405 pthread_setaffinity_np(*tid, sizeof(cset), &cset);
406 }
407#elif defined(__FreeBSD__)
408 if (threadID >= 0) {
409 cpuset_t cset;
410 CPU_ZERO(&cset);
411 CPU_SET(threadID, &cset);
412 pthread_setaffinity_np(*tid, sizeof(cset), &cset);
413 }
414#elif defined(__ANDROID__)
415 if (threadID >= 0) {
416 cpu_set_t cset;
417 CPU_ZERO(&cset);
418 CPU_SET(threadID, &cset);
419 sched_setaffinity(pthread_gettid_np(*tid), sizeof(cset), &cset);
420 }
421#endif
422
423 return thread_t(tid);
424 }
425
/*! Yields execution of the calling thread. */
void yield()
{
  (void)sched_yield(); // result intentionally unused
}
430
431 /*! waits until the given thread has terminated */
432 void join(thread_t tid) {
433 if (pthread_join(*(pthread_t*)tid, nullptr) != 0)
434 FATAL("pthread_join failed");
435 delete (pthread_t*)tid;
436 }
437
438 /*! destroy a hardware thread by its handle */
439 void destroyThread(thread_t tid) {
440#if defined(__ANDROID__)
441 FATAL("Can't destroy threads on Android."); // pthread_cancel not implemented.
442#else
443 pthread_cancel(*(pthread_t*)tid);
444 delete (pthread_t*)tid;
445#endif
446 }
447
448 /*! creates thread local storage */
449 tls_t createTls()
450 {
451 pthread_key_t* key = new pthread_key_t;
452 if (pthread_key_create(key,nullptr) != 0) {
453 delete key;
454 FATAL("pthread_key_create failed");
455 }
456
457 return tls_t(key);
458 }
459
460 /*! return the thread local storage pointer */
461 void* getTls(tls_t tls)
462 {
463 assert(tls);
464 return pthread_getspecific(*(pthread_key_t*)tls);
465 }
466
467 /*! set the thread local storage pointer */
468 void setTls(tls_t tls, void* const ptr)
469 {
470 assert(tls);
471 if (pthread_setspecific(*(pthread_key_t*)tls, ptr) != 0)
472 FATAL("pthread_setspecific failed");
473 }
474
475 /*! destroys thread local storage identifier */
476 void destroyTls(tls_t tls)
477 {
478 assert(tls);
479 if (pthread_key_delete(*(pthread_key_t*)tls) != 0)
480 FATAL("pthread_key_delete failed");
481 delete (pthread_key_t*)tls;
482 }
483}
484
485#endif
486