1// ======================================================================== //
2// Copyright 2009-2019 Intel Corporation //
3// //
4// Licensed under the Apache License, Version 2.0 (the "License"); //
5// you may not use this file except in compliance with the License. //
6// You may obtain a copy of the License at //
7// //
8// http://www.apache.org/licenses/LICENSE-2.0 //
9// //
10// Unless required by applicable law or agreed to in writing, software //
11// distributed under the License is distributed on an "AS IS" BASIS, //
12// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. //
13// See the License for the specific language governing permissions and //
14// limitations under the License. //
15// ======================================================================== //
16
17#if defined(_MSC_VER)
18 #pragma warning (disable : 4146) // unary minus operator applied to unsigned type, result still unsigned
19#endif
20
#if defined(__APPLE__)
  #include <mach/thread_act.h>
  #include <mach/mach_init.h>
  #include <mach/mach_port.h>
#endif

#include "thread.h"
#include <fstream>
28
29namespace oidn {
30
31#if defined(_WIN32)
32
33 // --------------------------------------------------------------------------
34 // ThreadAffinity - Windows
35 // --------------------------------------------------------------------------
36
37 ThreadAffinity::ThreadAffinity(int numThreadsPerCore, int verbose)
38 : Verbose(verbose)
39 {
40 HMODULE hLib = GetModuleHandle(TEXT("kernel32"));
41 pGetLogicalProcessorInformationEx = (GetLogicalProcessorInformationExFunc)GetProcAddress(hLib, "GetLogicalProcessorInformationEx");
42 pSetThreadGroupAffinity = (SetThreadGroupAffinityFunc)GetProcAddress(hLib, "SetThreadGroupAffinity");
43
44 if (pGetLogicalProcessorInformationEx && pSetThreadGroupAffinity)
45 {
46 // Get logical processor information
47 PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX buffer = nullptr;
48 DWORD bufferSize = 0;
49
50 // First call the function with an empty buffer to get the required buffer size
51 BOOL result = pGetLogicalProcessorInformationEx(RelationProcessorCore, buffer, &bufferSize);
52 if (result || GetLastError() != ERROR_INSUFFICIENT_BUFFER)
53 {
54 OIDN_WARNING("GetLogicalProcessorInformationEx failed");
55 return;
56 }
57
58 // Allocate the buffer
59 buffer = (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX)malloc(bufferSize);
60 if (!buffer)
61 {
62 OIDN_WARNING("SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX allocation failed");
63 return;
64 }
65
66 // Call again the function but now with the properly sized buffer
67 result = pGetLogicalProcessorInformationEx(RelationProcessorCore, buffer, &bufferSize);
68 if (!result)
69 {
70 OIDN_WARNING("GetLogicalProcessorInformationEx failed");
71 free(buffer);
72 return;
73 }
74
75 // Iterate over the logical processor information structures
76 // There should be one structure for each physical core
77 char* ptr = (char*)buffer;
78 while (ptr < (char*)buffer + bufferSize)
79 {
80 PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX item = (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX)ptr;
81 if (item->Relationship == RelationProcessorCore && item->Processor.GroupCount > 0)
82 {
83 // Iterate over the groups
84 int numThreads = 0;
85 for (int group = 0; (group < item->Processor.GroupCount) && (numThreads < numThreadsPerCore); ++group)
86 {
87 GROUP_AFFINITY coreAffinity = item->Processor.GroupMask[group];
88 while ((coreAffinity.Mask != 0) && (numThreads < numThreadsPerCore))
89 {
90 // Extract the next set bit/thread from the mask
91 GROUP_AFFINITY threadAffinity = coreAffinity;
92 threadAffinity.Mask = threadAffinity.Mask & -threadAffinity.Mask;
93
94 // Push the affinity for this thread
95 affinities.push_back(threadAffinity);
96 oldAffinities.push_back(threadAffinity);
97 numThreads++;
98
99 // Remove this bit/thread from the mask
100 coreAffinity.Mask ^= threadAffinity.Mask;
101 }
102 }
103 }
104
105 // Next structure
106 ptr += item->Size;
107 }
108
109 // Free the buffer
110 free(buffer);
111 }
112 }
113
114 void ThreadAffinity::set(int threadIndex)
115 {
116 if (threadIndex >= (int)affinities.size())
117 return;
118
119 // Save the current affinity and set the new one
120 const HANDLE thread = GetCurrentThread();
121 if (!pSetThreadGroupAffinity(thread, &affinities[threadIndex], &oldAffinities[threadIndex]))
122 OIDN_WARNING("SetThreadGroupAffinity failed");
123 }
124
125 void ThreadAffinity::restore(int threadIndex)
126 {
127 if (threadIndex >= (int)affinities.size())
128 return;
129
130 // Restore the original affinity
131 const HANDLE thread = GetCurrentThread();
132 if (!pSetThreadGroupAffinity(thread, &oldAffinities[threadIndex], nullptr))
133 OIDN_WARNING("SetThreadGroupAffinity failed");
134 }
135
136#elif defined(__linux__)
137
138 // --------------------------------------------------------------------------
139 // ThreadAffinity - Linux
140 // --------------------------------------------------------------------------
141
142 ThreadAffinity::ThreadAffinity(int numThreadsPerCore, int verbose)
143 : Verbose(verbose)
144 {
145 std::vector<int> threadIds;
146
147 // Parse the thread/CPU topology
148 for (int cpuId = 0; ; cpuId++)
149 {
150 std::fstream fs;
151 std::string cpu = std::string("/sys/devices/system/cpu/cpu") + std::to_string(cpuId) + std::string("/topology/thread_siblings_list");
152 fs.open(cpu.c_str(), std::fstream::in);
153 if (fs.fail()) break;
154
155 int i;
156 int j = 0;
157 while ((j < numThreadsPerCore) && (fs >> i))
158 {
159 if (std::none_of(threadIds.begin(), threadIds.end(), [&](int id) { return id == i; }))
160 threadIds.push_back(i);
161
162 if (fs.peek() == ',')
163 fs.ignore();
164 j++;
165 }
166
167 fs.close();
168 }
169
170 #if 0
171 for (size_t i = 0; i < thread_ids.size(); ++i)
172 std::cout << "thread " << i << " -> " << thread_ids[i] << std::endl;
173 #endif
174
175 // Create the affinity structures
176 affinities.resize(threadIds.size());
177 oldAffinities.resize(threadIds.size());
178
179 for (size_t i = 0; i < threadIds.size(); ++i)
180 {
181 cpu_set_t affinity;
182 CPU_ZERO(&affinity);
183 CPU_SET(threadIds[i], &affinity);
184
185 affinities[i] = affinity;
186 oldAffinities[i] = affinity;
187 }
188 }
189
190 void ThreadAffinity::set(int threadIndex)
191 {
192 if (threadIndex >= (int)affinities.size())
193 return;
194
195 const pthread_t thread = pthread_self();
196
197 // Save the current affinity
198 if (pthread_getaffinity_np(thread, sizeof(cpu_set_t), &oldAffinities[threadIndex]) != 0)
199 {
200 OIDN_WARNING("pthread_getaffinity_np failed");
201 oldAffinities[threadIndex] = affinities[threadIndex];
202 return;
203 }
204
205 // Set the new affinity
206 if (pthread_setaffinity_np(thread, sizeof(cpu_set_t), &affinities[threadIndex]) != 0)
207 OIDN_WARNING("pthread_setaffinity_np failed");
208 }
209
210 void ThreadAffinity::restore(int threadIndex)
211 {
212 if (threadIndex >= (int)affinities.size())
213 return;
214
215 const pthread_t thread = pthread_self();
216
217 // Restore the original affinity
218 if (pthread_setaffinity_np(thread, sizeof(cpu_set_t), &oldAffinities[threadIndex]) != 0)
219 OIDN_WARNING("pthread_setaffinity_np failed");
220 }
221
222#elif defined(__APPLE__)
223
224 // --------------------------------------------------------------------------
225 // ThreadAffinity - macOS
226 // --------------------------------------------------------------------------
227
228 ThreadAffinity::ThreadAffinity(int numThreadsPerCore, int verbose)
229 : Verbose(verbose)
230 {
231 // Query the thread/CPU topology
232 int numPhysicalCpus;
233 int numLogicalCpus;
234
235 if (!getSysctl("hw.physicalcpu", numPhysicalCpus) || !getSysctl("hw.logicalcpu", numLogicalCpus))
236 {
237 OIDN_WARNING("sysctlbyname failed");
238 return;
239 }
240
241 if ((numLogicalCpus % numPhysicalCpus != 0) && (numThreadsPerCore > 1))
242 return; // this shouldn't happen
243 const int maxThreadsPerCore = numLogicalCpus / numPhysicalCpus;
244
245 // Create the affinity structures
246 // macOS doesn't support binding a thread to a specific core, but we can at least group threads which
247 // should be on the same core together
248 for (int core = 1; core <= numPhysicalCpus; ++core) // tags start from 1!
249 {
250 thread_affinity_policy affinity;
251 affinity.affinity_tag = core;
252
253 for (int thread = 0; thread < min(numThreadsPerCore, maxThreadsPerCore); ++thread)
254 {
255 affinities.push_back(affinity);
256 oldAffinities.push_back(affinity);
257 }
258 }
259 }
260
261 void ThreadAffinity::set(int threadIndex)
262 {
263 if (threadIndex >= (int)affinities.size())
264 return;
265
266 const auto thread = mach_thread_self();
267
268 // Save the current affinity
269 mach_msg_type_number_t policyCount = THREAD_AFFINITY_POLICY_COUNT;
270 boolean_t getDefault = FALSE;
271 if (thread_policy_get(thread, THREAD_AFFINITY_POLICY, (thread_policy_t)&oldAffinities[threadIndex], &policyCount, &getDefault) != KERN_SUCCESS)
272 {
273 OIDN_WARNING("thread_policy_get failed");
274 oldAffinities[threadIndex] = affinities[threadIndex];
275 return;
276 }
277
278 // Set the new affinity
279 if (thread_policy_set(thread, THREAD_AFFINITY_POLICY, (thread_policy_t)&affinities[threadIndex], THREAD_AFFINITY_POLICY_COUNT) != KERN_SUCCESS)
280 OIDN_WARNING("thread_policy_set failed");
281 }
282
283 void ThreadAffinity::restore(int threadIndex)
284 {
285 if (threadIndex >= (int)affinities.size())
286 return;
287
288 const auto thread = mach_thread_self();
289
290 // Restore the original affinity
291 if (thread_policy_set(thread, THREAD_AFFINITY_POLICY, (thread_policy_t)&oldAffinities[threadIndex], THREAD_AFFINITY_POLICY_COUNT) != KERN_SUCCESS)
292 OIDN_WARNING("thread_policy_set failed");
293 }
294
295#endif
296
297} // namespace oidn
298