// ======================================================================== //
// Copyright 2009-2019 Intel Corporation                                    //
//                                                                          //
// Licensed under the Apache License, Version 2.0 (the "License");          //
// you may not use this file except in compliance with the License.         //
// You may obtain a copy of the License at                                  //
//                                                                          //
//     http://www.apache.org/licenses/LICENSE-2.0                           //
//                                                                          //
// Unless required by applicable law or agreed to in writing, software      //
// distributed under the License is distributed on an "AS IS" BASIS,        //
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. //
// See the License for the specific language governing permissions and     //
// limitations under the License.                                           //
// ======================================================================== //

#if defined(_MSC_VER)
#pragma warning (disable : 4146) // unary minus operator applied to unsigned type, result still unsigned
#endif

#if defined(__APPLE__)
#include <mach/thread_act.h>
#include <mach/mach_init.h>
#endif

#include "thread.h"
#include <algorithm> // std::none_of
#include <fstream>

namespace oidn {
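
  // Typical usage (illustrative sketch only; the actual worker-thread integration lives elsewhere):
  //
  //   ThreadAffinity affinity(1, verbose); // allow at most one thread per physical core
  //
  //   // from worker thread i:
  //   affinity.set(i);     // save the thread's current affinity and pin it
  //   ...                  // do the work
  //   affinity.restore(i); // undo the pinning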

#if defined(_WIN32)

  // --------------------------------------------------------------------------
  // ThreadAffinity - Windows
  // --------------------------------------------------------------------------

  ThreadAffinity::ThreadAffinity(int numThreadsPerCore, int verbose)
    : Verbose(verbose)
  {
    HMODULE hLib = GetModuleHandle(TEXT("kernel32"));
    pGetLogicalProcessorInformationEx = (GetLogicalProcessorInformationExFunc)GetProcAddress(hLib, "GetLogicalProcessorInformationEx");
    pSetThreadGroupAffinity = (SetThreadGroupAffinityFunc)GetProcAddress(hLib, "SetThreadGroupAffinity");

    if (pGetLogicalProcessorInformationEx && pSetThreadGroupAffinity)
    {
      // Get logical processor information
      PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX buffer = nullptr;
      DWORD bufferSize = 0;

      // First, call the function with a null buffer to query the required buffer size
      BOOL result = pGetLogicalProcessorInformationEx(RelationProcessorCore, buffer, &bufferSize);
      if (result || GetLastError() != ERROR_INSUFFICIENT_BUFFER)
      {
        OIDN_WARNING("GetLogicalProcessorInformationEx failed");
        return;
      }

      // Allocate the buffer
      buffer = (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX)malloc(bufferSize);
      if (!buffer)
      {
        OIDN_WARNING("SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX allocation failed");
        return;
      }

      // Call the function again, this time with the properly sized buffer
      result = pGetLogicalProcessorInformationEx(RelationProcessorCore, buffer, &bufferSize);
      if (!result)
      {
        OIDN_WARNING("GetLogicalProcessorInformationEx failed");
        free(buffer);
        return;
      }

      // Iterate over the logical processor information structures
      // There should be one structure for each physical core
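      // (with numThreadsPerCore == 1 only the first logical processor of each core is kept,
      //  so affinitized threads end up on distinct physical cores)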
      char* ptr = (char*)buffer;
      while (ptr < (char*)buffer + bufferSize)
      {
        PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX item = (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX)ptr;
        if (item->Relationship == RelationProcessorCore && item->Processor.GroupCount > 0)
        {
          // Iterate over the groups
          int numThreads = 0;
          for (int group = 0; (group < item->Processor.GroupCount) && (numThreads < numThreadsPerCore); ++group)
          {
            GROUP_AFFINITY coreAffinity = item->Processor.GroupMask[group];
            while ((coreAffinity.Mask != 0) && (numThreads < numThreadsPerCore))
            {
              // Extract the next set bit/thread from the mask
              GROUP_AFFINITY threadAffinity = coreAffinity;
              threadAffinity.Mask = threadAffinity.Mask & -threadAffinity.Mask;
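              // (x & -x isolates the lowest set bit, e.g. 0b0110 & -0b0110 == 0b0010; the unary
              //  minus on the unsigned mask is why MSVC warning 4146 is disabled at the top of this file)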

              // Push the affinity for this thread
              affinities.push_back(threadAffinity);
              oldAffinities.push_back(threadAffinity);
              numThreads++;

              // Remove this bit/thread from the mask
              coreAffinity.Mask ^= threadAffinity.Mask;
            }
          }
        }

        // Next structure
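        // (the entries are variable-sized, so advance by the byte size stored in the entry itself)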
        ptr += item->Size;
      }

      // Free the buffer
      free(buffer);
    }
  }

  void ThreadAffinity::set(int threadIndex)
  {
    if (threadIndex >= (int)affinities.size())
      return;

    // Save the current affinity and set the new one
    const HANDLE thread = GetCurrentThread();
    if (!pSetThreadGroupAffinity(thread, &affinities[threadIndex], &oldAffinities[threadIndex]))
      OIDN_WARNING("SetThreadGroupAffinity failed");
  }

  void ThreadAffinity::restore(int threadIndex)
  {
    if (threadIndex >= (int)affinities.size())
      return;

    // Restore the original affinity
    const HANDLE thread = GetCurrentThread();
    if (!pSetThreadGroupAffinity(thread, &oldAffinities[threadIndex], nullptr))
      OIDN_WARNING("SetThreadGroupAffinity failed");
  }

#elif defined(__linux__)

  // --------------------------------------------------------------------------
  // ThreadAffinity - Linux
  // --------------------------------------------------------------------------

  ThreadAffinity::ThreadAffinity(int numThreadsPerCore, int verbose)
    : Verbose(verbose)
  {
    std::vector<int> threadIds;

    // Parse the thread/CPU topology
    for (int cpuId = 0; ; cpuId++)
    {
      std::fstream fs;
      std::string cpu = std::string("/sys/devices/system/cpu/cpu") + std::to_string(cpuId) + std::string("/topology/thread_siblings_list");
      fs.open(cpu.c_str(), std::fstream::in);
      if (fs.fail()) break;

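      // thread_siblings_list holds the logical CPU IDs sharing this physical core, typically as a
      // comma-separated list (e.g. "0,4"); some kernels may emit range syntax (e.g. "0-1") instead,
      // which this simple parser does not handle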
      int i;
      int j = 0;
      while ((j < numThreadsPerCore) && (fs >> i))
      {
        if (std::none_of(threadIds.begin(), threadIds.end(), [&](int id) { return id == i; }))
          threadIds.push_back(i);

        if (fs.peek() == ',')
          fs.ignore();
        j++;
      }

      fs.close();
    }
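
    // Example: on a 4-core/8-thread CPU where cpu0's siblings are "0,4", cpu1's are "1,5", etc.,
    // numThreadsPerCore == 1 typically yields threadIds == {0, 1, 2, 3}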

#if 0
    for (size_t i = 0; i < threadIds.size(); ++i)
      std::cout << "thread " << i << " -> " << threadIds[i] << std::endl;
#endif

    // Create the affinity structures
    affinities.resize(threadIds.size());
    oldAffinities.resize(threadIds.size());

    for (size_t i = 0; i < threadIds.size(); ++i)
    {
      cpu_set_t affinity;
      CPU_ZERO(&affinity);
      CPU_SET(threadIds[i], &affinity);

      affinities[i] = affinity;
      oldAffinities[i] = affinity;
    }
  }

  void ThreadAffinity::set(int threadIndex)
  {
    if (threadIndex >= (int)affinities.size())
      return;

    const pthread_t thread = pthread_self();

    // Save the current affinity
    if (pthread_getaffinity_np(thread, sizeof(cpu_set_t), &oldAffinities[threadIndex]) != 0)
    {
      OIDN_WARNING("pthread_getaffinity_np failed");
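      // Fall back to the intended affinity so that a later restore() still applies a sane mask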
      oldAffinities[threadIndex] = affinities[threadIndex];
      return;
    }

    // Set the new affinity
    if (pthread_setaffinity_np(thread, sizeof(cpu_set_t), &affinities[threadIndex]) != 0)
      OIDN_WARNING("pthread_setaffinity_np failed");
  }

  void ThreadAffinity::restore(int threadIndex)
  {
    if (threadIndex >= (int)affinities.size())
      return;

    const pthread_t thread = pthread_self();

    // Restore the original affinity
    if (pthread_setaffinity_np(thread, sizeof(cpu_set_t), &oldAffinities[threadIndex]) != 0)
      OIDN_WARNING("pthread_setaffinity_np failed");
  }

#elif defined(__APPLE__)

  // --------------------------------------------------------------------------
  // ThreadAffinity - macOS
  // --------------------------------------------------------------------------

  ThreadAffinity::ThreadAffinity(int numThreadsPerCore, int verbose)
    : Verbose(verbose)
  {
    // Query the thread/CPU topology
    int numPhysicalCpus;
    int numLogicalCpus;

    if (!getSysctl("hw.physicalcpu", numPhysicalCpus) || !getSysctl("hw.logicalcpu", numLogicalCpus))
    {
      OIDN_WARNING("sysctlbyname failed");
      return;
    }

    if ((numLogicalCpus % numPhysicalCpus != 0) && (numThreadsPerCore > 1))
      return; // unexpected topology, leave the affinities empty
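    // e.g. 4 physical and 8 logical CPUs -> 2 hardware threads per core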
    const int maxThreadsPerCore = numLogicalCpus / numPhysicalCpus;

    // Create the affinity structures
    // macOS doesn't support pinning a thread to a specific core, but we can at least group
    // together the threads that should share a core by giving them the same affinity tag
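    // (the affinity tag is only a scheduling hint: threads sharing a tag are kept close together,
    //  e.g. on the same cache; tag 0 is THREAD_AFFINITY_TAG_NULL, hence the tags start from 1)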
    for (int core = 1; core <= numPhysicalCpus; ++core) // tags start from 1!
    {
      thread_affinity_policy affinity;
      affinity.affinity_tag = core;

      for (int thread = 0; thread < min(numThreadsPerCore, maxThreadsPerCore); ++thread)
      {
        affinities.push_back(affinity);
        oldAffinities.push_back(affinity);
      }
    }
  }

  void ThreadAffinity::set(int threadIndex)
  {
    if (threadIndex >= (int)affinities.size())
      return;

    const auto thread = mach_thread_self();

    // Save the current affinity
    mach_msg_type_number_t policyCount = THREAD_AFFINITY_POLICY_COUNT;
    boolean_t getDefault = FALSE;
    if (thread_policy_get(thread, THREAD_AFFINITY_POLICY, (thread_policy_t)&oldAffinities[threadIndex], &policyCount, &getDefault) != KERN_SUCCESS)
    {
      OIDN_WARNING("thread_policy_get failed");
      oldAffinities[threadIndex] = affinities[threadIndex];
      return;
    }

    // Set the new affinity
    if (thread_policy_set(thread, THREAD_AFFINITY_POLICY, (thread_policy_t)&affinities[threadIndex], THREAD_AFFINITY_POLICY_COUNT) != KERN_SUCCESS)
      OIDN_WARNING("thread_policy_set failed");
  }

  void ThreadAffinity::restore(int threadIndex)
  {
    if (threadIndex >= (int)affinities.size())
      return;

    const auto thread = mach_thread_self();

    // Restore the original affinity
    if (thread_policy_set(thread, THREAD_AFFINITY_POLICY, (thread_policy_t)&oldAffinities[threadIndex], THREAD_AFFINITY_POLICY_COUNT) != KERN_SUCCESS)
      OIDN_WARNING("thread_policy_set failed");
  }

#endif

} // namespace oidn
