thread.cpp source code [Godot/thirdparty/oidn/common/thread.cpp]

1	// ======================================================================== //
2	// Copyright 2009-2019 Intel Corporation //
3	// //
4	// Licensed under the Apache License, Version 2.0 (the "License"); //
5	// you may not use this file except in compliance with the License. //
6	// You may obtain a copy of the License at //
7	// //
8	// http://www.apache.org/licenses/LICENSE-2.0 //
9	// //
10	// Unless required by applicable law or agreed to in writing, software //
11	// distributed under the License is distributed on an "AS IS" BASIS, //
12	// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. //
13	// See the License for the specific language governing permissions and //
14	// limitations under the License. //
15	// ======================================================================== //
16
17	#if defined(_MSC_VER)
18	#pragma warning (disable : 4146) // unary minus operator applied to unsigned type, result still unsigned
19	#endif
20
#if defined(__APPLE__)
  #include <mach/thread_act.h>
  #include <mach/mach_init.h>
  #include <mach/mach_port.h>
#endif

#include "thread.h"
#include <algorithm>
#include <fstream>
28
29	namespace oidn {
30
31	#if defined(_WIN32)
32
33	// --------------------------------------------------------------------------
34	// ThreadAffinity - Windows
35	// --------------------------------------------------------------------------
36
37	ThreadAffinity::ThreadAffinity(int numThreadsPerCore, int verbose)
38	: Verbose(verbose)
39	{
40	HMODULE hLib = GetModuleHandle(TEXT("kernel32"));
41	pGetLogicalProcessorInformationEx = (GetLogicalProcessorInformationExFunc)GetProcAddress(hLib, "GetLogicalProcessorInformationEx");
42	pSetThreadGroupAffinity = (SetThreadGroupAffinityFunc)GetProcAddress(hLib, "SetThreadGroupAffinity");
43
44	if (pGetLogicalProcessorInformationEx && pSetThreadGroupAffinity)
45	{
46	// Get logical processor information
47	PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX buffer = nullptr;
48	DWORD bufferSize = `0`;
49
50	// First call the function with an empty buffer to get the required buffer size
51	BOOL result = pGetLogicalProcessorInformationEx(RelationProcessorCore, buffer, &bufferSize);
52	if (result \|\| GetLastError() != ERROR_INSUFFICIENT_BUFFER)
53	{
54	OIDN_WARNING("GetLogicalProcessorInformationEx failed");
55	return;
56	}
57
58	// Allocate the buffer
59	buffer = (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX)malloc(bufferSize);
60	if (!buffer)
61	{
62	OIDN_WARNING("SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX allocation failed");
63	return;
64	}
65
66	// Call again the function but now with the properly sized buffer
67	result = pGetLogicalProcessorInformationEx(RelationProcessorCore, buffer, &bufferSize);
68	if (!result)
69	{
70	OIDN_WARNING("GetLogicalProcessorInformationEx failed");
71	free(buffer);
72	return;
73	}
74
75	// Iterate over the logical processor information structures
76	// There should be one structure for each physical core
77	char* ptr = (char*)buffer;
78	while (ptr < (char*)buffer + bufferSize)
79	{
80	PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX item = (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX)ptr;
81	if (item->Relationship == RelationProcessorCore && item->Processor.GroupCount > `0`)
82	{
83	// Iterate over the groups
84	int numThreads = `0`;
85	for (int group = `0`; (group < item->Processor.GroupCount) && (numThreads < numThreadsPerCore); ++group)
86	{
87	GROUP_AFFINITY coreAffinity = item->Processor.GroupMask[group];
88	while ((coreAffinity.Mask != `0`) && (numThreads < numThreadsPerCore))
89	{
90	// Extract the next set bit/thread from the mask
91	GROUP_AFFINITY threadAffinity = coreAffinity;
92	threadAffinity.Mask = threadAffinity.Mask & -threadAffinity.Mask;
93
94	// Push the affinity for this thread
95	affinities.push_back(threadAffinity);
96	oldAffinities.push_back(threadAffinity);
97	numThreads++;
98
99	// Remove this bit/thread from the mask
100	coreAffinity.Mask ^= threadAffinity.Mask;
101	}
102	}
103	}
104
105	// Next structure
106	ptr += item->Size;
107	}
108
109	// Free the buffer
110	free(buffer);
111	}
112	}
113
114	void ThreadAffinity::set(int threadIndex)
115	{
116	if (threadIndex >= (int)affinities.size())
117	return;
118
119	// Save the current affinity and set the new one
120	const HANDLE thread = GetCurrentThread();
121	if (!pSetThreadGroupAffinity(thread, &affinities[threadIndex], &oldAffinities[threadIndex]))
122	OIDN_WARNING("SetThreadGroupAffinity failed");
123	}
124
125	void ThreadAffinity::restore(int threadIndex)
126	{
127	if (threadIndex >= (int)affinities.size())
128	return;
129
130	// Restore the original affinity
131	const HANDLE thread = GetCurrentThread();
132	if (!pSetThreadGroupAffinity(thread, &oldAffinities[threadIndex], nullptr))
133	OIDN_WARNING("SetThreadGroupAffinity failed");
134	}
135
136	#elif defined(__linux__)
137
138	// --------------------------------------------------------------------------
139	// ThreadAffinity - Linux
140	// --------------------------------------------------------------------------
141
142	ThreadAffinity::ThreadAffinity(int numThreadsPerCore, int verbose)
143	: Verbose (verbose)
144	{
145	std::vector<int> threadIds;
146
147	// Parse the thread/CPU topology
148	for (int cpuId = `0`; ; cpuId++)
149	{
150	std::fstream fs;
151	std::string cpu = std::string ("/sys/devices/system/cpu/cpu") + std::to_string(cpuId) + std::string ("/topology/thread_siblings_list");
152	fs.open(cpu.c_str(), std::fstream::in);
153	if (fs.fail()) break;
154
155	int i;
156	int j = `0`;
157	while ((j < numThreadsPerCore) && (fs >> i))
158	{
159	if (std::none_of(threadIds.begin(), threadIds.end(), [&](int id) { return id == i; }))
160	threadIds.push_back(i);
161
162	if (fs.peek() == `','`)
163	fs.ignore();
164	j++;
165	}
166
167	fs.close();
168	}
169
170	#if 0
171	for (size_t i = `0`; i < thread_ids.size(); ++i)
172	std::cout << "thread " << i << " -> " << thread_ids[i] << std::endl;
173	#endif
174
175	// Create the affinity structures
176	affinities.resize(threadIds.size());
177	oldAffinities.resize(threadIds.size());
178
179	for (size_t i = `0`; i < threadIds.size(); ++i)
180	{
181	cpu_set_t affinity;
182	CPU_ZERO(&affinity);
183	CPU_SET(threadIds[i], &affinity);
184
185	affinities [i] = affinity;
186	oldAffinities [i] = affinity;
187	}
188	}
189
190	void ThreadAffinity::set(int threadIndex)
191	{
192	if (threadIndex >= (int)affinities.size())
193	return;
194
195	const pthread_t thread = pthread_self();
196
197	// Save the current affinity
198	if (pthread_getaffinity_np(thread, sizeof(cpu_set_t), &oldAffinities [threadIndex]) != `0`)
199	{
200	OIDN_WARNING("pthread_getaffinity_np failed");
201	oldAffinities [threadIndex] = affinities [threadIndex];
202	return;
203	}
204
205	// Set the new affinity
206	if (pthread_setaffinity_np(thread, sizeof(cpu_set_t), &affinities [threadIndex]) != `0`)
207	OIDN_WARNING("pthread_setaffinity_np failed");
208	}
209
210	void ThreadAffinity::restore(int threadIndex)
211	{
212	if (threadIndex >= (int)affinities.size())
213	return;
214
215	const pthread_t thread = pthread_self();
216
217	// Restore the original affinity
218	if (pthread_setaffinity_np(thread, sizeof(cpu_set_t), &oldAffinities [threadIndex]) != `0`)
219	OIDN_WARNING("pthread_setaffinity_np failed");
220	}
221
222	#elif defined(__APPLE__)
223
224	// --------------------------------------------------------------------------
225	// ThreadAffinity - macOS
226	// --------------------------------------------------------------------------
227
228	ThreadAffinity::ThreadAffinity(int numThreadsPerCore, int verbose)
229	: Verbose(verbose)
230	{
231	// Query the thread/CPU topology
232	int numPhysicalCpus;
233	int numLogicalCpus;
234
235	if (!getSysctl("hw.physicalcpu", numPhysicalCpus) \|\| !getSysctl("hw.logicalcpu", numLogicalCpus))
236	{
237	OIDN_WARNING("sysctlbyname failed");
238	return;
239	}
240
241	if ((numLogicalCpus % numPhysicalCpus != `0`) && (numThreadsPerCore > `1`))
242	return; // this shouldn't happen
243	const int maxThreadsPerCore = numLogicalCpus / numPhysicalCpus;
244
245	// Create the affinity structures
246	// macOS doesn't support binding a thread to a specific core, but we can at least group threads which
247	// should be on the same core together
248	for (int core = `1`; core <= numPhysicalCpus; ++core) // tags start from 1!
249	{
250	thread_affinity_policy affinity;
251	affinity.affinity_tag = core;
252
253	for (int thread = `0`; thread < min(numThreadsPerCore, maxThreadsPerCore); ++thread)
254	{
255	affinities.push_back(affinity);
256	oldAffinities.push_back(affinity);
257	}
258	}
259	}
260
261	void ThreadAffinity::set(int threadIndex)
262	{
263	if (threadIndex >= (int)affinities.size())
264	return;
265
266	const auto thread = mach_thread_self();
267
268	// Save the current affinity
269	mach_msg_type_number_t policyCount = THREAD_AFFINITY_POLICY_COUNT;
270	boolean_t getDefault = FALSE;
271	if (thread_policy_get(thread, THREAD_AFFINITY_POLICY, (thread_policy_t)&oldAffinities[threadIndex], &policyCount, &getDefault) != KERN_SUCCESS)
272	{
273	OIDN_WARNING("thread_policy_get failed");
274	oldAffinities[threadIndex] = affinities[threadIndex];
275	return;
276	}
277
278	// Set the new affinity
279	if (thread_policy_set(thread, THREAD_AFFINITY_POLICY, (thread_policy_t)&affinities[threadIndex], THREAD_AFFINITY_POLICY_COUNT) != KERN_SUCCESS)
280	OIDN_WARNING("thread_policy_set failed");
281	}
282
283	void ThreadAffinity::restore(int threadIndex)
284	{
285	if (threadIndex >= (int)affinities.size())
286	return;
287
288	const auto thread = mach_thread_self();
289
290	// Restore the original affinity
291	if (thread_policy_set(thread, THREAD_AFFINITY_POLICY, (thread_policy_t)&oldAffinities[threadIndex], THREAD_AFFINITY_POLICY_COUNT) != KERN_SUCCESS)
292	OIDN_WARNING("thread_policy_set failed");
293	}
294
295	#endif
296
297	} // namespace oidn
298

Browse the source code of Godot/thirdparty/oidn/common/thread.cpp