numa.cpp source code [CoreCLR/pal/src/numa/numa.cpp]

1	// Licensed to the .NET Foundation under one or more agreements.
2	// The .NET Foundation licenses this file to you under the MIT license.
3	// See the LICENSE file in the project root for more information.
4
/*++



Module Name:

    numa.cpp

Abstract:

    Implementation of NUMA related APIs

--*/
18
19	#include "pal/dbgmsg.h"
20	SET_DEFAULT_DEBUG_CHANNEL(NUMA);
21
22	#include "pal/palinternal.h"
23	#include "pal/dbgmsg.h"
24	#include "pal/numa.h"
25	#include "pal/corunix.hpp"
26	#include "pal/thread.hpp"
27
28	#if HAVE_PTHREAD_NP_H
29	#include <pthread_np.h>
30	#endif
31
32	#include <pthread.h>
33	#include <dlfcn.h>
34	#ifdef __FreeBSD__
35	#include <stdlib.h>
36	#else
37	#include <alloca.h>
38	#endif
39
40	#include <algorithm>
41
42	#include "numashim.h"
43
44	using namespace CorUnix;
45
46	#if HAVE_CPUSET_T
47	typedef cpuset_t cpu_set_t;
48	#endif
49
50	// CPU affinity descriptor
51	struct CpuAffinity
52	{
53	// NUMA node
54	BYTE Node;
55	// CPU number relative to the group the CPU is in
56	BYTE Number;
57	// CPU group
58	WORD Group;
59	};
60
61	// Array mapping global CPU index to its affinity
62	CpuAffinity *g_cpuToAffinity = NULL;
63
64	// Array mapping CPU group and index in the group to the global CPU index
65	short *g_groupAndIndexToCpu = NULL;
66	// Array mapping CPU group to the corresponding affinity mask of the CPUs in the group
67	KAFFINITY *g_groupToCpuMask = NULL;
68	// Array mapping CPU group to the number of processors in the group
69	BYTE *g_groupToCpuCount = NULL;
70
71	// Total number of processors in the system
72	int g_cpuCount = `0`;
73	// Total number of possible processors in the system
74	int g_possibleCpuCount = `0`;
75	// Total number of CPU groups
76	int g_groupCount = `0`;
77	// The highest NUMA node available
78	int g_highestNumaNode = `0`;
79	// Is numa available
80	bool g_numaAvailable = false;
81
82	void* numaHandle = nullptr;
83
84	#if HAVE_NUMA_H
85	#define PER_FUNCTION_BLOCK(fn) decltype(fn)* fn##_ptr;
86	FOR_ALL_NUMA_FUNCTIONS
87	#undef PER_FUNCTION_BLOCK
88	#endif // HAVE_NUMA_H
89
90	static const int MaxCpusPerGroup = `8` * sizeof(KAFFINITY);
91	static const WORD NO_GROUP = `0xffff`;
92
93	/++*
94	Function:
95	FreeLookupArrays
96
97	Free CPU and group lookup arrays
98	--/*
99	VOID
100	FreeLookupArrays()
101	{
102	free(g_groupAndIndexToCpu);
103	free(g_cpuToAffinity);
104	free(g_groupToCpuMask);
105	free(g_groupToCpuCount);
106
107	g_groupAndIndexToCpu = NULL;
108	g_cpuToAffinity = NULL;
109	g_groupToCpuMask = NULL;
110	g_groupToCpuCount = NULL;
111	}
112
113	/++*
114	Function:
115	AllocateLookupArrays
116
117	Allocate CPU and group lookup arrays
118	Return TRUE if the allocation succeeded
119	--/*
120	BOOL
121	AllocateLookupArrays()
122	{
123	g_groupAndIndexToCpu = (short)malloc(g_groupCount MaxCpusPerGroup * sizeof(short));
124	if (g_groupAndIndexToCpu == NULL)
125	{
126	goto FAILED;
127	}
128
129	g_cpuToAffinity = (CpuAffinity)malloc(g_possibleCpuCount sizeof(CpuAffinity));
130	if (g_cpuToAffinity == NULL)
131	{
132	goto FAILED;
133	}
134
135	g_groupToCpuMask = (KAFFINITY)malloc(g_groupCount sizeof(KAFFINITY));
136	if (g_groupToCpuMask == NULL)
137	{
138	goto FAILED;
139	}
140
141	g_groupToCpuCount = (BYTE)malloc(g_groupCount sizeof(BYTE));
142	if (g_groupToCpuCount == NULL)
143	{
144	goto FAILED;
145	}
146
147	memset(g_groupAndIndexToCpu, `0xff`, g_groupCount * MaxCpusPerGroup * sizeof(short));
148	memset(g_cpuToAffinity, `0xff`, g_possibleCpuCount * sizeof(CpuAffinity));
149	memset(g_groupToCpuMask, `0`, g_groupCount * sizeof(KAFFINITY));
150	memset(g_groupToCpuCount, `0`, g_groupCount * sizeof(BYTE));
151
152	return TRUE;
153
154	FAILED:
155	FreeLookupArrays();
156
157	return FALSE;
158	}
159
160	/++*
161	Function:
162	GetFullAffinityMask
163
164	Get affinity mask for the specified number of processors with all
165	the processors enabled.
166	--/*
167	KAFFINITY GetFullAffinityMask(int cpuCount)
168	{
169	return ((KAFFINITY)`1` << (cpuCount)) - `1`;
170	}
171
172	/++*
173	Function:
174	NUMASupportInitialize
175
176	Initialize data structures for getting and setting thread affinities to processors and
177	querying NUMA related processor information.
178	On systems with no NUMA support, it behaves as if there was a single NUMA node with
179	a single group of processors.
180	--/*
181	BOOL
182	NUMASupportInitialize()
183	{
184	#if HAVE_NUMA_H
185	numaHandle = dlopen("libnuma.so", RTLD_LAZY);
186	if (numaHandle == `0`)
187	{
188	numaHandle = dlopen("libnuma.so.1", RTLD_LAZY);
189	}
190	if (numaHandle != `0`)
191	{
192	dlsym(numaHandle, "numa_allocate_cpumask");
193	#define PER_FUNCTION_BLOCK(fn) \
194	fn##_ptr = (decltype(fn)*)dlsym(numaHandle, #fn); \
195	if (fn##_ptr == NULL) { fprintf(stderr, "Cannot get symbol " #fn " from libnuma\n"); abort(); }
196	FOR_ALL_NUMA_FUNCTIONS
197	#undef PER_FUNCTION_BLOCK
198
199	if (numa_available() == -`1`)
200	{
201	dlclose(numaHandle);
202	}
203	else
204	{
205	g_numaAvailable = true;
206
207	struct bitmask *mask = numa_allocate_cpumask();
208	int numaNodesCount = numa_max_node() + `1`;
209
210	g_possibleCpuCount = numa_num_possible_cpus();
211	g_cpuCount = `0`;
212	g_groupCount = `0`;
213
214	for (int i = `0`; i < numaNodesCount; i++)
215	{
216	int st = numa_node_to_cpus(i, mask);
217	// The only failure that can happen is that the mask is not large enough
218	// but that cannot happen since the mask was allocated by numa_allocate_cpumask
219	_ASSERTE(st == `0`);
220	unsigned int nodeCpuCount = numa_bitmask_weight(mask);
221	g_cpuCount += nodeCpuCount;
222	unsigned int nodeGroupCount = (nodeCpuCount + MaxCpusPerGroup - `1`) / MaxCpusPerGroup;
223	g_groupCount += nodeGroupCount;
224	}
225
226	if (!AllocateLookupArrays())
227	{
228	dlclose(numaHandle);
229	return FALSE;
230	}
231
232	WORD currentGroup = `0`;
233	int currentGroupCpus = `0`;
234
235	for (int i = `0`; i < numaNodesCount; i++)
236	{
237	int st = numa_node_to_cpus(i, mask);
238	// The only failure that can happen is that the mask is not large enough
239	// but that cannot happen since the mask was allocated by numa_allocate_cpumask
240	_ASSERTE(st == `0`);
241	unsigned int nodeCpuCount = numa_bitmask_weight(mask);
242	unsigned int nodeGroupCount = (nodeCpuCount + MaxCpusPerGroup - `1`) / MaxCpusPerGroup;
243	for (int j = `0`; j < g_possibleCpuCount; j++)
244	{
245	if (numa_bitmask_isbitset(mask, j))
246	{
247	if (currentGroupCpus == MaxCpusPerGroup)
248	{
249	g_groupToCpuCount[currentGroup] = MaxCpusPerGroup;
250	g_groupToCpuMask[currentGroup] = GetFullAffinityMask(MaxCpusPerGroup);
251	currentGroupCpus = `0`;
252	currentGroup++;
253	}
254	g_cpuToAffinity[j].Node = i;
255	g_cpuToAffinity[j].Group = currentGroup;
256	g_cpuToAffinity[j].Number = currentGroupCpus;
257	g_groupAndIndexToCpu[currentGroup * MaxCpusPerGroup + currentGroupCpus] = j;
258	currentGroupCpus++;
259	}
260	}
261
262	if (currentGroupCpus != `0`)
263	{
264	g_groupToCpuCount[currentGroup] = currentGroupCpus;
265	g_groupToCpuMask[currentGroup] = GetFullAffinityMask(currentGroupCpus);
266	currentGroupCpus = `0`;
267	currentGroup++;
268	}
269	}
270
271	numa_free_cpumask(mask);
272
273	g_highestNumaNode = numa_max_node();
274	}
275	}
276	#endif // HAVE_NUMA_H
277	if (!g_numaAvailable)
278	{
279	// No NUMA
280	g_possibleCpuCount = PAL_GetLogicalCpuCountFromOS();
281	g_cpuCount = PAL_GetLogicalCpuCountFromOS();
282	g_groupCount = `1`;
283	g_highestNumaNode = `0`;
284
285	if (!AllocateLookupArrays())
286	{
287	return FALSE;
288	}
289
290	for (int i = `0`; i < g_possibleCpuCount; i++)
291	{
292	g_cpuToAffinity[i].Number = i;
293	g_cpuToAffinity[i].Group = `0`;
294	}
295	}
296
297	return TRUE;
298	}
299
300	/++*
301	Function:
302	NUMASupportCleanup
303
304	Cleanup of the NUMA support data structures
305	--/*
306	VOID
307	NUMASupportCleanup()
308	{
309	FreeLookupArrays();
310	#if HAVE_NUMA_H
311	if (g_numaAvailable)
312	{
313	dlclose(numaHandle);
314	}
315	#endif // HAVE_NUMA_H
316	}
317
318	/++*
319	Function:
320	GetNumaHighestNodeNumber
321
322	See MSDN doc.
323	--/*
324	BOOL
325	PALAPI
326	GetNumaHighestNodeNumber(
327	OUT PULONG HighestNodeNumber
328	)
329	{
330	PERF_ENTRY(GetNumaHighestNodeNumber);
331	ENTRY("GetNumaHighestNodeNumber(HighestNodeNumber=%p)\n", HighestNodeNumber);
332	*HighestNodeNumber = (ULONG)g_highestNumaNode;
333
334	BOOL success = TRUE;
335
336	LOGEXIT("GetNumaHighestNodeNumber returns BOOL %d\n", success);
337	PERF_EXIT(GetNumaHighestNodeNumber);
338
339	return success;
340	}
341
342	/++*
343	Function:
344	GetNumaProcessorNodeEx
345
346	See MSDN doc.
347	--/*
348	BOOL
349	PALAPI
350	GetNumaProcessorNodeEx(
351	IN PPROCESSOR_NUMBER Processor,
352	OUT PUSHORT NodeNumber
353	)
354	{
355	PERF_ENTRY(GetNumaProcessorNodeEx);
356	ENTRY("GetNumaProcessorNodeEx(Processor=%p, NodeNumber=%p)\n", Processor, NodeNumber);
357
358	BOOL success = FALSE;
359
360	if ((Processor->Group < g_groupCount) &&
361	(Processor->Number < MaxCpusPerGroup) &&
362	(Processor->Reserved == `0`))
363	{
364	short cpu = g_groupAndIndexToCpu[Processor->Group * MaxCpusPerGroup + Processor->Number];
365	if (cpu != -`1`)
366	{
367	*NodeNumber = g_cpuToAffinity[cpu].Node;
368	success = TRUE;
369	}
370	}
371
372	if (!success)
373	{
374	*NodeNumber = `0xffff`;
375	SetLastError(ERROR_INVALID_PARAMETER);
376	}
377
378	LOGEXIT("GetNumaProcessorNodeEx returns BOOL %d\n", success);
379	PERF_EXIT(GetNumaProcessorNodeEx);
380
381	return success;
382	}
383
384	/++*
385	Function:
386	GetLogicalProcessorInformationEx
387
388	See MSDN doc.
389	--/*
390	BOOL
391	PALAPI
392	GetLogicalProcessorInformationEx(
393	IN LOGICAL_PROCESSOR_RELATIONSHIP RelationshipType,
394	OUT OPTIONAL PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX Buffer,
395	IN OUT PDWORD ReturnedLength
396	)
397	{
398	PERF_ENTRY(GetLogicalProcessorInformationEx);
399	ENTRY("GetLogicalProcessorInformationEx(RelationshipType=%d, Buffer=%p, ReturnedLength=%p)\n", RelationshipType, Buffer, ReturnedLength);
400
401	BOOL success = FALSE;
402
403	if (RelationshipType == RelationGroup)
404	{
405	size_t requiredSize = __builtin_offsetof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX, Group);
406	requiredSize += __builtin_offsetof(GROUP_RELATIONSHIP, GroupInfo);
407	requiredSize += g_groupCount * sizeof(PROCESSOR_GROUP_INFO);
408
409	if (*ReturnedLength >= requiredSize)
410	{
411	Buffer->Relationship = RelationGroup;
412	Buffer->Size = requiredSize;
413	Buffer->Group.MaximumGroupCount = g_groupCount;
414	Buffer->Group.ActiveGroupCount = g_groupCount;
415	for (int i = `0`; i < g_groupCount; i++)
416	{
417	Buffer->Group.GroupInfo[i].MaximumProcessorCount = MaxCpusPerGroup;
418	Buffer->Group.GroupInfo[i].ActiveProcessorCount = g_groupToCpuCount[i];
419	Buffer->Group.GroupInfo[i].ActiveProcessorMask = g_groupToCpuMask[i];
420	}
421
422	success = TRUE;
423	}
424	else
425	{
426	SetLastError(ERROR_INSUFFICIENT_BUFFER);
427	}
428
429	*ReturnedLength = requiredSize;
430	}
431	else
432	{
433	// We only support the group relationship
434	SetLastError(ERROR_INVALID_PARAMETER);
435	}
436
437	LOGEXIT("GetLogicalProcessorInformationEx returns BOOL %d\n", success);
438	PERF_EXIT(GetLogicalProcessorInformationEx);
439
440	return success;
441	}
442
443	/++*
444	Function:
445	GetThreadGroupAffinityInternal
446
447	Get the group affinity for the specified pthread
448	--/*
449	BOOL
450	GetThreadGroupAffinityInternal(
451	IN pthread_t thread,
452	OUT PGROUP_AFFINITY GroupAffinity
453	)
454	{
455	BOOL success = FALSE;
456
457	#if HAVE_PTHREAD_GETAFFINITY_NP
458	cpu_set_t cpuSet;
459
460	int st = pthread_getaffinity_np(thread, sizeof(cpu_set_t), &cpuSet);
461
462	if (st == `0`)
463	{
464	WORD group = NO_GROUP;
465	KAFFINITY mask = `0`;
466
467	for (int i = `0`; i < g_possibleCpuCount; i++)
468	{
469	if (CPU_ISSET(i, &cpuSet))
470	{
471	WORD g = g_cpuToAffinity[i].Group;
472	// Unless the thread affinity was already set by SetThreadGroupAffinity, it is possible that
473	// the current thread has affinity with processors from multiple groups. So we report just the
474	// first group we find.
475	if (group == NO_GROUP \|\| g == group)
476	{
477	group = g;
478	mask \|= ((KAFFINITY)`1`) << g_cpuToAffinity[i].Number;
479	}
480	}
481	}
482
483	GroupAffinity->Group = group;
484	GroupAffinity->Mask = mask;
485	success = TRUE;
486	}
487	else
488	{
489	SetLastError(ERROR_GEN_FAILURE);
490	}
491	#else // HAVE_PTHREAD_GETAFFINITY_NP
492	// There is no API to manage thread affinity, so let's return a group affinity
493	// with all the CPUs on the system.
494	GroupAffinity->Group = `0`;
495	GroupAffinity->Mask = GetFullAffinityMask(g_possibleCpuCount);
496	success = TRUE;
497	#endif // HAVE_PTHREAD_GETAFFINITY_NP
498
499	return success;
500	}
501
502	/++*
503	Function:
504	GetThreadGroupAffinity
505
506	See MSDN doc.
507	--/*
508	BOOL
509	PALAPI
510	GetThreadGroupAffinity(
511	IN HANDLE hThread,
512	OUT PGROUP_AFFINITY GroupAffinity
513	)
514	{
515	PERF_ENTRY(GetThreadGroupAffinity);
516	ENTRY("GetThreadGroupAffinity(hThread=%p, GroupAffinity=%p)\n", hThread, GroupAffinity);
517	CPalThread *pCurrentThread = InternalGetCurrentThread();
518	CPalThread *pTargetThread = NULL;
519	IPalObject *pTargetThreadObject = NULL;
520
521	PAL_ERROR palErr =
522	InternalGetThreadDataFromHandle(pCurrentThread, hThread,
523	`0`, // THREAD_SET_CONTEXT
524	&pTargetThread, &pTargetThreadObject);
525
526	if (NO_ERROR != palErr)
527	{
528	ERROR("Unable to obtain thread data for handle %p (error %x)!\n", hThread,
529	palErr);
530	return FALSE;
531	}
532
533	BOOL success = GetThreadGroupAffinityInternal(
534	pTargetThread->GetPThreadSelf(), GroupAffinity);
535	LOGEXIT("GetThreadGroupAffinity returns BOOL %d\n", success);
536	PERF_EXIT(GetThreadGroupAffinity);
537
538	return success;
539	}
540
541
542	/++*
543	Function:
544	SetThreadGroupAffinity
545
546	See MSDN doc.
547	--/*
548	BOOL
549	PALAPI
550	SetThreadGroupAffinity(
551	IN HANDLE hThread,
552	IN const GROUP_AFFINITY *GroupAffinity,
553	OUT OPTIONAL PGROUP_AFFINITY PreviousGroupAffinity
554	)
555	{
556	PERF_ENTRY(SetThreadGroupAffinity);
557	ENTRY("SetThreadGroupAffinity(hThread=%p, GroupAffinity=%p, PreviousGroupAffinity=%p)\n", hThread, GroupAffinity, PreviousGroupAffinity);
558
559	CPalThread *pCurrentThread = InternalGetCurrentThread();
560	CPalThread *pTargetThread = NULL;
561	IPalObject *pTargetThreadObject = NULL;
562
563	PAL_ERROR palErr =
564	InternalGetThreadDataFromHandle(pCurrentThread, hThread,
565	`0`, // THREAD_SET_CONTEXT
566	&pTargetThread, &pTargetThreadObject);
567
568	if (NO_ERROR != palErr)
569	{
570	ERROR("Unable to obtain thread data for handle %p (error %x)!\n", hThread,
571	palErr);
572	return FALSE;
573	}
574
575	pthread_t thread = pTargetThread->GetPThreadSelf();
576
577	if (PreviousGroupAffinity != NULL)
578	{
579	GetThreadGroupAffinityInternal(thread, PreviousGroupAffinity);
580	}
581
582	#if HAVE_PTHREAD_GETAFFINITY_NP
583	int groupStartIndex = GroupAffinity->Group * MaxCpusPerGroup;
584	KAFFINITY mask = `1`;
585	cpu_set_t cpuSet;
586	CPU_ZERO(&cpuSet);
587
588	for (int i = `0`; i < MaxCpusPerGroup; i++, mask <<= `1`)
589	{
590	if (GroupAffinity->Mask & mask)
591	{
592	int cpu = g_groupAndIndexToCpu[groupStartIndex + i];
593	if (cpu != -`1`)
594	{
595	CPU_SET(cpu, &cpuSet);
596	}
597	}
598	}
599
600	int st = pthread_setaffinity_np(thread, sizeof(cpu_set_t), &cpuSet);
601
602	if (st != `0`)
603	{
604	switch (st)
605	{
606	case EINVAL:
607	// There is no processor in the mask that is allowed to execute the process
608	SetLastError(ERROR_INVALID_PARAMETER);
609	break;
610	case ESRCH:
611	SetLastError(ERROR_INVALID_HANDLE);
612	break;
613	default:
614	SetLastError(ERROR_GEN_FAILURE);
615	break;
616	}
617	}
618
619	BOOL success = (st == `0`);
620	#else // HAVE_PTHREAD_GETAFFINITY_NP
621	// There is no API to manage thread affinity, so let's ignore the request
622	BOOL success = TRUE;
623	#endif // HAVE_PTHREAD_GETAFFINITY_NP
624
625	LOGEXIT("SetThreadGroupAffinity returns BOOL %d\n", success);
626	PERF_EXIT(SetThreadGroupAffinity);
627
628	return success;
629	}
630
631	/++*
632	Function:
633	SetThreadAffinityMask
634
635	See MSDN doc.
636	--/*
637	DWORD_PTR
638	PALAPI
639	SetThreadAffinityMask(
640	IN HANDLE hThread,
641	IN DWORD_PTR dwThreadAffinityMask
642	)
643	{
644	PERF_ENTRY(SetThreadAffinityMask);
645	ENTRY("SetThreadAffinityMask(hThread=%p, dwThreadAffinityMask=%p)\n", hThread, dwThreadAffinityMask);
646
647	CPalThread *pCurrentThread = InternalGetCurrentThread();
648	CPalThread *pTargetThread = NULL;
649	IPalObject *pTargetThreadObject = NULL;
650
651	PAL_ERROR palErr =
652	InternalGetThreadDataFromHandle(pCurrentThread, hThread,
653	`0`, // THREAD_SET_CONTEXT
654	&pTargetThread, &pTargetThreadObject);
655
656	if (NO_ERROR != palErr)
657	{
658	ERROR("Unable to obtain thread data for handle %p (error %x)!\n", hThread,
659	palErr);
660	return `0`;
661	}
662
663	pthread_t thread = pTargetThread->GetPThreadSelf();
664
665	#if HAVE_PTHREAD_GETAFFINITY_NP
666	cpu_set_t prevCpuSet;
667	CPU_ZERO(&prevCpuSet);
668	KAFFINITY prevMask = `0`;
669
670	int st = pthread_getaffinity_np(thread, sizeof(cpu_set_t), &prevCpuSet);
671
672	if (st == `0`)
673	{
674	for (int i = `0`; i < std::min(`8` * (int)sizeof(KAFFINITY), g_possibleCpuCount); i++)
675	{
676	if (CPU_ISSET(i, &prevCpuSet))
677	{
678	prevMask \|= ((KAFFINITY)`1`) << i;
679	}
680	}
681	}
682
683	cpu_set_t cpuSet;
684	CPU_ZERO(&cpuSet);
685
686	int cpu = `0`;
687	while (dwThreadAffinityMask)
688	{
689	if (dwThreadAffinityMask & `1`)
690	{
691	CPU_SET(cpu, &cpuSet);
692	}
693	cpu++;
694	dwThreadAffinityMask >>= `1`;
695	}
696
697	st = pthread_setaffinity_np(thread, sizeof(cpu_set_t), &cpuSet);
698
699	if (st != `0`)
700	{
701	switch (st)
702	{
703	case EINVAL:
704	// There is no processor in the mask that is allowed to execute the
705	// process
706	SetLastError(ERROR_INVALID_PARAMETER);
707	break;
708	case ESRCH:
709	SetLastError(ERROR_INVALID_HANDLE);
710	break;
711	default:
712	SetLastError(ERROR_GEN_FAILURE);
713	break;
714	}
715	}
716
717	DWORD_PTR ret = (st == `0`) ? prevMask : `0`;
718	#else // HAVE_PTHREAD_GETAFFINITY_NP
719	// There is no API to manage thread affinity, so let's ignore the request
720	DWORD_PTR ret = `0`;
721	#endif // HAVE_PTHREAD_GETAFFINITY_NP
722	LOGEXIT("SetThreadAffinityMask returns %lu\n", ret);
723	PERF_EXIT(SetThreadAffinityMask);
724
725	return ret;
726	}
727
728	/++*
729	Function:
730	GetCurrentProcessorNumberEx
731
732	See MSDN doc.
733	--/*
734	VOID
735	PALAPI
736	GetCurrentProcessorNumberEx(
737	OUT PPROCESSOR_NUMBER ProcNumber
738	)
739	{
740	PERF_ENTRY(GetCurrentProcessorNumberEx);
741	ENTRY("GetCurrentProcessorNumberEx(ProcNumber=%p\n", ProcNumber);
742
743	DWORD cpu = GetCurrentProcessorNumber();
744	_ASSERTE(cpu < g_possibleCpuCount);
745	ProcNumber->Group = g_cpuToAffinity[cpu].Group;
746	ProcNumber->Number = g_cpuToAffinity[cpu].Number;
747
748	LOGEXIT("GetCurrentProcessorNumberEx\n");
749	PERF_EXIT(GetCurrentProcessorNumberEx);
750	}
751
752	/++*
753	Function:
754	GetProcessAffinityMask
755
756	See MSDN doc.
757	--/*
758	BOOL
759	PALAPI
760	GetProcessAffinityMask(
761	IN HANDLE hProcess,
762	OUT PDWORD_PTR lpProcessAffinityMask,
763	OUT PDWORD_PTR lpSystemAffinityMask
764	)
765	{
766	PERF_ENTRY(GetProcessAffinityMask);
767	ENTRY("GetProcessAffinityMask(hProcess=%p, lpProcessAffinityMask=%p, lpSystemAffinityMask=%p\n", hProcess, lpProcessAffinityMask, lpSystemAffinityMask);
768
769	BOOL success = FALSE;
770
771	if (hProcess == GetCurrentProcess())
772	{
773	DWORD_PTR systemMask = GetFullAffinityMask(g_cpuCount);
774
775	#if HAVE_SCHED_GETAFFINITY
776	int pid = getpid();
777	cpu_set_t cpuSet;
778	int st = sched_getaffinity(pid, sizeof(cpu_set_t), &cpuSet);
779	if (st == `0`)
780	{
781	WORD group = NO_GROUP;
782	DWORD_PTR processMask = `0`;
783
784	for (int i = `0`; i < g_possibleCpuCount; i++)
785	{
786	if (CPU_ISSET(i, &cpuSet))
787	{
788	WORD g = g_cpuToAffinity[i].Group;
789	if (group == NO_GROUP \|\| g == group)
790	{
791	group = g;
792	processMask \|= ((DWORD_PTR)`1`) << g_cpuToAffinity[i].Number;
793	}
794	else
795	{
796	// The process has affinity in more than one group, in such case
797	// the function needs to return zero in both masks.
798	processMask = `0`;
799	systemMask = `0`;
800	group = NO_GROUP;
801	break;
802	}
803	}
804	}
805
806	success = TRUE;
807
808	*lpProcessAffinityMask = processMask;
809	*lpSystemAffinityMask = systemMask;
810	}
811	else if (errno == EINVAL)
812	{
813	// There are more processors than can fit in a cpu_set_t
814	// return zero in both masks.
815	*lpProcessAffinityMask = `0`;
816	*lpSystemAffinityMask = `0`;
817	success = TRUE;
818	}
819	else
820	{
821	// We should not get any of the errors that the sched_getaffinity can return since none
822	// of them applies for the current thread, so this is an unexpected kind of failure.
823	SetLastError(ERROR_GEN_FAILURE);
824	}
825	#else // HAVE_SCHED_GETAFFINITY
826	// There is no API to manage thread affinity, so let's return both affinity masks
827	// with all the CPUs on the system set.
828	*lpSystemAffinityMask = systemMask;
829	*lpProcessAffinityMask = systemMask;
830
831	success = TRUE;
832	#endif // HAVE_SCHED_GETAFFINITY
833	}
834	else
835	{
836	// PAL supports getting affinity mask for the current process only
837	SetLastError(ERROR_INVALID_PARAMETER);
838	}
839
840	LOGEXIT("GetProcessAffinityMask returns BOOL %d\n", success);
841	PERF_EXIT(GetProcessAffinityMask);
842
843	return success;
844	}
845
846	/++*
847	Function:
848	VirtualAllocExNuma
849
850	See MSDN doc.
851	--/*
852	LPVOID
853	PALAPI
854	VirtualAllocExNuma(
855	IN HANDLE hProcess,
856	IN OPTIONAL LPVOID lpAddress,
857	IN SIZE_T dwSize,
858	IN DWORD flAllocationType,
859	IN DWORD flProtect,
860	IN DWORD nndPreferred
861	)
862	{
863	PERF_ENTRY(VirtualAllocExNuma);
864	ENTRY("VirtualAllocExNuma(hProcess=%p, lpAddress=%p, dwSize=%u, flAllocationType=%#x, flProtect=%#x, nndPreferred=%d\n",
865	hProcess, lpAddress, dwSize, flAllocationType, flProtect, nndPreferred);
866
867	LPVOID result = NULL;
868
869	if (hProcess == GetCurrentProcess())
870	{
871	if (nndPreferred <= g_highestNumaNode)
872	{
873	result = VirtualAlloc(lpAddress, dwSize, flAllocationType, flProtect);
874	#if HAVE_NUMA_H
875	if (result != NULL && g_numaAvailable)
876	{
877	int nodeMaskLength = (g_highestNumaNode + `1` + sizeof(unsigned long) - `1`) / sizeof(unsigned long);
878	unsigned long nodeMask = (unsigned* long)alloca(nodeMaskLength sizeof(unsigned long));
879	memset(nodeMask, `0`, nodeMaskLength);
880
881	int index = nndPreferred / sizeof(unsigned long);
882	int mask = ((unsigned long)`1`) << (nndPreferred & (sizeof(unsigned long) - `1`));
883	nodeMask[index] = mask;
884
885	int st = mbind(result, dwSize, MPOL_PREFERRED, nodeMask, g_highestNumaNode, `0`);
886
887	_ASSERTE(st == `0`);
888	// If the mbind fails, we still return the allocated memory since the nndPreferred is just a hint
889	}
890	#endif // HAVE_NUMA_H
891	}
892	else
893	{
894	// The specified node number is larger than the maximum available one
895	SetLastError(ERROR_INVALID_PARAMETER);
896	}
897	}
898	else
899	{
900	// PAL supports allocating from the current process virtual space only
901	SetLastError(ERROR_INVALID_PARAMETER);
902	}
903
904	LOGEXIT("VirtualAllocExNuma returns %p\n", result);
905	PERF_EXIT(VirtualAllocExNuma);
906
907	return result;
908	}
909
910	/++*
911	Function:
912	SetThreadIdealProcessorEx
913
914	See MSDN doc.
915	--/*
916	BOOL
917	PALAPI
918	SetThreadIdealProcessorEx(
919	IN HANDLE hThread,
920	IN PPROCESSOR_NUMBER lpIdealProcessor,
921	OUT PPROCESSOR_NUMBER lpPreviousIdealProcessor)
922	{
923	PERF_ENTRY(SetThreadIdealProcessorEx);
924	ENTRY("SetThreadIdealProcessorEx(hThread=%p, lpIdealProcessor=%p)\n", hThread, lpIdealProcessor);
925
926	CPalThread *pCurrentThread = InternalGetCurrentThread();
927	CPalThread *pTargetThread = NULL;
928	IPalObject *pTargetThreadObject = NULL;
929
930	PAL_ERROR palErr =
931	InternalGetThreadDataFromHandle(pCurrentThread, hThread,
932	`0`, // THREAD_SET_CONTEXT
933	&pTargetThread, &pTargetThreadObject);
934
935	if (NO_ERROR != palErr)
936	{
937	ERROR("Unable to obtain thread data for handle %p (error %x)!\n", hThread,
938	palErr);
939	return `0`;
940	}
941
942	pthread_t thread = pTargetThread->GetPThreadSelf();
943
944	#if HAVE_PTHREAD_GETAFFINITY_NP
945	int cpu = -`1`;
946	if ((lpIdealProcessor->Group < g_groupCount) &&
947	(lpIdealProcessor->Number < MaxCpusPerGroup) &&
948	(lpIdealProcessor->Reserved == `0`))
949	{
950	cpu = g_groupAndIndexToCpu[lpIdealProcessor->Group * MaxCpusPerGroup + lpIdealProcessor->Number];
951	}
952
953	if (cpu == -`1`)
954	{
955	SetLastError(ERROR_INVALID_PARAMETER);
956	return FALSE;
957	}
958
959	if (lpPreviousIdealProcessor != NULL)
960	{
961	cpu_set_t prevCpuSet;
962	CPU_ZERO(&prevCpuSet);
963	DWORD prevCpu = GetCurrentProcessorNumber();
964
965	int st = pthread_getaffinity_np(thread, sizeof(cpu_set_t), &prevCpuSet);
966
967	if (st == `0`)
968	{
969	for (int i = `0`; i < g_possibleCpuCount; i++)
970	{
971	if (CPU_ISSET(i, &prevCpuSet))
972	{
973	prevCpu = i;
974	break;
975	}
976	}
977	}
978
979	_ASSERTE(prevCpu < g_possibleCpuCount);
980	lpPreviousIdealProcessor->Group = g_cpuToAffinity[prevCpu].Group;
981	lpPreviousIdealProcessor->Number = g_cpuToAffinity[prevCpu].Number;
982	lpPreviousIdealProcessor->Reserved = `0`;
983	}
984
985	cpu_set_t cpuSet;
986	CPU_ZERO(&cpuSet);
987	CPU_SET(cpu, &cpuSet);
988
989	int st = pthread_setaffinity_np(thread, sizeof(cpu_set_t), &cpuSet);
990
991	if (st != `0`)
992	{
993	switch (st)
994	{
995	case EINVAL:
996	// There is no processor in the mask that is allowed to execute the
997	// process
998	SetLastError(ERROR_INVALID_PARAMETER);
999	break;
1000	case ESRCH:
1001	SetLastError(ERROR_INVALID_HANDLE);
1002	break;
1003	default:
1004	SetLastError(ERROR_GEN_FAILURE);
1005	break;
1006	}
1007	}
1008
1009	BOOL success = (st == `0`);
1010
1011	#else // HAVE_PTHREAD_GETAFFINITY_NP
1012	// There is no API to manage thread affinity, so let's ignore the request
1013	BOOL success = FALSE;
1014	#endif // HAVE_PTHREAD_GETAFFINITY_NP
1015
1016	LOGEXIT("SetThreadIdealProcessorEx returns BOOL %d\n", success);
1017	PERF_EXIT(SetThreadIdealProcessorEx);
1018
1019	return success;
1020	}
1021

Browse the source code of CoreCLR/pal/src/numa/numa.cpp