1// Licensed to the .NET Foundation under one or more agreements.
2// The .NET Foundation licenses this file to you under the MIT license.
3// See the LICENSE file in the project root for more information.
4
5/*++
6
7
8
9Module Name:
10
11 numa.cpp
12
13Abstract:
14
15 Implementation of NUMA related APIs
16
17--*/
18
19#include "pal/dbgmsg.h"
20SET_DEFAULT_DEBUG_CHANNEL(NUMA);
21
22#include "pal/palinternal.h"
23#include "pal/dbgmsg.h"
24#include "pal/numa.h"
25#include "pal/corunix.hpp"
26#include "pal/thread.hpp"
27
28#if HAVE_PTHREAD_NP_H
29#include <pthread_np.h>
30#endif
31
32#include <pthread.h>
33#include <dlfcn.h>
34#ifdef __FreeBSD__
35#include <stdlib.h>
36#else
37#include <alloca.h>
38#endif
39
40#include <algorithm>
41
42#include "numashim.h"
43
44using namespace CorUnix;
45
#if HAVE_CPUSET_T
// When the build reports that the CPU set type is named cpuset_t (HAVE_CPUSET_T),
// alias it so that the rest of this file can use the cpu_set_t name unconditionally.
typedef cpuset_t cpu_set_t;
#endif
49
// CPU affinity descriptor. One entry per global CPU index is stored in g_cpuToAffinity.
// AllocateLookupArrays pre-fills the entries with 0xff bytes, so an entry that was never
// assigned has Group == 0xffff (NO_GROUP) and Node == Number == 0xff.
struct CpuAffinity
{
    // NUMA node the CPU belongs to
    BYTE Node;
    // CPU number relative to the group the CPU is in
    BYTE Number;
    // CPU group the CPU belongs to
    WORD Group;
};
60
// Array mapping global CPU index to its affinity (g_possibleCpuCount entries)
CpuAffinity *g_cpuToAffinity = NULL;

// Array mapping CPU group and index in the group to the global CPU index.
// It is indexed as [group * MaxCpusPerGroup + index]; unused slots hold -1.
short *g_groupAndIndexToCpu = NULL;
// Array mapping CPU group to the corresponding affinity mask of the CPUs in the group
KAFFINITY *g_groupToCpuMask = NULL;
// Array mapping CPU group to the number of processors in the group
BYTE *g_groupToCpuCount = NULL;

// Total number of processors in the system
int g_cpuCount = 0;
// Total number of possible processors in the system
int g_possibleCpuCount = 0;
// Total number of CPU groups
int g_groupCount = 0;
// The highest NUMA node available
int g_highestNumaNode = 0;
// Is numa available
bool g_numaAvailable = false;

// Handle of the dynamically loaded libnuma library (set by dlopen in NUMASupportInitialize)
void* numaHandle = nullptr;
83
#if HAVE_NUMA_H
// Declare one function pointer variable named <fn>_ptr for every function listed by
// FOR_ALL_NUMA_FUNCTIONS. The list itself is not defined in this file (presumably it
// comes from numashim.h - confirm). NUMASupportInitialize fills the pointers via dlsym.
#define PER_FUNCTION_BLOCK(fn) decltype(fn)* fn##_ptr;
FOR_ALL_NUMA_FUNCTIONS
#undef PER_FUNCTION_BLOCK
#endif // HAVE_NUMA_H
89
// Maximum number of CPUs in a single group: one CPU per bit of a KAFFINITY mask
static const int MaxCpusPerGroup = 8 * sizeof(KAFFINITY);
// Sentinel group number meaning "no group has been assigned / found"
static const WORD NO_GROUP = 0xffff;
92
93/*++
94Function:
95 FreeLookupArrays
96
97Free CPU and group lookup arrays
98--*/
99VOID
100FreeLookupArrays()
101{
102 free(g_groupAndIndexToCpu);
103 free(g_cpuToAffinity);
104 free(g_groupToCpuMask);
105 free(g_groupToCpuCount);
106
107 g_groupAndIndexToCpu = NULL;
108 g_cpuToAffinity = NULL;
109 g_groupToCpuMask = NULL;
110 g_groupToCpuCount = NULL;
111}
112
113/*++
114Function:
115 AllocateLookupArrays
116
117Allocate CPU and group lookup arrays
118Return TRUE if the allocation succeeded
119--*/
120BOOL
121AllocateLookupArrays()
122{
123 g_groupAndIndexToCpu = (short*)malloc(g_groupCount * MaxCpusPerGroup * sizeof(short));
124 if (g_groupAndIndexToCpu == NULL)
125 {
126 goto FAILED;
127 }
128
129 g_cpuToAffinity = (CpuAffinity*)malloc(g_possibleCpuCount * sizeof(CpuAffinity));
130 if (g_cpuToAffinity == NULL)
131 {
132 goto FAILED;
133 }
134
135 g_groupToCpuMask = (KAFFINITY*)malloc(g_groupCount * sizeof(KAFFINITY));
136 if (g_groupToCpuMask == NULL)
137 {
138 goto FAILED;
139 }
140
141 g_groupToCpuCount = (BYTE*)malloc(g_groupCount * sizeof(BYTE));
142 if (g_groupToCpuCount == NULL)
143 {
144 goto FAILED;
145 }
146
147 memset(g_groupAndIndexToCpu, 0xff, g_groupCount * MaxCpusPerGroup * sizeof(short));
148 memset(g_cpuToAffinity, 0xff, g_possibleCpuCount * sizeof(CpuAffinity));
149 memset(g_groupToCpuMask, 0, g_groupCount * sizeof(KAFFINITY));
150 memset(g_groupToCpuCount, 0, g_groupCount * sizeof(BYTE));
151
152 return TRUE;
153
154FAILED:
155 FreeLookupArrays();
156
157 return FALSE;
158}
159
160/*++
161Function:
162 GetFullAffinityMask
163
164Get affinity mask for the specified number of processors with all
165the processors enabled.
166--*/
167KAFFINITY GetFullAffinityMask(int cpuCount)
168{
169 return ((KAFFINITY)1 << (cpuCount)) - 1;
170}
171
172/*++
173Function:
174 NUMASupportInitialize
175
176Initialize data structures for getting and setting thread affinities to processors and
177querying NUMA related processor information.
178On systems with no NUMA support, it behaves as if there was a single NUMA node with
179a single group of processors.
180--*/
181BOOL
182NUMASupportInitialize()
183{
184#if HAVE_NUMA_H
185 numaHandle = dlopen("libnuma.so", RTLD_LAZY);
186 if (numaHandle == 0)
187 {
188 numaHandle = dlopen("libnuma.so.1", RTLD_LAZY);
189 }
190 if (numaHandle != 0)
191 {
192 dlsym(numaHandle, "numa_allocate_cpumask");
193#define PER_FUNCTION_BLOCK(fn) \
194 fn##_ptr = (decltype(fn)*)dlsym(numaHandle, #fn); \
195 if (fn##_ptr == NULL) { fprintf(stderr, "Cannot get symbol " #fn " from libnuma\n"); abort(); }
196FOR_ALL_NUMA_FUNCTIONS
197#undef PER_FUNCTION_BLOCK
198
199 if (numa_available() == -1)
200 {
201 dlclose(numaHandle);
202 }
203 else
204 {
205 g_numaAvailable = true;
206
207 struct bitmask *mask = numa_allocate_cpumask();
208 int numaNodesCount = numa_max_node() + 1;
209
210 g_possibleCpuCount = numa_num_possible_cpus();
211 g_cpuCount = 0;
212 g_groupCount = 0;
213
214 for (int i = 0; i < numaNodesCount; i++)
215 {
216 int st = numa_node_to_cpus(i, mask);
217 // The only failure that can happen is that the mask is not large enough
218 // but that cannot happen since the mask was allocated by numa_allocate_cpumask
219 _ASSERTE(st == 0);
220 unsigned int nodeCpuCount = numa_bitmask_weight(mask);
221 g_cpuCount += nodeCpuCount;
222 unsigned int nodeGroupCount = (nodeCpuCount + MaxCpusPerGroup - 1) / MaxCpusPerGroup;
223 g_groupCount += nodeGroupCount;
224 }
225
226 if (!AllocateLookupArrays())
227 {
228 dlclose(numaHandle);
229 return FALSE;
230 }
231
232 WORD currentGroup = 0;
233 int currentGroupCpus = 0;
234
235 for (int i = 0; i < numaNodesCount; i++)
236 {
237 int st = numa_node_to_cpus(i, mask);
238 // The only failure that can happen is that the mask is not large enough
239 // but that cannot happen since the mask was allocated by numa_allocate_cpumask
240 _ASSERTE(st == 0);
241 unsigned int nodeCpuCount = numa_bitmask_weight(mask);
242 unsigned int nodeGroupCount = (nodeCpuCount + MaxCpusPerGroup - 1) / MaxCpusPerGroup;
243 for (int j = 0; j < g_possibleCpuCount; j++)
244 {
245 if (numa_bitmask_isbitset(mask, j))
246 {
247 if (currentGroupCpus == MaxCpusPerGroup)
248 {
249 g_groupToCpuCount[currentGroup] = MaxCpusPerGroup;
250 g_groupToCpuMask[currentGroup] = GetFullAffinityMask(MaxCpusPerGroup);
251 currentGroupCpus = 0;
252 currentGroup++;
253 }
254 g_cpuToAffinity[j].Node = i;
255 g_cpuToAffinity[j].Group = currentGroup;
256 g_cpuToAffinity[j].Number = currentGroupCpus;
257 g_groupAndIndexToCpu[currentGroup * MaxCpusPerGroup + currentGroupCpus] = j;
258 currentGroupCpus++;
259 }
260 }
261
262 if (currentGroupCpus != 0)
263 {
264 g_groupToCpuCount[currentGroup] = currentGroupCpus;
265 g_groupToCpuMask[currentGroup] = GetFullAffinityMask(currentGroupCpus);
266 currentGroupCpus = 0;
267 currentGroup++;
268 }
269 }
270
271 numa_free_cpumask(mask);
272
273 g_highestNumaNode = numa_max_node();
274 }
275 }
276#endif // HAVE_NUMA_H
277 if (!g_numaAvailable)
278 {
279 // No NUMA
280 g_possibleCpuCount = PAL_GetLogicalCpuCountFromOS();
281 g_cpuCount = PAL_GetLogicalCpuCountFromOS();
282 g_groupCount = 1;
283 g_highestNumaNode = 0;
284
285 if (!AllocateLookupArrays())
286 {
287 return FALSE;
288 }
289
290 for (int i = 0; i < g_possibleCpuCount; i++)
291 {
292 g_cpuToAffinity[i].Number = i;
293 g_cpuToAffinity[i].Group = 0;
294 }
295 }
296
297 return TRUE;
298}
299
300/*++
301Function:
302 NUMASupportCleanup
303
304Cleanup of the NUMA support data structures
305--*/
306VOID
307NUMASupportCleanup()
308{
309 FreeLookupArrays();
310#if HAVE_NUMA_H
311 if (g_numaAvailable)
312 {
313 dlclose(numaHandle);
314 }
315#endif // HAVE_NUMA_H
316}
317
318/*++
319Function:
320 GetNumaHighestNodeNumber
321
322See MSDN doc.
323--*/
324BOOL
325PALAPI
326GetNumaHighestNodeNumber(
327 OUT PULONG HighestNodeNumber
328)
329{
330 PERF_ENTRY(GetNumaHighestNodeNumber);
331 ENTRY("GetNumaHighestNodeNumber(HighestNodeNumber=%p)\n", HighestNodeNumber);
332 *HighestNodeNumber = (ULONG)g_highestNumaNode;
333
334 BOOL success = TRUE;
335
336 LOGEXIT("GetNumaHighestNodeNumber returns BOOL %d\n", success);
337 PERF_EXIT(GetNumaHighestNodeNumber);
338
339 return success;
340}
341
342/*++
343Function:
344 GetNumaProcessorNodeEx
345
346See MSDN doc.
347--*/
348BOOL
349PALAPI
350GetNumaProcessorNodeEx(
351 IN PPROCESSOR_NUMBER Processor,
352 OUT PUSHORT NodeNumber
353)
354{
355 PERF_ENTRY(GetNumaProcessorNodeEx);
356 ENTRY("GetNumaProcessorNodeEx(Processor=%p, NodeNumber=%p)\n", Processor, NodeNumber);
357
358 BOOL success = FALSE;
359
360 if ((Processor->Group < g_groupCount) &&
361 (Processor->Number < MaxCpusPerGroup) &&
362 (Processor->Reserved == 0))
363 {
364 short cpu = g_groupAndIndexToCpu[Processor->Group * MaxCpusPerGroup + Processor->Number];
365 if (cpu != -1)
366 {
367 *NodeNumber = g_cpuToAffinity[cpu].Node;
368 success = TRUE;
369 }
370 }
371
372 if (!success)
373 {
374 *NodeNumber = 0xffff;
375 SetLastError(ERROR_INVALID_PARAMETER);
376 }
377
378 LOGEXIT("GetNumaProcessorNodeEx returns BOOL %d\n", success);
379 PERF_EXIT(GetNumaProcessorNodeEx);
380
381 return success;
382}
383
384/*++
385Function:
386 GetLogicalProcessorInformationEx
387
388See MSDN doc.
389--*/
390BOOL
391PALAPI
392GetLogicalProcessorInformationEx(
393 IN LOGICAL_PROCESSOR_RELATIONSHIP RelationshipType,
394 OUT OPTIONAL PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX Buffer,
395 IN OUT PDWORD ReturnedLength
396)
397{
398 PERF_ENTRY(GetLogicalProcessorInformationEx);
399 ENTRY("GetLogicalProcessorInformationEx(RelationshipType=%d, Buffer=%p, ReturnedLength=%p)\n", RelationshipType, Buffer, ReturnedLength);
400
401 BOOL success = FALSE;
402
403 if (RelationshipType == RelationGroup)
404 {
405 size_t requiredSize = __builtin_offsetof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX, Group);
406 requiredSize += __builtin_offsetof(GROUP_RELATIONSHIP, GroupInfo);
407 requiredSize += g_groupCount * sizeof(PROCESSOR_GROUP_INFO);
408
409 if (*ReturnedLength >= requiredSize)
410 {
411 Buffer->Relationship = RelationGroup;
412 Buffer->Size = requiredSize;
413 Buffer->Group.MaximumGroupCount = g_groupCount;
414 Buffer->Group.ActiveGroupCount = g_groupCount;
415 for (int i = 0; i < g_groupCount; i++)
416 {
417 Buffer->Group.GroupInfo[i].MaximumProcessorCount = MaxCpusPerGroup;
418 Buffer->Group.GroupInfo[i].ActiveProcessorCount = g_groupToCpuCount[i];
419 Buffer->Group.GroupInfo[i].ActiveProcessorMask = g_groupToCpuMask[i];
420 }
421
422 success = TRUE;
423 }
424 else
425 {
426 SetLastError(ERROR_INSUFFICIENT_BUFFER);
427 }
428
429 *ReturnedLength = requiredSize;
430 }
431 else
432 {
433 // We only support the group relationship
434 SetLastError(ERROR_INVALID_PARAMETER);
435 }
436
437 LOGEXIT("GetLogicalProcessorInformationEx returns BOOL %d\n", success);
438 PERF_EXIT(GetLogicalProcessorInformationEx);
439
440 return success;
441}
442
443/*++
444Function:
445 GetThreadGroupAffinityInternal
446
447Get the group affinity for the specified pthread
448--*/
449BOOL
450GetThreadGroupAffinityInternal(
451 IN pthread_t thread,
452 OUT PGROUP_AFFINITY GroupAffinity
453)
454{
455 BOOL success = FALSE;
456
457#if HAVE_PTHREAD_GETAFFINITY_NP
458 cpu_set_t cpuSet;
459
460 int st = pthread_getaffinity_np(thread, sizeof(cpu_set_t), &cpuSet);
461
462 if (st == 0)
463 {
464 WORD group = NO_GROUP;
465 KAFFINITY mask = 0;
466
467 for (int i = 0; i < g_possibleCpuCount; i++)
468 {
469 if (CPU_ISSET(i, &cpuSet))
470 {
471 WORD g = g_cpuToAffinity[i].Group;
472 // Unless the thread affinity was already set by SetThreadGroupAffinity, it is possible that
473 // the current thread has affinity with processors from multiple groups. So we report just the
474 // first group we find.
475 if (group == NO_GROUP || g == group)
476 {
477 group = g;
478 mask |= ((KAFFINITY)1) << g_cpuToAffinity[i].Number;
479 }
480 }
481 }
482
483 GroupAffinity->Group = group;
484 GroupAffinity->Mask = mask;
485 success = TRUE;
486 }
487 else
488 {
489 SetLastError(ERROR_GEN_FAILURE);
490 }
491#else // HAVE_PTHREAD_GETAFFINITY_NP
492 // There is no API to manage thread affinity, so let's return a group affinity
493 // with all the CPUs on the system.
494 GroupAffinity->Group = 0;
495 GroupAffinity->Mask = GetFullAffinityMask(g_possibleCpuCount);
496 success = TRUE;
497#endif // HAVE_PTHREAD_GETAFFINITY_NP
498
499 return success;
500}
501
502/*++
503Function:
504 GetThreadGroupAffinity
505
506See MSDN doc.
507--*/
508BOOL
509PALAPI
510GetThreadGroupAffinity(
511 IN HANDLE hThread,
512 OUT PGROUP_AFFINITY GroupAffinity
513)
514{
515 PERF_ENTRY(GetThreadGroupAffinity);
516 ENTRY("GetThreadGroupAffinity(hThread=%p, GroupAffinity=%p)\n", hThread, GroupAffinity);
517 CPalThread *pCurrentThread = InternalGetCurrentThread();
518 CPalThread *pTargetThread = NULL;
519 IPalObject *pTargetThreadObject = NULL;
520
521 PAL_ERROR palErr =
522 InternalGetThreadDataFromHandle(pCurrentThread, hThread,
523 0, // THREAD_SET_CONTEXT
524 &pTargetThread, &pTargetThreadObject);
525
526 if (NO_ERROR != palErr)
527 {
528 ERROR("Unable to obtain thread data for handle %p (error %x)!\n", hThread,
529 palErr);
530 return FALSE;
531 }
532
533 BOOL success = GetThreadGroupAffinityInternal(
534 pTargetThread->GetPThreadSelf(), GroupAffinity);
535 LOGEXIT("GetThreadGroupAffinity returns BOOL %d\n", success);
536 PERF_EXIT(GetThreadGroupAffinity);
537
538 return success;
539}
540
541
542/*++
543Function:
544 SetThreadGroupAffinity
545
546See MSDN doc.
547--*/
548BOOL
549PALAPI
550SetThreadGroupAffinity(
551 IN HANDLE hThread,
552 IN const GROUP_AFFINITY *GroupAffinity,
553 OUT OPTIONAL PGROUP_AFFINITY PreviousGroupAffinity
554)
555{
556 PERF_ENTRY(SetThreadGroupAffinity);
557 ENTRY("SetThreadGroupAffinity(hThread=%p, GroupAffinity=%p, PreviousGroupAffinity=%p)\n", hThread, GroupAffinity, PreviousGroupAffinity);
558
559 CPalThread *pCurrentThread = InternalGetCurrentThread();
560 CPalThread *pTargetThread = NULL;
561 IPalObject *pTargetThreadObject = NULL;
562
563 PAL_ERROR palErr =
564 InternalGetThreadDataFromHandle(pCurrentThread, hThread,
565 0, // THREAD_SET_CONTEXT
566 &pTargetThread, &pTargetThreadObject);
567
568 if (NO_ERROR != palErr)
569 {
570 ERROR("Unable to obtain thread data for handle %p (error %x)!\n", hThread,
571 palErr);
572 return FALSE;
573 }
574
575 pthread_t thread = pTargetThread->GetPThreadSelf();
576
577 if (PreviousGroupAffinity != NULL)
578 {
579 GetThreadGroupAffinityInternal(thread, PreviousGroupAffinity);
580 }
581
582#if HAVE_PTHREAD_GETAFFINITY_NP
583 int groupStartIndex = GroupAffinity->Group * MaxCpusPerGroup;
584 KAFFINITY mask = 1;
585 cpu_set_t cpuSet;
586 CPU_ZERO(&cpuSet);
587
588 for (int i = 0; i < MaxCpusPerGroup; i++, mask <<= 1)
589 {
590 if (GroupAffinity->Mask & mask)
591 {
592 int cpu = g_groupAndIndexToCpu[groupStartIndex + i];
593 if (cpu != -1)
594 {
595 CPU_SET(cpu, &cpuSet);
596 }
597 }
598 }
599
600 int st = pthread_setaffinity_np(thread, sizeof(cpu_set_t), &cpuSet);
601
602 if (st != 0)
603 {
604 switch (st)
605 {
606 case EINVAL:
607 // There is no processor in the mask that is allowed to execute the process
608 SetLastError(ERROR_INVALID_PARAMETER);
609 break;
610 case ESRCH:
611 SetLastError(ERROR_INVALID_HANDLE);
612 break;
613 default:
614 SetLastError(ERROR_GEN_FAILURE);
615 break;
616 }
617 }
618
619 BOOL success = (st == 0);
620#else // HAVE_PTHREAD_GETAFFINITY_NP
621 // There is no API to manage thread affinity, so let's ignore the request
622 BOOL success = TRUE;
623#endif // HAVE_PTHREAD_GETAFFINITY_NP
624
625 LOGEXIT("SetThreadGroupAffinity returns BOOL %d\n", success);
626 PERF_EXIT(SetThreadGroupAffinity);
627
628 return success;
629}
630
631/*++
632Function:
633 SetThreadAffinityMask
634
635See MSDN doc.
636--*/
637DWORD_PTR
638PALAPI
639SetThreadAffinityMask(
640 IN HANDLE hThread,
641 IN DWORD_PTR dwThreadAffinityMask
642)
643{
644 PERF_ENTRY(SetThreadAffinityMask);
645 ENTRY("SetThreadAffinityMask(hThread=%p, dwThreadAffinityMask=%p)\n", hThread, dwThreadAffinityMask);
646
647 CPalThread *pCurrentThread = InternalGetCurrentThread();
648 CPalThread *pTargetThread = NULL;
649 IPalObject *pTargetThreadObject = NULL;
650
651 PAL_ERROR palErr =
652 InternalGetThreadDataFromHandle(pCurrentThread, hThread,
653 0, // THREAD_SET_CONTEXT
654 &pTargetThread, &pTargetThreadObject);
655
656 if (NO_ERROR != palErr)
657 {
658 ERROR("Unable to obtain thread data for handle %p (error %x)!\n", hThread,
659 palErr);
660 return 0;
661 }
662
663 pthread_t thread = pTargetThread->GetPThreadSelf();
664
665#if HAVE_PTHREAD_GETAFFINITY_NP
666 cpu_set_t prevCpuSet;
667 CPU_ZERO(&prevCpuSet);
668 KAFFINITY prevMask = 0;
669
670 int st = pthread_getaffinity_np(thread, sizeof(cpu_set_t), &prevCpuSet);
671
672 if (st == 0)
673 {
674 for (int i = 0; i < std::min(8 * (int)sizeof(KAFFINITY), g_possibleCpuCount); i++)
675 {
676 if (CPU_ISSET(i, &prevCpuSet))
677 {
678 prevMask |= ((KAFFINITY)1) << i;
679 }
680 }
681 }
682
683 cpu_set_t cpuSet;
684 CPU_ZERO(&cpuSet);
685
686 int cpu = 0;
687 while (dwThreadAffinityMask)
688 {
689 if (dwThreadAffinityMask & 1)
690 {
691 CPU_SET(cpu, &cpuSet);
692 }
693 cpu++;
694 dwThreadAffinityMask >>= 1;
695 }
696
697 st = pthread_setaffinity_np(thread, sizeof(cpu_set_t), &cpuSet);
698
699 if (st != 0)
700 {
701 switch (st)
702 {
703 case EINVAL:
704 // There is no processor in the mask that is allowed to execute the
705 // process
706 SetLastError(ERROR_INVALID_PARAMETER);
707 break;
708 case ESRCH:
709 SetLastError(ERROR_INVALID_HANDLE);
710 break;
711 default:
712 SetLastError(ERROR_GEN_FAILURE);
713 break;
714 }
715 }
716
717 DWORD_PTR ret = (st == 0) ? prevMask : 0;
718#else // HAVE_PTHREAD_GETAFFINITY_NP
719 // There is no API to manage thread affinity, so let's ignore the request
720 DWORD_PTR ret = 0;
721#endif // HAVE_PTHREAD_GETAFFINITY_NP
722 LOGEXIT("SetThreadAffinityMask returns %lu\n", ret);
723 PERF_EXIT(SetThreadAffinityMask);
724
725 return ret;
726}
727
728/*++
729Function:
730 GetCurrentProcessorNumberEx
731
732See MSDN doc.
733--*/
734VOID
735PALAPI
736GetCurrentProcessorNumberEx(
737 OUT PPROCESSOR_NUMBER ProcNumber
738)
739{
740 PERF_ENTRY(GetCurrentProcessorNumberEx);
741 ENTRY("GetCurrentProcessorNumberEx(ProcNumber=%p\n", ProcNumber);
742
743 DWORD cpu = GetCurrentProcessorNumber();
744 _ASSERTE(cpu < g_possibleCpuCount);
745 ProcNumber->Group = g_cpuToAffinity[cpu].Group;
746 ProcNumber->Number = g_cpuToAffinity[cpu].Number;
747
748 LOGEXIT("GetCurrentProcessorNumberEx\n");
749 PERF_EXIT(GetCurrentProcessorNumberEx);
750}
751
752/*++
753Function:
754 GetProcessAffinityMask
755
756See MSDN doc.
757--*/
758BOOL
759PALAPI
760GetProcessAffinityMask(
761 IN HANDLE hProcess,
762 OUT PDWORD_PTR lpProcessAffinityMask,
763 OUT PDWORD_PTR lpSystemAffinityMask
764)
765{
766 PERF_ENTRY(GetProcessAffinityMask);
767 ENTRY("GetProcessAffinityMask(hProcess=%p, lpProcessAffinityMask=%p, lpSystemAffinityMask=%p\n", hProcess, lpProcessAffinityMask, lpSystemAffinityMask);
768
769 BOOL success = FALSE;
770
771 if (hProcess == GetCurrentProcess())
772 {
773 DWORD_PTR systemMask = GetFullAffinityMask(g_cpuCount);
774
775#if HAVE_SCHED_GETAFFINITY
776 int pid = getpid();
777 cpu_set_t cpuSet;
778 int st = sched_getaffinity(pid, sizeof(cpu_set_t), &cpuSet);
779 if (st == 0)
780 {
781 WORD group = NO_GROUP;
782 DWORD_PTR processMask = 0;
783
784 for (int i = 0; i < g_possibleCpuCount; i++)
785 {
786 if (CPU_ISSET(i, &cpuSet))
787 {
788 WORD g = g_cpuToAffinity[i].Group;
789 if (group == NO_GROUP || g == group)
790 {
791 group = g;
792 processMask |= ((DWORD_PTR)1) << g_cpuToAffinity[i].Number;
793 }
794 else
795 {
796 // The process has affinity in more than one group, in such case
797 // the function needs to return zero in both masks.
798 processMask = 0;
799 systemMask = 0;
800 group = NO_GROUP;
801 break;
802 }
803 }
804 }
805
806 success = TRUE;
807
808 *lpProcessAffinityMask = processMask;
809 *lpSystemAffinityMask = systemMask;
810 }
811 else if (errno == EINVAL)
812 {
813 // There are more processors than can fit in a cpu_set_t
814 // return zero in both masks.
815 *lpProcessAffinityMask = 0;
816 *lpSystemAffinityMask = 0;
817 success = TRUE;
818 }
819 else
820 {
821 // We should not get any of the errors that the sched_getaffinity can return since none
822 // of them applies for the current thread, so this is an unexpected kind of failure.
823 SetLastError(ERROR_GEN_FAILURE);
824 }
825#else // HAVE_SCHED_GETAFFINITY
826 // There is no API to manage thread affinity, so let's return both affinity masks
827 // with all the CPUs on the system set.
828 *lpSystemAffinityMask = systemMask;
829 *lpProcessAffinityMask = systemMask;
830
831 success = TRUE;
832#endif // HAVE_SCHED_GETAFFINITY
833 }
834 else
835 {
836 // PAL supports getting affinity mask for the current process only
837 SetLastError(ERROR_INVALID_PARAMETER);
838 }
839
840 LOGEXIT("GetProcessAffinityMask returns BOOL %d\n", success);
841 PERF_EXIT(GetProcessAffinityMask);
842
843 return success;
844}
845
846/*++
847Function:
848 VirtualAllocExNuma
849
850See MSDN doc.
851--*/
852LPVOID
853PALAPI
854VirtualAllocExNuma(
855 IN HANDLE hProcess,
856 IN OPTIONAL LPVOID lpAddress,
857 IN SIZE_T dwSize,
858 IN DWORD flAllocationType,
859 IN DWORD flProtect,
860 IN DWORD nndPreferred
861)
862{
863 PERF_ENTRY(VirtualAllocExNuma);
864 ENTRY("VirtualAllocExNuma(hProcess=%p, lpAddress=%p, dwSize=%u, flAllocationType=%#x, flProtect=%#x, nndPreferred=%d\n",
865 hProcess, lpAddress, dwSize, flAllocationType, flProtect, nndPreferred);
866
867 LPVOID result = NULL;
868
869 if (hProcess == GetCurrentProcess())
870 {
871 if (nndPreferred <= g_highestNumaNode)
872 {
873 result = VirtualAlloc(lpAddress, dwSize, flAllocationType, flProtect);
874#if HAVE_NUMA_H
875 if (result != NULL && g_numaAvailable)
876 {
877 int nodeMaskLength = (g_highestNumaNode + 1 + sizeof(unsigned long) - 1) / sizeof(unsigned long);
878 unsigned long *nodeMask = (unsigned long*)alloca(nodeMaskLength * sizeof(unsigned long));
879 memset(nodeMask, 0, nodeMaskLength);
880
881 int index = nndPreferred / sizeof(unsigned long);
882 int mask = ((unsigned long)1) << (nndPreferred & (sizeof(unsigned long) - 1));
883 nodeMask[index] = mask;
884
885 int st = mbind(result, dwSize, MPOL_PREFERRED, nodeMask, g_highestNumaNode, 0);
886
887 _ASSERTE(st == 0);
888 // If the mbind fails, we still return the allocated memory since the nndPreferred is just a hint
889 }
890#endif // HAVE_NUMA_H
891 }
892 else
893 {
894 // The specified node number is larger than the maximum available one
895 SetLastError(ERROR_INVALID_PARAMETER);
896 }
897 }
898 else
899 {
900 // PAL supports allocating from the current process virtual space only
901 SetLastError(ERROR_INVALID_PARAMETER);
902 }
903
904 LOGEXIT("VirtualAllocExNuma returns %p\n", result);
905 PERF_EXIT(VirtualAllocExNuma);
906
907 return result;
908}
909
910/*++
911Function:
912 SetThreadIdealProcessorEx
913
914See MSDN doc.
915--*/
916BOOL
917PALAPI
918SetThreadIdealProcessorEx(
919 IN HANDLE hThread,
920 IN PPROCESSOR_NUMBER lpIdealProcessor,
921 OUT PPROCESSOR_NUMBER lpPreviousIdealProcessor)
922{
923 PERF_ENTRY(SetThreadIdealProcessorEx);
924 ENTRY("SetThreadIdealProcessorEx(hThread=%p, lpIdealProcessor=%p)\n", hThread, lpIdealProcessor);
925
926 CPalThread *pCurrentThread = InternalGetCurrentThread();
927 CPalThread *pTargetThread = NULL;
928 IPalObject *pTargetThreadObject = NULL;
929
930 PAL_ERROR palErr =
931 InternalGetThreadDataFromHandle(pCurrentThread, hThread,
932 0, // THREAD_SET_CONTEXT
933 &pTargetThread, &pTargetThreadObject);
934
935 if (NO_ERROR != palErr)
936 {
937 ERROR("Unable to obtain thread data for handle %p (error %x)!\n", hThread,
938 palErr);
939 return 0;
940 }
941
942 pthread_t thread = pTargetThread->GetPThreadSelf();
943
944#if HAVE_PTHREAD_GETAFFINITY_NP
945 int cpu = -1;
946 if ((lpIdealProcessor->Group < g_groupCount) &&
947 (lpIdealProcessor->Number < MaxCpusPerGroup) &&
948 (lpIdealProcessor->Reserved == 0))
949 {
950 cpu = g_groupAndIndexToCpu[lpIdealProcessor->Group * MaxCpusPerGroup + lpIdealProcessor->Number];
951 }
952
953 if (cpu == -1)
954 {
955 SetLastError(ERROR_INVALID_PARAMETER);
956 return FALSE;
957 }
958
959 if (lpPreviousIdealProcessor != NULL)
960 {
961 cpu_set_t prevCpuSet;
962 CPU_ZERO(&prevCpuSet);
963 DWORD prevCpu = GetCurrentProcessorNumber();
964
965 int st = pthread_getaffinity_np(thread, sizeof(cpu_set_t), &prevCpuSet);
966
967 if (st == 0)
968 {
969 for (int i = 0; i < g_possibleCpuCount; i++)
970 {
971 if (CPU_ISSET(i, &prevCpuSet))
972 {
973 prevCpu = i;
974 break;
975 }
976 }
977 }
978
979 _ASSERTE(prevCpu < g_possibleCpuCount);
980 lpPreviousIdealProcessor->Group = g_cpuToAffinity[prevCpu].Group;
981 lpPreviousIdealProcessor->Number = g_cpuToAffinity[prevCpu].Number;
982 lpPreviousIdealProcessor->Reserved = 0;
983 }
984
985 cpu_set_t cpuSet;
986 CPU_ZERO(&cpuSet);
987 CPU_SET(cpu, &cpuSet);
988
989 int st = pthread_setaffinity_np(thread, sizeof(cpu_set_t), &cpuSet);
990
991 if (st != 0)
992 {
993 switch (st)
994 {
995 case EINVAL:
996 // There is no processor in the mask that is allowed to execute the
997 // process
998 SetLastError(ERROR_INVALID_PARAMETER);
999 break;
1000 case ESRCH:
1001 SetLastError(ERROR_INVALID_HANDLE);
1002 break;
1003 default:
1004 SetLastError(ERROR_GEN_FAILURE);
1005 break;
1006 }
1007 }
1008
1009 BOOL success = (st == 0);
1010
1011#else // HAVE_PTHREAD_GETAFFINITY_NP
1012 // There is no API to manage thread affinity, so let's ignore the request
1013 BOOL success = FALSE;
1014#endif // HAVE_PTHREAD_GETAFFINITY_NP
1015
1016 LOGEXIT("SetThreadIdealProcessorEx returns BOOL %d\n", success);
1017 PERF_EXIT(SetThreadIdealProcessorEx);
1018
1019 return success;
1020}
1021