| 1 | // ======================================================================== // | 
| 2 | // Copyright 2009-2019 Intel Corporation                                    // | 
| 3 | //                                                                          // | 
| 4 | // Licensed under the Apache License, Version 2.0 (the "License");          // | 
| 5 | // you may not use this file except in compliance with the License.         // | 
| 6 | // You may obtain a copy of the License at                                  // | 
| 7 | //                                                                          // | 
| 8 | //     http://www.apache.org/licenses/LICENSE-2.0                           // | 
| 9 | //                                                                          // | 
| 10 | // Unless required by applicable law or agreed to in writing, software      // | 
| 11 | // distributed under the License is distributed on an "AS IS" BASIS,        // | 
| 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // | 
| 13 | // See the License for the specific language governing permissions and      // | 
| 14 | // limitations under the License.                                           // | 
| 15 | // ======================================================================== // | 
| 16 |  | 
| 17 | #if defined(_MSC_VER) | 
| 18 |   #pragma warning (disable : 4146) // unary minus operator applied to unsigned type, result still unsigned | 
| 19 | #endif | 
| 20 |  | 
#if defined(__APPLE__)
  #include <mach/thread_act.h>
  #include <mach/mach_init.h>
  #include <mach/mach_port.h>
#endif

#include "thread.h"
#include <fstream>
| 28 |  | 
| 29 | namespace oidn { | 
| 30 |  | 
| 31 | #if defined(_WIN32) | 
| 32 |  | 
| 33 |   // -------------------------------------------------------------------------- | 
| 34 |   // ThreadAffinity - Windows | 
| 35 |   // -------------------------------------------------------------------------- | 
| 36 |  | 
| 37 |   ThreadAffinity::ThreadAffinity(int numThreadsPerCore, int verbose) | 
| 38 |     : Verbose(verbose) | 
| 39 |   { | 
| 40 |     HMODULE hLib = GetModuleHandle(TEXT("kernel32" )); | 
| 41 |     pGetLogicalProcessorInformationEx = (GetLogicalProcessorInformationExFunc)GetProcAddress(hLib, "GetLogicalProcessorInformationEx" ); | 
| 42 |     pSetThreadGroupAffinity = (SetThreadGroupAffinityFunc)GetProcAddress(hLib, "SetThreadGroupAffinity" ); | 
| 43 |  | 
| 44 |     if (pGetLogicalProcessorInformationEx && pSetThreadGroupAffinity) | 
| 45 |     { | 
| 46 |       // Get logical processor information | 
| 47 |       PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX buffer = nullptr; | 
| 48 |       DWORD bufferSize = 0; | 
| 49 |  | 
| 50 |       // First call the function with an empty buffer to get the required buffer size | 
| 51 |       BOOL result = pGetLogicalProcessorInformationEx(RelationProcessorCore, buffer, &bufferSize); | 
| 52 |       if (result || GetLastError() != ERROR_INSUFFICIENT_BUFFER) | 
| 53 |       { | 
| 54 |         OIDN_WARNING("GetLogicalProcessorInformationEx failed" ); | 
| 55 |         return; | 
| 56 |       } | 
| 57 |  | 
| 58 |       // Allocate the buffer | 
| 59 |       buffer = (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX)malloc(bufferSize); | 
| 60 |       if (!buffer) | 
| 61 |       { | 
| 62 |         OIDN_WARNING("SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX allocation failed" ); | 
| 63 |         return; | 
| 64 |       } | 
| 65 |  | 
| 66 |       // Call again the function but now with the properly sized buffer | 
| 67 |       result = pGetLogicalProcessorInformationEx(RelationProcessorCore, buffer, &bufferSize); | 
| 68 |       if (!result) | 
| 69 |       { | 
| 70 |         OIDN_WARNING("GetLogicalProcessorInformationEx failed" ); | 
| 71 |         free(buffer); | 
| 72 |         return; | 
| 73 |       } | 
| 74 |  | 
| 75 |       // Iterate over the logical processor information structures | 
| 76 |       // There should be one structure for each physical core | 
| 77 |       char* ptr = (char*)buffer; | 
| 78 |       while (ptr < (char*)buffer + bufferSize) | 
| 79 |       { | 
| 80 |         PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX item = (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX)ptr; | 
| 81 |         if (item->Relationship == RelationProcessorCore && item->Processor.GroupCount > 0) | 
| 82 |         { | 
| 83 |           // Iterate over the groups | 
| 84 |           int numThreads = 0; | 
| 85 |           for (int group = 0; (group < item->Processor.GroupCount) && (numThreads < numThreadsPerCore); ++group) | 
| 86 |           { | 
| 87 |             GROUP_AFFINITY coreAffinity = item->Processor.GroupMask[group]; | 
| 88 |             while ((coreAffinity.Mask != 0) && (numThreads < numThreadsPerCore)) | 
| 89 |             { | 
| 90 |               // Extract the next set bit/thread from the mask | 
| 91 |               GROUP_AFFINITY threadAffinity = coreAffinity; | 
| 92 |               threadAffinity.Mask = threadAffinity.Mask & -threadAffinity.Mask; | 
| 93 |  | 
| 94 |               // Push the affinity for this thread | 
| 95 |               affinities.push_back(threadAffinity); | 
| 96 |               oldAffinities.push_back(threadAffinity); | 
| 97 |               numThreads++; | 
| 98 |  | 
| 99 |               // Remove this bit/thread from the mask | 
| 100 |               coreAffinity.Mask ^= threadAffinity.Mask; | 
| 101 |             } | 
| 102 |           } | 
| 103 |         } | 
| 104 |  | 
| 105 |         // Next structure | 
| 106 |         ptr += item->Size; | 
| 107 |       } | 
| 108 |  | 
| 109 |       // Free the buffer | 
| 110 |       free(buffer); | 
| 111 |     } | 
| 112 |   } | 
| 113 |  | 
| 114 |   void ThreadAffinity::set(int threadIndex) | 
| 115 |   { | 
| 116 |     if (threadIndex >= (int)affinities.size()) | 
| 117 |       return; | 
| 118 |  | 
| 119 |     // Save the current affinity and set the new one | 
| 120 |     const HANDLE thread = GetCurrentThread(); | 
| 121 |     if (!pSetThreadGroupAffinity(thread, &affinities[threadIndex], &oldAffinities[threadIndex])) | 
| 122 |       OIDN_WARNING("SetThreadGroupAffinity failed" ); | 
| 123 |   } | 
| 124 |  | 
| 125 |   void ThreadAffinity::restore(int threadIndex) | 
| 126 |   { | 
| 127 |     if (threadIndex >= (int)affinities.size()) | 
| 128 |       return; | 
| 129 |  | 
| 130 |     // Restore the original affinity | 
| 131 |     const HANDLE thread = GetCurrentThread(); | 
| 132 |     if (!pSetThreadGroupAffinity(thread, &oldAffinities[threadIndex], nullptr)) | 
| 133 |       OIDN_WARNING("SetThreadGroupAffinity failed" ); | 
| 134 |   } | 
| 135 |  | 
| 136 | #elif defined(__linux__) | 
| 137 |  | 
| 138 |   // -------------------------------------------------------------------------- | 
| 139 |   // ThreadAffinity - Linux | 
| 140 |   // -------------------------------------------------------------------------- | 
| 141 |  | 
| 142 |   ThreadAffinity::ThreadAffinity(int numThreadsPerCore, int verbose) | 
| 143 |     : Verbose(verbose) | 
| 144 |   { | 
| 145 |     std::vector<int> threadIds; | 
| 146 |  | 
| 147 |     // Parse the thread/CPU topology | 
| 148 |     for (int cpuId = 0; ; cpuId++) | 
| 149 |     { | 
| 150 |       std::fstream fs; | 
| 151 |       std::string cpu = std::string("/sys/devices/system/cpu/cpu" ) + std::to_string(cpuId) + std::string("/topology/thread_siblings_list" ); | 
| 152 |       fs.open(cpu.c_str(), std::fstream::in); | 
| 153 |       if (fs.fail()) break; | 
| 154 |  | 
| 155 |       int i; | 
| 156 |       int j = 0; | 
| 157 |       while ((j < numThreadsPerCore) && (fs >> i)) | 
| 158 |       { | 
| 159 |         if (std::none_of(threadIds.begin(), threadIds.end(), [&](int id) { return id == i; })) | 
| 160 |           threadIds.push_back(i); | 
| 161 |  | 
| 162 |         if (fs.peek() == ',') | 
| 163 |           fs.ignore(); | 
| 164 |         j++; | 
| 165 |       } | 
| 166 |  | 
| 167 |       fs.close(); | 
| 168 |     } | 
| 169 |  | 
| 170 |   #if 0 | 
| 171 |     for (size_t i = 0; i < thread_ids.size(); ++i) | 
| 172 |       std::cout << "thread "  << i << " -> "  << thread_ids[i] << std::endl; | 
| 173 |   #endif | 
| 174 |  | 
| 175 |     // Create the affinity structures | 
| 176 |     affinities.resize(threadIds.size()); | 
| 177 |     oldAffinities.resize(threadIds.size()); | 
| 178 |  | 
| 179 |     for (size_t i = 0; i < threadIds.size(); ++i) | 
| 180 |     { | 
| 181 |       cpu_set_t affinity; | 
| 182 |       CPU_ZERO(&affinity); | 
| 183 |       CPU_SET(threadIds[i], &affinity); | 
| 184 |  | 
| 185 |       affinities[i] = affinity; | 
| 186 |       oldAffinities[i] = affinity; | 
| 187 |     } | 
| 188 |   } | 
| 189 |  | 
| 190 |   void ThreadAffinity::set(int threadIndex) | 
| 191 |   { | 
| 192 |     if (threadIndex >= (int)affinities.size()) | 
| 193 |       return; | 
| 194 |  | 
| 195 |     const pthread_t thread = pthread_self(); | 
| 196 |  | 
| 197 |     // Save the current affinity | 
| 198 |     if (pthread_getaffinity_np(thread, sizeof(cpu_set_t), &oldAffinities[threadIndex]) != 0) | 
| 199 |     { | 
| 200 |       OIDN_WARNING("pthread_getaffinity_np failed" ); | 
| 201 |       oldAffinities[threadIndex] = affinities[threadIndex]; | 
| 202 |       return; | 
| 203 |     } | 
| 204 |  | 
| 205 |     // Set the new affinity | 
| 206 |     if (pthread_setaffinity_np(thread, sizeof(cpu_set_t), &affinities[threadIndex]) != 0) | 
| 207 |       OIDN_WARNING("pthread_setaffinity_np failed" ); | 
| 208 |   } | 
| 209 |  | 
| 210 |   void ThreadAffinity::restore(int threadIndex) | 
| 211 |   { | 
| 212 |     if (threadIndex >= (int)affinities.size()) | 
| 213 |       return; | 
| 214 |  | 
| 215 |     const pthread_t thread = pthread_self(); | 
| 216 |  | 
| 217 |     // Restore the original affinity | 
| 218 |     if (pthread_setaffinity_np(thread, sizeof(cpu_set_t), &oldAffinities[threadIndex]) != 0) | 
| 219 |       OIDN_WARNING("pthread_setaffinity_np failed" ); | 
| 220 |   } | 
| 221 |  | 
| 222 | #elif defined(__APPLE__) | 
| 223 |  | 
| 224 |   // -------------------------------------------------------------------------- | 
| 225 |   // ThreadAffinity - macOS | 
| 226 |   // -------------------------------------------------------------------------- | 
| 227 |  | 
| 228 |   ThreadAffinity::ThreadAffinity(int numThreadsPerCore, int verbose) | 
| 229 |     : Verbose(verbose) | 
| 230 |   { | 
| 231 |     // Query the thread/CPU topology | 
| 232 |     int numPhysicalCpus; | 
| 233 |     int numLogicalCpus; | 
| 234 |  | 
| 235 |     if (!getSysctl("hw.physicalcpu" , numPhysicalCpus) || !getSysctl("hw.logicalcpu" , numLogicalCpus)) | 
| 236 |     { | 
| 237 |       OIDN_WARNING("sysctlbyname failed" ); | 
| 238 |       return; | 
| 239 |     } | 
| 240 |  | 
| 241 |     if ((numLogicalCpus % numPhysicalCpus != 0) && (numThreadsPerCore > 1)) | 
| 242 |       return; // this shouldn't happen | 
| 243 |     const int maxThreadsPerCore = numLogicalCpus / numPhysicalCpus; | 
| 244 |  | 
| 245 |     // Create the affinity structures | 
| 246 |     // macOS doesn't support binding a thread to a specific core, but we can at least group threads which | 
| 247 |     // should be on the same core together | 
| 248 |     for (int core = 1; core <= numPhysicalCpus; ++core) // tags start from 1! | 
| 249 |     { | 
| 250 |       thread_affinity_policy affinity; | 
| 251 |       affinity.affinity_tag = core; | 
| 252 |  | 
| 253 |       for (int thread = 0; thread < min(numThreadsPerCore, maxThreadsPerCore); ++thread) | 
| 254 |       { | 
| 255 |         affinities.push_back(affinity); | 
| 256 |         oldAffinities.push_back(affinity); | 
| 257 |       } | 
| 258 |     } | 
| 259 |   } | 
| 260 |  | 
| 261 |   void ThreadAffinity::set(int threadIndex) | 
| 262 |   { | 
| 263 |     if (threadIndex >= (int)affinities.size()) | 
| 264 |       return; | 
| 265 |  | 
| 266 |     const auto thread = mach_thread_self(); | 
| 267 |  | 
| 268 |     // Save the current affinity | 
| 269 |     mach_msg_type_number_t policyCount = THREAD_AFFINITY_POLICY_COUNT; | 
| 270 |     boolean_t getDefault = FALSE; | 
| 271 |     if (thread_policy_get(thread, THREAD_AFFINITY_POLICY, (thread_policy_t)&oldAffinities[threadIndex], &policyCount, &getDefault) != KERN_SUCCESS) | 
| 272 |     { | 
| 273 |       OIDN_WARNING("thread_policy_get failed" ); | 
| 274 |       oldAffinities[threadIndex] = affinities[threadIndex]; | 
| 275 |       return; | 
| 276 |     } | 
| 277 |  | 
| 278 |     // Set the new affinity | 
| 279 |     if (thread_policy_set(thread, THREAD_AFFINITY_POLICY, (thread_policy_t)&affinities[threadIndex], THREAD_AFFINITY_POLICY_COUNT) != KERN_SUCCESS) | 
| 280 |       OIDN_WARNING("thread_policy_set failed" ); | 
| 281 |   } | 
| 282 |  | 
| 283 |   void ThreadAffinity::restore(int threadIndex) | 
| 284 |   { | 
| 285 |     if (threadIndex >= (int)affinities.size()) | 
| 286 |       return; | 
| 287 |  | 
| 288 |     const auto thread = mach_thread_self(); | 
| 289 |  | 
| 290 |     // Restore the original affinity | 
| 291 |     if (thread_policy_set(thread, THREAD_AFFINITY_POLICY, (thread_policy_t)&oldAffinities[threadIndex], THREAD_AFFINITY_POLICY_COUNT) != KERN_SUCCESS) | 
| 292 |       OIDN_WARNING("thread_policy_set failed" ); | 
| 293 |   } | 
| 294 |  | 
| 295 | #endif | 
| 296 |  | 
| 297 | } // namespace oidn | 
| 298 |  |