1 | // Licensed to the Apache Software Foundation (ASF) under one |
2 | // or more contributor license agreements. See the NOTICE file |
3 | // distributed with this work for additional information |
4 | // regarding copyright ownership. The ASF licenses this file |
5 | // to you under the Apache License, Version 2.0 (the |
6 | // "License"); you may not use this file except in compliance |
7 | // with the License. You may obtain a copy of the License at |
8 | // |
9 | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | // |
11 | // Unless required by applicable law or agreed to in writing, |
12 | // software distributed under the License is distributed on an |
13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | // KIND, either express or implied. See the License for the |
15 | // specific language governing permissions and limitations |
16 | // under the License. |
17 | |
18 | // From Apache Impala (incubating) as of 2016-01-29. |
19 | |
20 | #include "arrow/util/cpu-info.h" |
21 | |
22 | #ifdef __APPLE__ |
23 | #include <sys/sysctl.h> |
24 | #endif |
25 | |
26 | #include <stdlib.h> |
27 | #include <string.h> |
28 | |
29 | #ifndef _MSC_VER |
30 | #include <unistd.h> |
31 | #endif |
32 | |
33 | #ifdef _WIN32 |
34 | #include <intrin.h> |
35 | #include <array> |
36 | #include <bitset> |
37 | #include "arrow/util/windows_compatibility.h" |
38 | |
39 | #endif |
40 | |
41 | #include <boost/algorithm/string/predicate.hpp> |
42 | #include <boost/algorithm/string/trim.hpp> |
43 | |
44 | #include <algorithm> |
45 | #include <cstdint> |
46 | #include <fstream> |
47 | #include <memory> |
48 | #include <mutex> |
49 | #include <string> |
50 | |
51 | #include "arrow/util/logging.h" |
52 | |
53 | using boost::algorithm::contains; |
54 | using boost::algorithm::trim; |
55 | using std::max; |
56 | |
57 | namespace arrow { |
58 | namespace internal { |
59 | |
60 | static struct { |
61 | std::string name; |
62 | int64_t flag; |
63 | } flag_mappings[] = { |
64 | {"ssse3" , CpuInfo::SSSE3}, |
65 | {"sse4_1" , CpuInfo::SSE4_1}, |
66 | {"sse4_2" , CpuInfo::SSE4_2}, |
67 | {"popcnt" , CpuInfo::POPCNT}, |
68 | }; |
69 | static const int64_t num_flags = sizeof(flag_mappings) / sizeof(flag_mappings[0]); |
70 | |
71 | namespace { |
72 | |
73 | // Helper function to parse for hardware flags. |
74 | // values contains a list of space-seperated flags. check to see if the flags we |
75 | // care about are present. |
76 | // Returns a bitmap of flags. |
77 | int64_t ParseCPUFlags(const std::string& values) { |
78 | int64_t flags = 0; |
79 | for (int i = 0; i < num_flags; ++i) { |
80 | if (contains(values, flag_mappings[i].name)) { |
81 | flags |= flag_mappings[i].flag; |
82 | } |
83 | } |
84 | return flags; |
85 | } |
86 | |
87 | } // namespace |
88 | |
89 | #ifdef _WIN32 |
90 | bool RetrieveCacheSize(int64_t* cache_sizes) { |
91 | if (!cache_sizes) { |
92 | return false; |
93 | } |
94 | PSYSTEM_LOGICAL_PROCESSOR_INFORMATION buffer = nullptr; |
95 | PSYSTEM_LOGICAL_PROCESSOR_INFORMATION buffer_position = nullptr; |
96 | DWORD buffer_size = 0; |
97 | DWORD offset = 0; |
98 | typedef BOOL(WINAPI * GetLogicalProcessorInformationFuncPointer)(void*, void*); |
99 | GetLogicalProcessorInformationFuncPointer func_pointer = |
100 | (GetLogicalProcessorInformationFuncPointer)GetProcAddress( |
101 | GetModuleHandle("kernel32" ), "GetLogicalProcessorInformation" ); |
102 | |
103 | if (!func_pointer) { |
104 | return false; |
105 | } |
106 | |
107 | // Get buffer size |
108 | if (func_pointer(buffer, &buffer_size) && GetLastError() != ERROR_INSUFFICIENT_BUFFER) |
109 | return false; |
110 | |
111 | buffer = (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION)malloc(buffer_size); |
112 | |
113 | if (!buffer || !func_pointer(buffer, &buffer_size)) { |
114 | return false; |
115 | } |
116 | |
117 | buffer_position = buffer; |
118 | while (offset + sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION) <= buffer_size) { |
119 | if (RelationCache == buffer_position->Relationship) { |
120 | PCACHE_DESCRIPTOR cache = &buffer_position->Cache; |
121 | if (cache->Level >= 1 && cache->Level <= 3) { |
122 | cache_sizes[cache->Level - 1] += cache->Size; |
123 | } |
124 | } |
125 | offset += sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION); |
126 | buffer_position++; |
127 | } |
128 | |
129 | if (buffer) { |
130 | free(buffer); |
131 | } |
132 | return true; |
133 | } |
134 | |
135 | bool RetrieveCPUInfo(int64_t* hardware_flags, std::string* model_name) { |
136 | if (!hardware_flags || !model_name) { |
137 | return false; |
138 | } |
139 | const int register_ECX_id = 1; |
140 | int highest_valid_id = 0; |
141 | int highest_extended_valid_id = 0; |
142 | std::bitset<32> features_ECX; |
143 | std::array<int, 4> cpu_info; |
144 | |
145 | // Get highest valid id |
146 | __cpuid(cpu_info.data(), 0); |
147 | highest_valid_id = cpu_info[0]; |
148 | |
149 | if (highest_valid_id <= register_ECX_id) return false; |
150 | |
151 | __cpuidex(cpu_info.data(), register_ECX_id, 0); |
152 | features_ECX = cpu_info[2]; |
153 | |
154 | // Get highest extended id |
155 | __cpuid(cpu_info.data(), 0x80000000); |
156 | highest_extended_valid_id = cpu_info[0]; |
157 | |
158 | // Retrieve CPU model name |
159 | if (highest_extended_valid_id >= 0x80000004) { |
160 | model_name->clear(); |
161 | for (int i = 0x80000002; i <= 0x80000004; ++i) { |
162 | __cpuidex(cpu_info.data(), i, 0); |
163 | *model_name += |
164 | std::string(reinterpret_cast<char*>(cpu_info.data()), sizeof(cpu_info)); |
165 | } |
166 | } |
167 | |
168 | if (features_ECX[9]) *hardware_flags |= CpuInfo::SSSE3; |
169 | if (features_ECX[19]) *hardware_flags |= CpuInfo::SSE4_1; |
170 | if (features_ECX[20]) *hardware_flags |= CpuInfo::SSE4_2; |
171 | if (features_ECX[23]) *hardware_flags |= CpuInfo::POPCNT; |
172 | return true; |
173 | } |
174 | #endif |
175 | |
176 | CpuInfo::CpuInfo() : hardware_flags_(0), num_cores_(1), model_name_("unknown" ) {} |
177 | |
178 | std::unique_ptr<CpuInfo> g_cpu_info; |
179 | static std::mutex cpuinfo_mutex; |
180 | |
181 | CpuInfo* CpuInfo::GetInstance() { |
182 | std::lock_guard<std::mutex> lock(cpuinfo_mutex); |
183 | if (!g_cpu_info) { |
184 | g_cpu_info.reset(new CpuInfo); |
185 | g_cpu_info->Init(); |
186 | } |
187 | return g_cpu_info.get(); |
188 | } |
189 | |
190 | void CpuInfo::Init() { |
191 | std::string line; |
192 | std::string name; |
193 | std::string value; |
194 | |
195 | float max_mhz = 0; |
196 | int num_cores = 0; |
197 | |
198 | memset(&cache_sizes_, 0, sizeof(cache_sizes_)); |
199 | |
200 | #ifdef _WIN32 |
201 | SYSTEM_INFO system_info; |
202 | GetSystemInfo(&system_info); |
203 | num_cores = system_info.dwNumberOfProcessors; |
204 | |
205 | LARGE_INTEGER performance_frequency; |
206 | if (QueryPerformanceFrequency(&performance_frequency)) { |
207 | max_mhz = static_cast<float>(performance_frequency.QuadPart); |
208 | } |
209 | #else |
210 | // Read from /proc/cpuinfo |
211 | std::ifstream cpuinfo("/proc/cpuinfo" , std::ios::in); |
212 | while (cpuinfo) { |
213 | getline(cpuinfo, line); |
214 | size_t colon = line.find(':'); |
215 | if (colon != std::string::npos) { |
216 | name = line.substr(0, colon - 1); |
217 | value = line.substr(colon + 1, std::string::npos); |
218 | trim(name); |
219 | trim(value); |
220 | if (name.compare("flags" ) == 0) { |
221 | hardware_flags_ |= ParseCPUFlags(value); |
222 | } else if (name.compare("cpu MHz" ) == 0) { |
223 | // Every core will report a different speed. We'll take the max, assuming |
224 | // that when impala is running, the core will not be in a lower power state. |
225 | // TODO: is there a more robust way to do this, such as |
226 | // Window's QueryPerformanceFrequency() |
227 | float mhz = static_cast<float>(atof(value.c_str())); |
228 | max_mhz = max(mhz, max_mhz); |
229 | } else if (name.compare("processor" ) == 0) { |
230 | ++num_cores; |
231 | } else if (name.compare("model name" ) == 0) { |
232 | model_name_ = value; |
233 | } |
234 | } |
235 | } |
236 | if (cpuinfo.is_open()) cpuinfo.close(); |
237 | #endif |
238 | |
239 | #ifdef __APPLE__ |
240 | // On Mac OS X use sysctl() to get the cache sizes |
241 | size_t len = 0; |
242 | sysctlbyname("hw.cachesize" , NULL, &len, NULL, 0); |
243 | uint64_t* data = static_cast<uint64_t*>(malloc(len)); |
244 | sysctlbyname("hw.cachesize" , data, &len, NULL, 0); |
245 | DCHECK_GE(len / sizeof(uint64_t), 3); |
246 | for (size_t i = 0; i < 3; ++i) { |
247 | cache_sizes_[i] = data[i]; |
248 | } |
249 | #elif _WIN32 |
250 | if (!RetrieveCacheSize(cache_sizes_)) { |
251 | SetDefaultCacheSize(); |
252 | } |
253 | RetrieveCPUInfo(&hardware_flags_, &model_name_); |
254 | #else |
255 | SetDefaultCacheSize(); |
256 | #endif |
257 | |
258 | if (max_mhz != 0) { |
259 | cycles_per_ms_ = static_cast<int64_t>(max_mhz); |
260 | #ifndef _WIN32 |
261 | cycles_per_ms_ *= 1000; |
262 | #endif |
263 | } else { |
264 | cycles_per_ms_ = 1000000; |
265 | } |
266 | original_hardware_flags_ = hardware_flags_; |
267 | |
268 | if (num_cores > 0) { |
269 | num_cores_ = num_cores; |
270 | } else { |
271 | num_cores_ = 1; |
272 | } |
273 | } |
274 | |
275 | void CpuInfo::VerifyCpuRequirements() { |
276 | if (!IsSupported(CpuInfo::SSSE3)) { |
277 | DCHECK(false) << "CPU does not support the Supplemental SSE3 instruction set" ; |
278 | } |
279 | } |
280 | |
281 | bool CpuInfo::CanUseSSE4_2() const { |
282 | #ifdef ARROW_USE_SIMD |
283 | return IsSupported(CpuInfo::SSE4_2); |
284 | #else |
285 | return false; |
286 | #endif |
287 | } |
288 | |
289 | void CpuInfo::EnableFeature(int64_t flag, bool enable) { |
290 | if (!enable) { |
291 | hardware_flags_ &= ~flag; |
292 | } else { |
293 | // Can't turn something on that can't be supported |
294 | DCHECK_NE(original_hardware_flags_ & flag, 0); |
295 | hardware_flags_ |= flag; |
296 | } |
297 | } |
298 | |
299 | int64_t CpuInfo::hardware_flags() { return hardware_flags_; } |
300 | |
301 | int64_t CpuInfo::CacheSize(CacheLevel level) { return cache_sizes_[level]; } |
302 | |
303 | int64_t CpuInfo::cycles_per_ms() { return cycles_per_ms_; } |
304 | |
305 | int CpuInfo::num_cores() { return num_cores_; } |
306 | |
307 | std::string CpuInfo::model_name() { return model_name_; } |
308 | |
309 | void CpuInfo::SetDefaultCacheSize() { |
310 | #ifndef _SC_LEVEL1_DCACHE_SIZE |
311 | // Provide reasonable default values if no info |
312 | cache_sizes_[0] = 32 * 1024; // Level 1: 32k |
313 | cache_sizes_[1] = 256 * 1024; // Level 2: 256k |
314 | cache_sizes_[2] = 3072 * 1024; // Level 3: 3M |
315 | #else |
316 | // Call sysconf to query for the cache sizes |
317 | cache_sizes_[0] = sysconf(_SC_LEVEL1_DCACHE_SIZE); |
318 | cache_sizes_[1] = sysconf(_SC_LEVEL2_CACHE_SIZE); |
319 | cache_sizes_[2] = sysconf(_SC_LEVEL3_CACHE_SIZE); |
320 | #endif |
321 | } |
322 | |
323 | } // namespace internal |
324 | } // namespace arrow |
325 | |