1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements. See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership. The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License. You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied. See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18// From Apache Impala (incubating) as of 2016-01-29.
19
20#include "arrow/util/cpu-info.h"
21
22#ifdef __APPLE__
23#include <sys/sysctl.h>
24#endif
25
26#include <stdlib.h>
27#include <string.h>
28
29#ifndef _MSC_VER
30#include <unistd.h>
31#endif
32
33#ifdef _WIN32
34#include <intrin.h>
35#include <array>
36#include <bitset>
37#include "arrow/util/windows_compatibility.h"
38
39#endif
40
41#include <boost/algorithm/string/predicate.hpp>
42#include <boost/algorithm/string/trim.hpp>
43
44#include <algorithm>
45#include <cstdint>
46#include <fstream>
47#include <memory>
48#include <mutex>
49#include <string>
50
51#include "arrow/util/logging.h"
52
53using boost::algorithm::contains;
54using boost::algorithm::trim;
55using std::max;
56
57namespace arrow {
58namespace internal {
59
60static struct {
61 std::string name;
62 int64_t flag;
63} flag_mappings[] = {
64 {"ssse3", CpuInfo::SSSE3},
65 {"sse4_1", CpuInfo::SSE4_1},
66 {"sse4_2", CpuInfo::SSE4_2},
67 {"popcnt", CpuInfo::POPCNT},
68};
69static const int64_t num_flags = sizeof(flag_mappings) / sizeof(flag_mappings[0]);
70
71namespace {
72
73// Helper function to parse for hardware flags.
74// values contains a list of space-seperated flags. check to see if the flags we
75// care about are present.
76// Returns a bitmap of flags.
77int64_t ParseCPUFlags(const std::string& values) {
78 int64_t flags = 0;
79 for (int i = 0; i < num_flags; ++i) {
80 if (contains(values, flag_mappings[i].name)) {
81 flags |= flag_mappings[i].flag;
82 }
83 }
84 return flags;
85}
86
87} // namespace
88
89#ifdef _WIN32
90bool RetrieveCacheSize(int64_t* cache_sizes) {
91 if (!cache_sizes) {
92 return false;
93 }
94 PSYSTEM_LOGICAL_PROCESSOR_INFORMATION buffer = nullptr;
95 PSYSTEM_LOGICAL_PROCESSOR_INFORMATION buffer_position = nullptr;
96 DWORD buffer_size = 0;
97 DWORD offset = 0;
98 typedef BOOL(WINAPI * GetLogicalProcessorInformationFuncPointer)(void*, void*);
99 GetLogicalProcessorInformationFuncPointer func_pointer =
100 (GetLogicalProcessorInformationFuncPointer)GetProcAddress(
101 GetModuleHandle("kernel32"), "GetLogicalProcessorInformation");
102
103 if (!func_pointer) {
104 return false;
105 }
106
107 // Get buffer size
108 if (func_pointer(buffer, &buffer_size) && GetLastError() != ERROR_INSUFFICIENT_BUFFER)
109 return false;
110
111 buffer = (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION)malloc(buffer_size);
112
113 if (!buffer || !func_pointer(buffer, &buffer_size)) {
114 return false;
115 }
116
117 buffer_position = buffer;
118 while (offset + sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION) <= buffer_size) {
119 if (RelationCache == buffer_position->Relationship) {
120 PCACHE_DESCRIPTOR cache = &buffer_position->Cache;
121 if (cache->Level >= 1 && cache->Level <= 3) {
122 cache_sizes[cache->Level - 1] += cache->Size;
123 }
124 }
125 offset += sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION);
126 buffer_position++;
127 }
128
129 if (buffer) {
130 free(buffer);
131 }
132 return true;
133}
134
135bool RetrieveCPUInfo(int64_t* hardware_flags, std::string* model_name) {
136 if (!hardware_flags || !model_name) {
137 return false;
138 }
139 const int register_ECX_id = 1;
140 int highest_valid_id = 0;
141 int highest_extended_valid_id = 0;
142 std::bitset<32> features_ECX;
143 std::array<int, 4> cpu_info;
144
145 // Get highest valid id
146 __cpuid(cpu_info.data(), 0);
147 highest_valid_id = cpu_info[0];
148
149 if (highest_valid_id <= register_ECX_id) return false;
150
151 __cpuidex(cpu_info.data(), register_ECX_id, 0);
152 features_ECX = cpu_info[2];
153
154 // Get highest extended id
155 __cpuid(cpu_info.data(), 0x80000000);
156 highest_extended_valid_id = cpu_info[0];
157
158 // Retrieve CPU model name
159 if (highest_extended_valid_id >= 0x80000004) {
160 model_name->clear();
161 for (int i = 0x80000002; i <= 0x80000004; ++i) {
162 __cpuidex(cpu_info.data(), i, 0);
163 *model_name +=
164 std::string(reinterpret_cast<char*>(cpu_info.data()), sizeof(cpu_info));
165 }
166 }
167
168 if (features_ECX[9]) *hardware_flags |= CpuInfo::SSSE3;
169 if (features_ECX[19]) *hardware_flags |= CpuInfo::SSE4_1;
170 if (features_ECX[20]) *hardware_flags |= CpuInfo::SSE4_2;
171 if (features_ECX[23]) *hardware_flags |= CpuInfo::POPCNT;
172 return true;
173}
174#endif
175
176CpuInfo::CpuInfo() : hardware_flags_(0), num_cores_(1), model_name_("unknown") {}
177
178std::unique_ptr<CpuInfo> g_cpu_info;
179static std::mutex cpuinfo_mutex;
180
181CpuInfo* CpuInfo::GetInstance() {
182 std::lock_guard<std::mutex> lock(cpuinfo_mutex);
183 if (!g_cpu_info) {
184 g_cpu_info.reset(new CpuInfo);
185 g_cpu_info->Init();
186 }
187 return g_cpu_info.get();
188}
189
190void CpuInfo::Init() {
191 std::string line;
192 std::string name;
193 std::string value;
194
195 float max_mhz = 0;
196 int num_cores = 0;
197
198 memset(&cache_sizes_, 0, sizeof(cache_sizes_));
199
200#ifdef _WIN32
201 SYSTEM_INFO system_info;
202 GetSystemInfo(&system_info);
203 num_cores = system_info.dwNumberOfProcessors;
204
205 LARGE_INTEGER performance_frequency;
206 if (QueryPerformanceFrequency(&performance_frequency)) {
207 max_mhz = static_cast<float>(performance_frequency.QuadPart);
208 }
209#else
210 // Read from /proc/cpuinfo
211 std::ifstream cpuinfo("/proc/cpuinfo", std::ios::in);
212 while (cpuinfo) {
213 getline(cpuinfo, line);
214 size_t colon = line.find(':');
215 if (colon != std::string::npos) {
216 name = line.substr(0, colon - 1);
217 value = line.substr(colon + 1, std::string::npos);
218 trim(name);
219 trim(value);
220 if (name.compare("flags") == 0) {
221 hardware_flags_ |= ParseCPUFlags(value);
222 } else if (name.compare("cpu MHz") == 0) {
223 // Every core will report a different speed. We'll take the max, assuming
224 // that when impala is running, the core will not be in a lower power state.
225 // TODO: is there a more robust way to do this, such as
226 // Window's QueryPerformanceFrequency()
227 float mhz = static_cast<float>(atof(value.c_str()));
228 max_mhz = max(mhz, max_mhz);
229 } else if (name.compare("processor") == 0) {
230 ++num_cores;
231 } else if (name.compare("model name") == 0) {
232 model_name_ = value;
233 }
234 }
235 }
236 if (cpuinfo.is_open()) cpuinfo.close();
237#endif
238
239#ifdef __APPLE__
240 // On Mac OS X use sysctl() to get the cache sizes
241 size_t len = 0;
242 sysctlbyname("hw.cachesize", NULL, &len, NULL, 0);
243 uint64_t* data = static_cast<uint64_t*>(malloc(len));
244 sysctlbyname("hw.cachesize", data, &len, NULL, 0);
245 DCHECK_GE(len / sizeof(uint64_t), 3);
246 for (size_t i = 0; i < 3; ++i) {
247 cache_sizes_[i] = data[i];
248 }
249#elif _WIN32
250 if (!RetrieveCacheSize(cache_sizes_)) {
251 SetDefaultCacheSize();
252 }
253 RetrieveCPUInfo(&hardware_flags_, &model_name_);
254#else
255 SetDefaultCacheSize();
256#endif
257
258 if (max_mhz != 0) {
259 cycles_per_ms_ = static_cast<int64_t>(max_mhz);
260#ifndef _WIN32
261 cycles_per_ms_ *= 1000;
262#endif
263 } else {
264 cycles_per_ms_ = 1000000;
265 }
266 original_hardware_flags_ = hardware_flags_;
267
268 if (num_cores > 0) {
269 num_cores_ = num_cores;
270 } else {
271 num_cores_ = 1;
272 }
273}
274
275void CpuInfo::VerifyCpuRequirements() {
276 if (!IsSupported(CpuInfo::SSSE3)) {
277 DCHECK(false) << "CPU does not support the Supplemental SSE3 instruction set";
278 }
279}
280
281bool CpuInfo::CanUseSSE4_2() const {
282#ifdef ARROW_USE_SIMD
283 return IsSupported(CpuInfo::SSE4_2);
284#else
285 return false;
286#endif
287}
288
289void CpuInfo::EnableFeature(int64_t flag, bool enable) {
290 if (!enable) {
291 hardware_flags_ &= ~flag;
292 } else {
293 // Can't turn something on that can't be supported
294 DCHECK_NE(original_hardware_flags_ & flag, 0);
295 hardware_flags_ |= flag;
296 }
297}
298
299int64_t CpuInfo::hardware_flags() { return hardware_flags_; }
300
301int64_t CpuInfo::CacheSize(CacheLevel level) { return cache_sizes_[level]; }
302
303int64_t CpuInfo::cycles_per_ms() { return cycles_per_ms_; }
304
305int CpuInfo::num_cores() { return num_cores_; }
306
307std::string CpuInfo::model_name() { return model_name_; }
308
309void CpuInfo::SetDefaultCacheSize() {
310#ifndef _SC_LEVEL1_DCACHE_SIZE
311 // Provide reasonable default values if no info
312 cache_sizes_[0] = 32 * 1024; // Level 1: 32k
313 cache_sizes_[1] = 256 * 1024; // Level 2: 256k
314 cache_sizes_[2] = 3072 * 1024; // Level 3: 3M
315#else
316 // Call sysconf to query for the cache sizes
317 cache_sizes_[0] = sysconf(_SC_LEVEL1_DCACHE_SIZE);
318 cache_sizes_[1] = sysconf(_SC_LEVEL2_CACHE_SIZE);
319 cache_sizes_[2] = sysconf(_SC_LEVEL3_CACHE_SIZE);
320#endif
321}
322
323} // namespace internal
324} // namespace arrow
325