1 | // Copyright 2015 Google Inc. All rights reserved. |
2 | // |
3 | // Licensed under the Apache License, Version 2.0 (the "License"); |
4 | // you may not use this file except in compliance with the License. |
5 | // You may obtain a copy of the License at |
6 | // |
7 | // http://www.apache.org/licenses/LICENSE-2.0 |
8 | // |
9 | // Unless required by applicable law or agreed to in writing, software |
10 | // distributed under the License is distributed on an "AS IS" BASIS, |
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | // See the License for the specific language governing permissions and |
13 | // limitations under the License. |
14 | |
15 | #include "sysinfo.h" |
16 | #include "internal_macros.h" |
17 | |
18 | #ifdef BENCHMARK_OS_WINDOWS |
19 | #include <Shlwapi.h> |
20 | #include <VersionHelpers.h> |
21 | #include <Windows.h> |
22 | #else |
23 | #include <fcntl.h> |
24 | #ifndef __Fuchsia__ |
25 | #include <sys/resource.h> |
26 | #endif |
27 | #include <sys/time.h> |
28 | #include <sys/types.h> // this header must be included before 'sys/sysctl.h' to avoid compilation error on FreeBSD |
29 | #include <unistd.h> |
30 | #if defined BENCHMARK_OS_FREEBSD || defined BENCHMARK_OS_MACOSX |
31 | #include <sys/sysctl.h> |
32 | #endif |
33 | #endif |
34 | |
35 | #include <cerrno> |
36 | #include <cstdint> |
37 | #include <cstdio> |
38 | #include <cstdlib> |
39 | #include <cstring> |
40 | #include <iostream> |
41 | #include <limits> |
42 | #include <mutex> |
43 | |
44 | #include "arraysize.h" |
45 | #include "check.h" |
46 | #include "cycleclock.h" |
47 | #include "internal_macros.h" |
48 | #include "log.h" |
49 | #include "sleep.h" |
50 | #include "string_util.h" |
51 | |
52 | namespace benchmark { |
53 | namespace { |
// One-shot guard passed to std::call_once so InitializeSystemInfo runs once.
std::once_flag cpuinfo_init;
// Cached cycle-counter frequency; holds this placeholder until initialized.
double cpuinfo_cycles_per_second{1.0};
// Cached CPU count.
int cpuinfo_num_cpus{1};  // Conservative guess
57 | |
58 | #if !defined BENCHMARK_OS_MACOSX |
59 | const int64_t estimate_time_ms = 1000; |
60 | |
61 | // Helper function estimates cycles/sec by observing cycles elapsed during |
62 | // sleep(). Using small sleep time decreases accuracy significantly. |
63 | int64_t EstimateCyclesPerSecond() { |
64 | const int64_t start_ticks = cycleclock::Now(); |
65 | SleepForMilliseconds(estimate_time_ms); |
66 | return cycleclock::Now() - start_ticks; |
67 | } |
68 | #endif |
69 | |
70 | #if defined BENCHMARK_OS_LINUX || defined BENCHMARK_OS_CYGWIN |
// Reads a base-10 integer from the beginning of `file`.
//
// On success, stores the parsed number in *value and returns true. Returns
// false and leaves *value untouched when the file cannot be opened or read,
// is empty, or does not start with an integer that is immediately followed by
// a newline or the end of the data.
bool ReadIntFromFile(const char* file, long* value) {
  const int fd = open(file, O_RDONLY);
  if (fd == -1) return false;

  char line[1024];
  memset(line, '\0', sizeof(line));
  // The read is performed outside of any assertion macro so it can never be
  // compiled out. read() signals an error with -1 and an empty file with 0;
  // in both cases there is nothing to parse, so report a soft failure.
  const ssize_t bytes_read = read(fd, line, sizeof(line) - 1);
  close(fd);
  if (bytes_read <= 0) return false;

  char* end = nullptr;
  const long parsed = strtol(line, &end, 10);
  // end == line means strtol consumed no digits (e.g. a blank line); that
  // must not be reported as a successfully read zero.
  if (end == line || (*end != '\n' && *end != '\0')) return false;

  *value = parsed;
  return true;
}
90 | #endif |
91 | |
92 | #if defined BENCHMARK_OS_LINUX || defined BENCHMARK_OS_CYGWIN |
// Returns a copy of `s` with every character lower-cased via std::tolower.
static std::string convertToLowerCase(std::string s) {
  for (auto& ch : s) {
    // std::tolower requires a value representable as unsigned char (or EOF);
    // passing a negative plain char is undefined behaviour, so convert first.
    ch = static_cast<char>(std::tolower(static_cast<unsigned char>(ch)));
  }
  return s;
}
98 | static bool startsWithKey(std::string Value, std::string Key, |
99 | bool IgnoreCase = true) { |
100 | if (IgnoreCase) { |
101 | Key = convertToLowerCase(std::move(Key)); |
102 | Value = convertToLowerCase(std::move(Value)); |
103 | } |
104 | return Value.compare(0, Key.size(), Key) == 0; |
105 | } |
106 | #endif |
107 | |
108 | void InitializeSystemInfo() { |
109 | #if defined BENCHMARK_OS_LINUX || defined BENCHMARK_OS_CYGWIN |
110 | char line[1024]; |
111 | char* err; |
112 | long freq; |
113 | |
114 | bool saw_mhz = false; |
115 | |
116 | // If the kernel is exporting the tsc frequency use that. There are issues |
117 | // where cpuinfo_max_freq cannot be relied on because the BIOS may be |
118 | // exporintg an invalid p-state (on x86) or p-states may be used to put the |
119 | // processor in a new mode (turbo mode). Essentially, those frequencies |
120 | // cannot always be relied upon. The same reasons apply to /proc/cpuinfo as |
121 | // well. |
122 | if (!saw_mhz && |
123 | ReadIntFromFile("/sys/devices/system/cpu/cpu0/tsc_freq_khz" , &freq)) { |
124 | // The value is in kHz (as the file name suggests). For example, on a |
125 | // 2GHz warpstation, the file contains the value "2000000". |
126 | cpuinfo_cycles_per_second = freq * 1000.0; |
127 | saw_mhz = true; |
128 | } |
129 | |
130 | // If CPU scaling is in effect, we want to use the *maximum* frequency, |
131 | // not whatever CPU speed some random processor happens to be using now. |
132 | if (!saw_mhz && |
133 | ReadIntFromFile("/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq" , |
134 | &freq)) { |
135 | // The value is in kHz. For example, on a 2GHz warpstation, the file |
136 | // contains the value "2000000". |
137 | cpuinfo_cycles_per_second = freq * 1000.0; |
138 | saw_mhz = true; |
139 | } |
140 | |
141 | // Read /proc/cpuinfo for other values, and if there is no cpuinfo_max_freq. |
142 | const char* pname = "/proc/cpuinfo" ; |
143 | int fd = open(pname, O_RDONLY); |
144 | if (fd == -1) { |
145 | perror(pname); |
146 | if (!saw_mhz) { |
147 | cpuinfo_cycles_per_second = |
148 | static_cast<double>(EstimateCyclesPerSecond()); |
149 | } |
150 | return; |
151 | } |
152 | |
153 | double bogo_clock = 1.0; |
154 | bool saw_bogo = false; |
155 | long max_cpu_id = 0; |
156 | int num_cpus = 0; |
157 | line[0] = line[1] = '\0'; |
158 | size_t chars_read = 0; |
159 | do { // we'll exit when the last read didn't read anything |
160 | // Move the next line to the beginning of the buffer |
161 | const size_t oldlinelen = strlen(line); |
162 | if (sizeof(line) == oldlinelen + 1) // oldlinelen took up entire line |
163 | line[0] = '\0'; |
164 | else // still other lines left to save |
165 | memmove(line, line + oldlinelen + 1, sizeof(line) - (oldlinelen + 1)); |
166 | // Terminate the new line, reading more if we can't find the newline |
167 | char* newline = strchr(line, '\n'); |
168 | if (newline == nullptr) { |
169 | const size_t linelen = strlen(line); |
170 | const size_t bytes_to_read = sizeof(line) - 1 - linelen; |
171 | CHECK(bytes_to_read > 0); // because the memmove recovered >=1 bytes |
172 | chars_read = read(fd, line + linelen, bytes_to_read); |
173 | line[linelen + chars_read] = '\0'; |
174 | newline = strchr(line, '\n'); |
175 | } |
176 | if (newline != nullptr) *newline = '\0'; |
177 | |
178 | // When parsing the "cpu MHz" and "bogomips" (fallback) entries, we only |
179 | // accept postive values. Some environments (virtual machines) report zero, |
180 | // which would cause infinite looping in WallTime_Init. |
181 | if (!saw_mhz && startsWithKey(line, "cpu MHz" )) { |
182 | const char* freqstr = strchr(line, ':'); |
183 | if (freqstr) { |
184 | cpuinfo_cycles_per_second = strtod(freqstr + 1, &err) * 1000000.0; |
185 | if (freqstr[1] != '\0' && *err == '\0' && cpuinfo_cycles_per_second > 0) |
186 | saw_mhz = true; |
187 | } |
188 | } else if (startsWithKey(line, "bogomips" )) { |
189 | const char* freqstr = strchr(line, ':'); |
190 | if (freqstr) { |
191 | bogo_clock = strtod(freqstr + 1, &err) * 1000000.0; |
192 | if (freqstr[1] != '\0' && *err == '\0' && bogo_clock > 0) |
193 | saw_bogo = true; |
194 | } |
195 | } else if (startsWithKey(line, "processor" , /*IgnoreCase*/false)) { |
196 | // The above comparison is case-sensitive because ARM kernels often |
197 | // include a "Processor" line that tells you about the CPU, distinct |
198 | // from the usual "processor" lines that give you CPU ids. No current |
199 | // Linux architecture is using "Processor" for CPU ids. |
200 | num_cpus++; // count up every time we see an "processor :" entry |
201 | const char* id_str = strchr(line, ':'); |
202 | if (id_str) { |
203 | const long cpu_id = strtol(id_str + 1, &err, 10); |
204 | if (id_str[1] != '\0' && *err == '\0' && max_cpu_id < cpu_id) |
205 | max_cpu_id = cpu_id; |
206 | } |
207 | } |
208 | } while (chars_read > 0); |
209 | close(fd); |
210 | |
211 | if (!saw_mhz) { |
212 | if (saw_bogo) { |
213 | // If we didn't find anything better, we'll use bogomips, but |
214 | // we're not happy about it. |
215 | cpuinfo_cycles_per_second = bogo_clock; |
216 | } else { |
217 | // If we don't even have bogomips, we'll use the slow estimation. |
218 | cpuinfo_cycles_per_second = |
219 | static_cast<double>(EstimateCyclesPerSecond()); |
220 | } |
221 | } |
222 | if (num_cpus == 0) { |
223 | fprintf(stderr, "Failed to read num. CPUs correctly from /proc/cpuinfo\n" ); |
224 | } else { |
225 | if ((max_cpu_id + 1) != num_cpus) { |
226 | fprintf(stderr, |
227 | "CPU ID assignments in /proc/cpuinfo seem messed up." |
228 | " This is usually caused by a bad BIOS.\n" ); |
229 | } |
230 | cpuinfo_num_cpus = num_cpus; |
231 | } |
232 | |
233 | #elif defined BENCHMARK_OS_FREEBSD |
234 | // For this sysctl to work, the machine must be configured without |
235 | // SMP, APIC, or APM support. hz should be 64-bit in freebsd 7.0 |
236 | // and later. Before that, it's a 32-bit quantity (and gives the |
237 | // wrong answer on machines faster than 2^32 Hz). See |
238 | // http://lists.freebsd.org/pipermail/freebsd-i386/2004-November/001846.html |
239 | // But also compare FreeBSD 7.0: |
240 | // http://fxr.watson.org/fxr/source/i386/i386/tsc.c?v=RELENG70#L223 |
241 | // 231 error = sysctl_handle_quad(oidp, &freq, 0, req); |
242 | // To FreeBSD 6.3 (it's the same in 6-STABLE): |
243 | // http://fxr.watson.org/fxr/source/i386/i386/tsc.c?v=RELENG6#L131 |
244 | // 139 error = sysctl_handle_int(oidp, &freq, sizeof(freq), req); |
245 | #if __FreeBSD__ >= 7 |
246 | uint64_t hz = 0; |
247 | #else |
248 | unsigned int hz = 0; |
249 | #endif |
250 | size_t sz = sizeof(hz); |
251 | const char* sysctl_path = "machdep.tsc_freq" ; |
252 | if (sysctlbyname(sysctl_path, &hz, &sz, nullptr, 0) != 0) { |
253 | fprintf(stderr, "Unable to determine clock rate from sysctl: %s: %s\n" , |
254 | sysctl_path, strerror(errno)); |
255 | cpuinfo_cycles_per_second = static_cast<double>(EstimateCyclesPerSecond()); |
256 | } else { |
257 | cpuinfo_cycles_per_second = hz; |
258 | } |
259 | // TODO: also figure out cpuinfo_num_cpus |
260 | |
261 | #elif defined BENCHMARK_OS_WINDOWS |
262 | // In NT, read MHz from the registry. If we fail to do so or we're in win9x |
263 | // then make a crude estimate. |
264 | DWORD data, data_size = sizeof(data); |
265 | if (IsWindowsXPOrGreater() && |
266 | SUCCEEDED( |
267 | SHGetValueA(HKEY_LOCAL_MACHINE, |
268 | "HARDWARE\\DESCRIPTION\\System\\CentralProcessor\\0" , |
269 | "~MHz" , nullptr, &data, &data_size))) |
270 | cpuinfo_cycles_per_second = |
271 | static_cast<double>((int64_t)data * (int64_t)(1000 * 1000)); // was mhz |
272 | else |
273 | cpuinfo_cycles_per_second = static_cast<double>(EstimateCyclesPerSecond()); |
274 | |
275 | SYSTEM_INFO sysinfo; |
276 | // Use memset as opposed to = {} to avoid GCC missing initializer false |
277 | // positives. |
278 | std::memset(&sysinfo, 0, sizeof(SYSTEM_INFO)); |
279 | GetSystemInfo(&sysinfo); |
280 | cpuinfo_num_cpus = sysinfo.dwNumberOfProcessors; // number of logical |
281 | // processors in the current |
282 | // group |
283 | |
284 | #elif defined BENCHMARK_OS_MACOSX |
285 | int32_t num_cpus = 0; |
286 | size_t size = sizeof(num_cpus); |
287 | if (::sysctlbyname("hw.ncpu" , &num_cpus, &size, nullptr, 0) == 0 && |
288 | (size == sizeof(num_cpus))) { |
289 | cpuinfo_num_cpus = num_cpus; |
290 | } else { |
291 | fprintf(stderr, "%s\n" , strerror(errno)); |
292 | std::exit(EXIT_FAILURE); |
293 | } |
294 | int64_t cpu_freq = 0; |
295 | size = sizeof(cpu_freq); |
296 | if (::sysctlbyname("hw.cpufrequency" , &cpu_freq, &size, nullptr, 0) == 0 && |
297 | (size == sizeof(cpu_freq))) { |
298 | cpuinfo_cycles_per_second = cpu_freq; |
299 | } else { |
300 | fprintf(stderr, "%s\n" , strerror(errno)); |
301 | std::exit(EXIT_FAILURE); |
302 | } |
303 | #else |
304 | // Generic cycles per second counter |
305 | cpuinfo_cycles_per_second = static_cast<double>(EstimateCyclesPerSecond()); |
306 | #endif |
307 | } |
308 | |
309 | } // end namespace |
310 | |
311 | double CyclesPerSecond(void) { |
312 | std::call_once(cpuinfo_init, InitializeSystemInfo); |
313 | return cpuinfo_cycles_per_second; |
314 | } |
315 | |
316 | int NumCPUs(void) { |
317 | std::call_once(cpuinfo_init, InitializeSystemInfo); |
318 | return cpuinfo_num_cpus; |
319 | } |
320 | |
// Length of a string literal, excluding its terminating NUL. The adjacent ""
// tokens make this fail to compile unless "str" is a string literal.
#define strliterallen(str) (sizeof("" str "") - 1)

// Yields a pointer just past "prefix" when the first "len" bytes of "str"
// start with "prefix", and nullptr otherwise. "prefix" must be a string
// literal. "str" is parenthesized so that any pointer expression (for example
// a conditional expression) can be passed safely; note that it is evaluated
// more than once, so it must not have side effects.
#define memprefix(str, len, prefix)                           \
  ((((len) >= strliterallen(prefix)) &&                       \
    std::memcmp((str), prefix, strliterallen(prefix)) == 0)   \
       ? (str) + strliterallen(prefix)                        \
       : nullptr)
330 | |
331 | bool CpuScalingEnabled() { |
332 | #ifndef BENCHMARK_OS_WINDOWS |
333 | // On Linux, the CPUfreq subsystem exposes CPU information as files on the |
334 | // local file system. If reading the exported files fails, then we may not be |
335 | // running on Linux, so we silently ignore all the read errors. |
336 | for (int cpu = 0, num_cpus = NumCPUs(); cpu < num_cpus; ++cpu) { |
337 | std::string governor_file = |
338 | StrCat("/sys/devices/system/cpu/cpu" , cpu, "/cpufreq/scaling_governor" ); |
339 | FILE* file = fopen(governor_file.c_str(), "r" ); |
340 | if (!file) break; |
341 | char buff[16]; |
342 | size_t bytes_read = fread(buff, 1, sizeof(buff), file); |
343 | fclose(file); |
344 | if (memprefix(buff, bytes_read, "performance" ) == nullptr) return true; |
345 | } |
346 | #endif |
347 | return false; |
348 | } |
349 | |
350 | } // end namespace benchmark |
351 | |