| 1 | // Copyright 2015 Google Inc. All rights reserved. |
| 2 | // |
| 3 | // Licensed under the Apache License, Version 2.0 (the "License"); |
| 4 | // you may not use this file except in compliance with the License. |
| 5 | // You may obtain a copy of the License at |
| 6 | // |
| 7 | // http://www.apache.org/licenses/LICENSE-2.0 |
| 8 | // |
| 9 | // Unless required by applicable law or agreed to in writing, software |
| 10 | // distributed under the License is distributed on an "AS IS" BASIS, |
| 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 12 | // See the License for the specific language governing permissions and |
| 13 | // limitations under the License. |
| 14 | |
| 15 | #include "sysinfo.h" |
| 16 | #include "internal_macros.h" |
| 17 | |
| 18 | #ifdef BENCHMARK_OS_WINDOWS |
| 19 | #include <Shlwapi.h> |
| 20 | #include <VersionHelpers.h> |
| 21 | #include <Windows.h> |
| 22 | #else |
| 23 | #include <fcntl.h> |
| 24 | #ifndef __Fuchsia__ |
| 25 | #include <sys/resource.h> |
| 26 | #endif |
| 27 | #include <sys/time.h> |
| 28 | #include <sys/types.h> // this header must be included before 'sys/sysctl.h' to avoid compilation error on FreeBSD |
| 29 | #include <unistd.h> |
| 30 | #if defined BENCHMARK_OS_FREEBSD || defined BENCHMARK_OS_MACOSX |
| 31 | #include <sys/sysctl.h> |
| 32 | #endif |
| 33 | #endif |
| 34 | |
| 35 | #include <cerrno> |
| 36 | #include <cstdint> |
| 37 | #include <cstdio> |
| 38 | #include <cstdlib> |
| 39 | #include <cstring> |
| 40 | #include <iostream> |
| 41 | #include <limits> |
| 42 | #include <mutex> |
| 43 | |
| 44 | #include "arraysize.h" |
| 45 | #include "check.h" |
| 46 | #include "cycleclock.h" |
| 47 | #include "internal_macros.h" |
| 48 | #include "log.h" |
| 49 | #include "sleep.h" |
| 50 | #include "string_util.h" |
| 51 | |
| 52 | namespace benchmark { |
| 53 | namespace { |
// Guards the one-time system probe; handed to std::call_once by the public
// accessors in this file.
std::once_flag cpuinfo_init;
// Cached CPU frequency in cycles per second. Holds a placeholder until the
// probe has run.
double cpuinfo_cycles_per_second{1.0};
// Cached CPU count. Starts out as a conservative guess of a single CPU.
int cpuinfo_num_cpus{1};
| 57 | |
| 58 | #if !defined BENCHMARK_OS_MACOSX |
| 59 | const int64_t estimate_time_ms = 1000; |
| 60 | |
| 61 | // Helper function estimates cycles/sec by observing cycles elapsed during |
| 62 | // sleep(). Using small sleep time decreases accuracy significantly. |
| 63 | int64_t EstimateCyclesPerSecond() { |
| 64 | const int64_t start_ticks = cycleclock::Now(); |
| 65 | SleepForMilliseconds(estimate_time_ms); |
| 66 | return cycleclock::Now() - start_ticks; |
| 67 | } |
| 68 | #endif |
| 69 | |
| 70 | #if defined BENCHMARK_OS_LINUX || defined BENCHMARK_OS_CYGWIN |
// Reads a base-10 integer from the beginning of `file`.
//
// Returns true and stores the parsed number in `*value` only when the file
// can be opened, at least one byte can be read, the text starts with a number
// and that number is immediately followed by a newline or by the end of the
// data that was read. In every other case (missing file, read error, empty
// file, no digits, trailing garbage) returns false and leaves `*value`
// untouched.
bool ReadIntFromFile(const char* file, long* value) {
  const int fd = open(file, O_RDONLY);
  if (fd == -1) return false;

  char line[1024];
  memset(line, '\0', sizeof(line));
  // Read at most sizeof(line) - 1 bytes so the zero-filled buffer always stays
  // NUL-terminated.
  const ssize_t bytes_read = read(fd, line, sizeof(line) - 1);
  close(fd);
  // read() yields -1 on error and 0 for an empty file; neither gives us a
  // number, and neither is a reason to abort the whole process.
  if (bytes_read <= 0) return false;

  char* end = nullptr;
  const long parsed = strtol(line, &end, 10);
  // strtol() leaves `end` at the start of the buffer when it consumed no
  // digits at all (for example a file that holds just "\n").
  if (end == line) return false;
  if (*end != '\n' && *end != '\0') return false;

  *value = parsed;
  return true;
}
| 90 | #endif |
| 91 | |
| 92 | #if defined BENCHMARK_OS_LINUX || defined BENCHMARK_OS_CYGWIN |
// Returns a copy of `s` in which every character has been passed through
// std::tolower.
static std::string convertToLowerCase(std::string s) {
  for (auto& ch : s) {
    // std::tolower requires a value representable as unsigned char (or EOF);
    // handing it a negative plain char is undefined behaviour, so convert
    // through unsigned char first.
    ch = static_cast<char>(std::tolower(static_cast<unsigned char>(ch)));
  }
  return s;
}
| 98 | static bool startsWithKey(std::string Value, std::string Key, |
| 99 | bool IgnoreCase = true) { |
| 100 | if (IgnoreCase) { |
| 101 | Key = convertToLowerCase(std::move(Key)); |
| 102 | Value = convertToLowerCase(std::move(Value)); |
| 103 | } |
| 104 | return Value.compare(0, Key.size(), Key) == 0; |
| 105 | } |
| 106 | #endif |
| 107 | |
| 108 | void InitializeSystemInfo() { |
| 109 | #if defined BENCHMARK_OS_LINUX || defined BENCHMARK_OS_CYGWIN |
| 110 | char line[1024]; |
| 111 | char* err; |
| 112 | long freq; |
| 113 | |
| 114 | bool saw_mhz = false; |
| 115 | |
| 116 | // If the kernel is exporting the tsc frequency use that. There are issues |
| 117 | // where cpuinfo_max_freq cannot be relied on because the BIOS may be |
| 118 | // exporintg an invalid p-state (on x86) or p-states may be used to put the |
| 119 | // processor in a new mode (turbo mode). Essentially, those frequencies |
| 120 | // cannot always be relied upon. The same reasons apply to /proc/cpuinfo as |
| 121 | // well. |
| 122 | if (!saw_mhz && |
| 123 | ReadIntFromFile("/sys/devices/system/cpu/cpu0/tsc_freq_khz" , &freq)) { |
| 124 | // The value is in kHz (as the file name suggests). For example, on a |
| 125 | // 2GHz warpstation, the file contains the value "2000000". |
| 126 | cpuinfo_cycles_per_second = freq * 1000.0; |
| 127 | saw_mhz = true; |
| 128 | } |
| 129 | |
| 130 | // If CPU scaling is in effect, we want to use the *maximum* frequency, |
| 131 | // not whatever CPU speed some random processor happens to be using now. |
| 132 | if (!saw_mhz && |
| 133 | ReadIntFromFile("/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq" , |
| 134 | &freq)) { |
| 135 | // The value is in kHz. For example, on a 2GHz warpstation, the file |
| 136 | // contains the value "2000000". |
| 137 | cpuinfo_cycles_per_second = freq * 1000.0; |
| 138 | saw_mhz = true; |
| 139 | } |
| 140 | |
| 141 | // Read /proc/cpuinfo for other values, and if there is no cpuinfo_max_freq. |
| 142 | const char* pname = "/proc/cpuinfo" ; |
| 143 | int fd = open(pname, O_RDONLY); |
| 144 | if (fd == -1) { |
| 145 | perror(pname); |
| 146 | if (!saw_mhz) { |
| 147 | cpuinfo_cycles_per_second = |
| 148 | static_cast<double>(EstimateCyclesPerSecond()); |
| 149 | } |
| 150 | return; |
| 151 | } |
| 152 | |
| 153 | double bogo_clock = 1.0; |
| 154 | bool saw_bogo = false; |
| 155 | long max_cpu_id = 0; |
| 156 | int num_cpus = 0; |
| 157 | line[0] = line[1] = '\0'; |
| 158 | size_t chars_read = 0; |
| 159 | do { // we'll exit when the last read didn't read anything |
| 160 | // Move the next line to the beginning of the buffer |
| 161 | const size_t oldlinelen = strlen(line); |
| 162 | if (sizeof(line) == oldlinelen + 1) // oldlinelen took up entire line |
| 163 | line[0] = '\0'; |
| 164 | else // still other lines left to save |
| 165 | memmove(line, line + oldlinelen + 1, sizeof(line) - (oldlinelen + 1)); |
| 166 | // Terminate the new line, reading more if we can't find the newline |
| 167 | char* newline = strchr(line, '\n'); |
| 168 | if (newline == nullptr) { |
| 169 | const size_t linelen = strlen(line); |
| 170 | const size_t bytes_to_read = sizeof(line) - 1 - linelen; |
| 171 | CHECK(bytes_to_read > 0); // because the memmove recovered >=1 bytes |
| 172 | chars_read = read(fd, line + linelen, bytes_to_read); |
| 173 | line[linelen + chars_read] = '\0'; |
| 174 | newline = strchr(line, '\n'); |
| 175 | } |
| 176 | if (newline != nullptr) *newline = '\0'; |
| 177 | |
| 178 | // When parsing the "cpu MHz" and "bogomips" (fallback) entries, we only |
| 179 | // accept postive values. Some environments (virtual machines) report zero, |
| 180 | // which would cause infinite looping in WallTime_Init. |
| 181 | if (!saw_mhz && startsWithKey(line, "cpu MHz" )) { |
| 182 | const char* freqstr = strchr(line, ':'); |
| 183 | if (freqstr) { |
| 184 | cpuinfo_cycles_per_second = strtod(freqstr + 1, &err) * 1000000.0; |
| 185 | if (freqstr[1] != '\0' && *err == '\0' && cpuinfo_cycles_per_second > 0) |
| 186 | saw_mhz = true; |
| 187 | } |
| 188 | } else if (startsWithKey(line, "bogomips" )) { |
| 189 | const char* freqstr = strchr(line, ':'); |
| 190 | if (freqstr) { |
| 191 | bogo_clock = strtod(freqstr + 1, &err) * 1000000.0; |
| 192 | if (freqstr[1] != '\0' && *err == '\0' && bogo_clock > 0) |
| 193 | saw_bogo = true; |
| 194 | } |
| 195 | } else if (startsWithKey(line, "processor" , /*IgnoreCase*/false)) { |
| 196 | // The above comparison is case-sensitive because ARM kernels often |
| 197 | // include a "Processor" line that tells you about the CPU, distinct |
| 198 | // from the usual "processor" lines that give you CPU ids. No current |
| 199 | // Linux architecture is using "Processor" for CPU ids. |
| 200 | num_cpus++; // count up every time we see an "processor :" entry |
| 201 | const char* id_str = strchr(line, ':'); |
| 202 | if (id_str) { |
| 203 | const long cpu_id = strtol(id_str + 1, &err, 10); |
| 204 | if (id_str[1] != '\0' && *err == '\0' && max_cpu_id < cpu_id) |
| 205 | max_cpu_id = cpu_id; |
| 206 | } |
| 207 | } |
| 208 | } while (chars_read > 0); |
| 209 | close(fd); |
| 210 | |
| 211 | if (!saw_mhz) { |
| 212 | if (saw_bogo) { |
| 213 | // If we didn't find anything better, we'll use bogomips, but |
| 214 | // we're not happy about it. |
| 215 | cpuinfo_cycles_per_second = bogo_clock; |
| 216 | } else { |
| 217 | // If we don't even have bogomips, we'll use the slow estimation. |
| 218 | cpuinfo_cycles_per_second = |
| 219 | static_cast<double>(EstimateCyclesPerSecond()); |
| 220 | } |
| 221 | } |
| 222 | if (num_cpus == 0) { |
| 223 | fprintf(stderr, "Failed to read num. CPUs correctly from /proc/cpuinfo\n" ); |
| 224 | } else { |
| 225 | if ((max_cpu_id + 1) != num_cpus) { |
| 226 | fprintf(stderr, |
| 227 | "CPU ID assignments in /proc/cpuinfo seem messed up." |
| 228 | " This is usually caused by a bad BIOS.\n" ); |
| 229 | } |
| 230 | cpuinfo_num_cpus = num_cpus; |
| 231 | } |
| 232 | |
| 233 | #elif defined BENCHMARK_OS_FREEBSD |
| 234 | // For this sysctl to work, the machine must be configured without |
| 235 | // SMP, APIC, or APM support. hz should be 64-bit in freebsd 7.0 |
| 236 | // and later. Before that, it's a 32-bit quantity (and gives the |
| 237 | // wrong answer on machines faster than 2^32 Hz). See |
| 238 | // http://lists.freebsd.org/pipermail/freebsd-i386/2004-November/001846.html |
| 239 | // But also compare FreeBSD 7.0: |
| 240 | // http://fxr.watson.org/fxr/source/i386/i386/tsc.c?v=RELENG70#L223 |
| 241 | // 231 error = sysctl_handle_quad(oidp, &freq, 0, req); |
| 242 | // To FreeBSD 6.3 (it's the same in 6-STABLE): |
| 243 | // http://fxr.watson.org/fxr/source/i386/i386/tsc.c?v=RELENG6#L131 |
| 244 | // 139 error = sysctl_handle_int(oidp, &freq, sizeof(freq), req); |
| 245 | #if __FreeBSD__ >= 7 |
| 246 | uint64_t hz = 0; |
| 247 | #else |
| 248 | unsigned int hz = 0; |
| 249 | #endif |
| 250 | size_t sz = sizeof(hz); |
| 251 | const char* sysctl_path = "machdep.tsc_freq" ; |
| 252 | if (sysctlbyname(sysctl_path, &hz, &sz, nullptr, 0) != 0) { |
| 253 | fprintf(stderr, "Unable to determine clock rate from sysctl: %s: %s\n" , |
| 254 | sysctl_path, strerror(errno)); |
| 255 | cpuinfo_cycles_per_second = static_cast<double>(EstimateCyclesPerSecond()); |
| 256 | } else { |
| 257 | cpuinfo_cycles_per_second = hz; |
| 258 | } |
| 259 | // TODO: also figure out cpuinfo_num_cpus |
| 260 | |
| 261 | #elif defined BENCHMARK_OS_WINDOWS |
| 262 | // In NT, read MHz from the registry. If we fail to do so or we're in win9x |
| 263 | // then make a crude estimate. |
| 264 | DWORD data, data_size = sizeof(data); |
| 265 | if (IsWindowsXPOrGreater() && |
| 266 | SUCCEEDED( |
| 267 | SHGetValueA(HKEY_LOCAL_MACHINE, |
| 268 | "HARDWARE\\DESCRIPTION\\System\\CentralProcessor\\0" , |
| 269 | "~MHz" , nullptr, &data, &data_size))) |
| 270 | cpuinfo_cycles_per_second = |
| 271 | static_cast<double>((int64_t)data * (int64_t)(1000 * 1000)); // was mhz |
| 272 | else |
| 273 | cpuinfo_cycles_per_second = static_cast<double>(EstimateCyclesPerSecond()); |
| 274 | |
| 275 | SYSTEM_INFO sysinfo; |
| 276 | // Use memset as opposed to = {} to avoid GCC missing initializer false |
| 277 | // positives. |
| 278 | std::memset(&sysinfo, 0, sizeof(SYSTEM_INFO)); |
| 279 | GetSystemInfo(&sysinfo); |
| 280 | cpuinfo_num_cpus = sysinfo.dwNumberOfProcessors; // number of logical |
| 281 | // processors in the current |
| 282 | // group |
| 283 | |
| 284 | #elif defined BENCHMARK_OS_MACOSX |
| 285 | int32_t num_cpus = 0; |
| 286 | size_t size = sizeof(num_cpus); |
| 287 | if (::sysctlbyname("hw.ncpu" , &num_cpus, &size, nullptr, 0) == 0 && |
| 288 | (size == sizeof(num_cpus))) { |
| 289 | cpuinfo_num_cpus = num_cpus; |
| 290 | } else { |
| 291 | fprintf(stderr, "%s\n" , strerror(errno)); |
| 292 | std::exit(EXIT_FAILURE); |
| 293 | } |
| 294 | int64_t cpu_freq = 0; |
| 295 | size = sizeof(cpu_freq); |
| 296 | if (::sysctlbyname("hw.cpufrequency" , &cpu_freq, &size, nullptr, 0) == 0 && |
| 297 | (size == sizeof(cpu_freq))) { |
| 298 | cpuinfo_cycles_per_second = cpu_freq; |
| 299 | } else { |
| 300 | fprintf(stderr, "%s\n" , strerror(errno)); |
| 301 | std::exit(EXIT_FAILURE); |
| 302 | } |
| 303 | #else |
| 304 | // Generic cycles per second counter |
| 305 | cpuinfo_cycles_per_second = static_cast<double>(EstimateCyclesPerSecond()); |
| 306 | #endif |
| 307 | } |
| 308 | |
| 309 | } // end namespace |
| 310 | |
| 311 | double CyclesPerSecond(void) { |
| 312 | std::call_once(cpuinfo_init, InitializeSystemInfo); |
| 313 | return cpuinfo_cycles_per_second; |
| 314 | } |
| 315 | |
| 316 | int NumCPUs(void) { |
| 317 | std::call_once(cpuinfo_init, InitializeSystemInfo); |
| 318 | return cpuinfo_num_cpus; |
| 319 | } |
| 320 | |
// Length of a string literal, excluding the terminating NUL. The adjacent
// empty literals make the expansion fail to compile when `str` is anything
// other than a string literal.
#define strliterallen(str) (sizeof("" str "") - 1)

// If the first `len` bytes at `str` are long enough to hold `prefix` and
// start with it, evaluates to a pointer just past the prefix; otherwise
// evaluates to nullptr. `prefix` must be a string literal. `str` is
// parenthesized at every use so that an expression argument (for example a
// conditional expression) keeps its meaning inside the expansion; note that
// `str` is evaluated more than once.
#define memprefix(str, len, prefix)                         \
  ((((len) >= strliterallen(prefix)) &&                     \
    std::memcmp((str), prefix, strliterallen(prefix)) == 0) \
       ? (str) + strliterallen(prefix)                      \
       : nullptr)
| 330 | |
| 331 | bool CpuScalingEnabled() { |
| 332 | #ifndef BENCHMARK_OS_WINDOWS |
| 333 | // On Linux, the CPUfreq subsystem exposes CPU information as files on the |
| 334 | // local file system. If reading the exported files fails, then we may not be |
| 335 | // running on Linux, so we silently ignore all the read errors. |
| 336 | for (int cpu = 0, num_cpus = NumCPUs(); cpu < num_cpus; ++cpu) { |
| 337 | std::string governor_file = |
| 338 | StrCat("/sys/devices/system/cpu/cpu" , cpu, "/cpufreq/scaling_governor" ); |
| 339 | FILE* file = fopen(governor_file.c_str(), "r" ); |
| 340 | if (!file) break; |
| 341 | char buff[16]; |
| 342 | size_t bytes_read = fread(buff, 1, sizeof(buff), file); |
| 343 | fclose(file); |
| 344 | if (memprefix(buff, bytes_read, "performance" ) == nullptr) return true; |
| 345 | } |
| 346 | #endif |
| 347 | return false; |
| 348 | } |
| 349 | |
| 350 | } // end namespace benchmark |
| 351 | |