1/*
2 * Copyright 2014-present Facebook, Inc.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16#ifndef __STDC_FORMAT_MACROS
17#define __STDC_FORMAT_MACROS
18#endif
19
20#include <folly/io/async/test/TimeUtil.h>
21
22#include <errno.h>
23#include <fcntl.h>
24#include <sys/stat.h>
25#include <sys/types.h>
26#ifdef __linux__
27#include <sys/utsname.h>
28#endif
29
30#include <chrono>
31#include <ostream>
32#include <stdexcept>
33
34#include <folly/Conv.h>
35#include <folly/ScopeGuard.h>
36#include <folly/String.h>
37#include <folly/portability/Unistd.h>
38#include <folly/system/ThreadId.h>
39
40#include <glog/logging.h>
41
42using std::string;
43using namespace std::chrono;
44
45namespace folly {
46
47static int getLinuxVersion(StringPiece release) {
48 auto dot1 = release.find('.');
49 if (dot1 == StringPiece::npos) {
50 throw std::invalid_argument("could not find first dot");
51 }
52 auto v1 = folly::to<int>(release.subpiece(0, dot1));
53
54 auto dot2 = release.find('.', dot1 + 1);
55 if (dot2 == StringPiece::npos) {
56 throw std::invalid_argument("could not find second dot");
57 }
58 auto v2 = folly::to<int>(release.subpiece(dot1 + 1, dot2 - (dot1 + 1)));
59
60 int dash = release.find('-', dot2 + 1);
61 auto v3 = folly::to<int>(release.subpiece(dot2 + 1, dash - (dot2 + 1)));
62
63 return ((v1 * 1000 + v2) * 1000) + v3;
64}
65
66#ifdef __linux__
67/**
68 * Determine the time units used in /proc/<pid>/schedstat
69 *
70 * Returns the number of nanoseconds per time unit,
71 * or -1 if we cannot determine the units.
72 */
73static int64_t determineSchedstatUnits() {
74 struct utsname unameInfo;
75 if (uname(&unameInfo) != 0) {
76 LOG(ERROR) << "unable to determine jiffies/second: uname failed: %s"
77 << errnoStr(errno);
78 return -1;
79 }
80
81 // In Linux version 2.6.23 and later, time time values are always
82 // reported in nanoseconds.
83 //
84 // This change appears to have been made in commit 425e0968a25f, which
85 // moved some of the sched stats code to a new file. Despite the commit
86 // message claiming "no code changes are caused by this patch", it changed
87 // the task.sched_info.cpu_time and task.sched_info.run_delay counters to be
88 // computed using sched_clock() rather than jiffies.
89 int linuxVersion;
90 try {
91 linuxVersion = getLinuxVersion(unameInfo.release);
92 } catch (const std::exception&) {
93 LOG(ERROR) << "unable to determine jiffies/second: failed to parse "
94 << "kernel release string \"" << unameInfo.release << "\"";
95 return -1;
96 }
97 if (linuxVersion >= 2006023) {
98 // The units are nanoseconds
99 return 1;
100 }
101
102 // In Linux versions prior to 2.6.23, the time values are reported in
103 // jiffies. This is somewhat unfortunate, as the number of jiffies per
104 // second is configurable. We have to determine the units being used.
105 //
106 // It seems like the only real way to figure out the CONFIG_HZ value used by
107 // this kernel is to look it up in the config file.
108 //
109 // Look in /boot/config-<kernel_release>
110 char configPath[256];
111 snprintf(
112 configPath, sizeof(configPath), "/boot/config-%s", unameInfo.release);
113
114 FILE* f = fopen(configPath, "r");
115 if (f == nullptr) {
116 LOG(ERROR) << "unable to determine jiffies/second: "
117 "cannot open kernel config file %s"
118 << configPath;
119 return -1;
120 }
121 SCOPE_EXIT {
122 fclose(f);
123 };
124
125 int64_t hz = -1;
126 char buf[1024];
127 while (fgets(buf, sizeof(buf), f) != nullptr) {
128 if (strcmp(buf, "CONFIG_NO_HZ=y\n") == 0) {
129 LOG(ERROR) << "unable to determine jiffies/second: tickless kernel";
130 return -1;
131 } else if (strcmp(buf, "CONFIG_HZ=1000\n") == 0) {
132 hz = 1000;
133 } else if (strcmp(buf, "CONFIG_HZ=300\n") == 0) {
134 hz = 300;
135 } else if (strcmp(buf, "CONFIG_HZ=250\n") == 0) {
136 hz = 250;
137 } else if (strcmp(buf, "CONFIG_HZ=100\n") == 0) {
138 hz = 100;
139 }
140 }
141
142 if (hz == -1) {
143 LOG(ERROR) << "unable to determine jiffies/second: no CONFIG_HZ setting "
144 "found in %s"
145 << configPath;
146 return -1;
147 }
148
149 return hz;
150}
151#endif
152
153/**
154 * Determine how long this process has spent waiting to get scheduled on the
155 * CPU.
156 *
157 * Returns the number of nanoseconds spent waiting, or -1 if the amount of
158 * time cannot be determined.
159 */
160static nanoseconds getSchedTimeWaiting(pid_t tid) {
161#ifndef __linux__
162 (void)tid;
163 return nanoseconds(0);
164#else
165 static int64_t timeUnits = determineSchedstatUnits();
166 if (timeUnits < 0) {
167 // We couldn't figure out how many jiffies there are in a second.
168 // Don't bother reading the schedstat info if we can't interpret it.
169 return nanoseconds(0);
170 }
171
172 int fd = -1;
173 try {
174 char schedstatFile[256];
175 snprintf(schedstatFile, sizeof(schedstatFile), "/proc/%d/schedstat", tid);
176 fd = open(schedstatFile, O_RDONLY);
177 if (fd < 0) {
178 throw std::runtime_error(
179 folly::to<string>("failed to open process schedstat file", errno));
180 }
181
182 char buf[512];
183 ssize_t bytesReadRet = read(fd, buf, sizeof(buf) - 1);
184 if (bytesReadRet <= 0) {
185 throw std::runtime_error(
186 folly::to<string>("failed to read process schedstat file", errno));
187 }
188 size_t bytesRead = size_t(bytesReadRet);
189
190 if (buf[bytesRead - 1] != '\n') {
191 throw std::runtime_error("expected newline at end of schedstat data");
192 }
193 assert(bytesRead < sizeof(buf));
194 buf[bytesRead] = '\0';
195
196 uint64_t activeJiffies = 0;
197 uint64_t waitingJiffies = 0;
198 uint64_t numTasks = 0;
199 int rc = sscanf(
200 buf,
201 "%" PRIu64 " %" PRIu64 " %" PRIu64 "\n",
202 &activeJiffies,
203 &waitingJiffies,
204 &numTasks);
205 if (rc != 3) {
206 throw std::runtime_error("failed to parse schedstat data");
207 }
208
209 close(fd);
210 return nanoseconds(waitingJiffies * timeUnits);
211 } catch (const std::runtime_error& e) {
212 if (fd >= 0) {
213 close(fd);
214 }
215 LOG(ERROR) << "error determining process wait time: %s" << e.what();
216 return nanoseconds(0);
217 }
218#endif
219}
220
221void TimePoint::reset() {
222 // Remember the current time
223 timeStart_ = steady_clock::now();
224
225 // Remember how long this process has spent waiting to be scheduled
226 tid_ = getOSThreadID();
227 timeWaiting_ = getSchedTimeWaiting(tid_);
228
229 // In case it took a while to read the schedstat info,
230 // also record the time after the schedstat check
231 timeEnd_ = steady_clock::now();
232}
233
234std::ostream& operator<<(std::ostream& os, const TimePoint& timePoint) {
235 os << "TimePoint(" << timePoint.getTimeStart().time_since_epoch().count()
236 << ", " << timePoint.getTimeEnd().time_since_epoch().count() << ", "
237 << timePoint.getTimeWaiting().count() << ")";
238 return os;
239}
240
241bool checkTimeout(
242 const TimePoint& start,
243 const TimePoint& end,
244 nanoseconds expected,
245 bool allowSmaller,
246 nanoseconds tolerance) {
247 auto elapsedTime = end.getTimeStart() - start.getTimeEnd();
248
249 if (!allowSmaller) {
250 // Timeouts should never fire before the time was up.
251 // Allow 1ms of wiggle room for rounding errors.
252 if (elapsedTime < (expected - milliseconds(1))) {
253 return false;
254 }
255 }
256
257 // Check that the event fired within a reasonable time of the timout.
258 //
259 // If the system is under heavy load, our process may have had to wait for a
260 // while to be run. The time spent waiting for the processor shouldn't
261 // count against us, so exclude this time from the check.
262 nanoseconds timeExcluded;
263 if (end.getTid() != start.getTid()) {
264 // We can only correctly compute the amount of time waiting to be scheduled
265 // if both TimePoints were set in the same thread.
266 timeExcluded = nanoseconds(0);
267 } else {
268 timeExcluded = end.getTimeWaiting() - start.getTimeWaiting();
269 assert(end.getTimeWaiting() >= start.getTimeWaiting());
270 // Add a tolerance here due to precision issues on linux, see below note.
271 assert((elapsedTime + tolerance) >= timeExcluded);
272 }
273
274 nanoseconds effectiveElapsedTime(0);
275 if (elapsedTime > timeExcluded) {
276 effectiveElapsedTime = elapsedTime - timeExcluded;
277 }
278
279 // On x86 Linux, sleep calls generally have precision only to the nearest
280 // millisecond. The tolerance parameter lets users allow a few ms of slop.
281 auto overrun = effectiveElapsedTime - expected;
282 if (overrun > tolerance) {
283 return false;
284 }
285
286 return true;
287}
288
289} // namespace folly
290