1 | /* |
2 | * Copyright 2014-present Facebook, Inc. |
3 | * |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | * you may not use this file except in compliance with the License. |
6 | * You may obtain a copy of the License at |
7 | * |
8 | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | * |
10 | * Unless required by applicable law or agreed to in writing, software |
11 | * distributed under the License is distributed on an "AS IS" BASIS, |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | * See the License for the specific language governing permissions and |
14 | * limitations under the License. |
15 | */ |
16 | #ifndef __STDC_FORMAT_MACROS |
17 | #define __STDC_FORMAT_MACROS |
18 | #endif |
19 | |
20 | #include <folly/io/async/test/TimeUtil.h> |
21 | |
22 | #include <errno.h> |
23 | #include <fcntl.h> |
24 | #include <sys/stat.h> |
25 | #include <sys/types.h> |
26 | #ifdef __linux__ |
27 | #include <sys/utsname.h> |
28 | #endif |
29 | |
30 | #include <chrono> |
31 | #include <ostream> |
32 | #include <stdexcept> |
33 | |
34 | #include <folly/Conv.h> |
35 | #include <folly/ScopeGuard.h> |
36 | #include <folly/String.h> |
37 | #include <folly/portability/Unistd.h> |
38 | #include <folly/system/ThreadId.h> |
39 | |
40 | #include <glog/logging.h> |
41 | |
42 | using std::string; |
43 | using namespace std::chrono; |
44 | |
45 | namespace folly { |
46 | |
47 | static int getLinuxVersion(StringPiece release) { |
48 | auto dot1 = release.find('.'); |
49 | if (dot1 == StringPiece::npos) { |
50 | throw std::invalid_argument("could not find first dot" ); |
51 | } |
52 | auto v1 = folly::to<int>(release.subpiece(0, dot1)); |
53 | |
54 | auto dot2 = release.find('.', dot1 + 1); |
55 | if (dot2 == StringPiece::npos) { |
56 | throw std::invalid_argument("could not find second dot" ); |
57 | } |
58 | auto v2 = folly::to<int>(release.subpiece(dot1 + 1, dot2 - (dot1 + 1))); |
59 | |
60 | int dash = release.find('-', dot2 + 1); |
61 | auto v3 = folly::to<int>(release.subpiece(dot2 + 1, dash - (dot2 + 1))); |
62 | |
63 | return ((v1 * 1000 + v2) * 1000) + v3; |
64 | } |
65 | |
66 | #ifdef __linux__ |
67 | /** |
68 | * Determine the time units used in /proc/<pid>/schedstat |
69 | * |
70 | * Returns the number of nanoseconds per time unit, |
71 | * or -1 if we cannot determine the units. |
72 | */ |
73 | static int64_t determineSchedstatUnits() { |
74 | struct utsname unameInfo; |
75 | if (uname(&unameInfo) != 0) { |
76 | LOG(ERROR) << "unable to determine jiffies/second: uname failed: %s" |
77 | << errnoStr(errno); |
78 | return -1; |
79 | } |
80 | |
81 | // In Linux version 2.6.23 and later, time time values are always |
82 | // reported in nanoseconds. |
83 | // |
84 | // This change appears to have been made in commit 425e0968a25f, which |
85 | // moved some of the sched stats code to a new file. Despite the commit |
86 | // message claiming "no code changes are caused by this patch", it changed |
87 | // the task.sched_info.cpu_time and task.sched_info.run_delay counters to be |
88 | // computed using sched_clock() rather than jiffies. |
89 | int linuxVersion; |
90 | try { |
91 | linuxVersion = getLinuxVersion(unameInfo.release); |
92 | } catch (const std::exception&) { |
93 | LOG(ERROR) << "unable to determine jiffies/second: failed to parse " |
94 | << "kernel release string \"" << unameInfo.release << "\"" ; |
95 | return -1; |
96 | } |
97 | if (linuxVersion >= 2006023) { |
98 | // The units are nanoseconds |
99 | return 1; |
100 | } |
101 | |
102 | // In Linux versions prior to 2.6.23, the time values are reported in |
103 | // jiffies. This is somewhat unfortunate, as the number of jiffies per |
104 | // second is configurable. We have to determine the units being used. |
105 | // |
106 | // It seems like the only real way to figure out the CONFIG_HZ value used by |
107 | // this kernel is to look it up in the config file. |
108 | // |
109 | // Look in /boot/config-<kernel_release> |
110 | char configPath[256]; |
111 | snprintf( |
112 | configPath, sizeof(configPath), "/boot/config-%s" , unameInfo.release); |
113 | |
114 | FILE* f = fopen(configPath, "r" ); |
115 | if (f == nullptr) { |
116 | LOG(ERROR) << "unable to determine jiffies/second: " |
117 | "cannot open kernel config file %s" |
118 | << configPath; |
119 | return -1; |
120 | } |
121 | SCOPE_EXIT { |
122 | fclose(f); |
123 | }; |
124 | |
125 | int64_t hz = -1; |
126 | char buf[1024]; |
127 | while (fgets(buf, sizeof(buf), f) != nullptr) { |
128 | if (strcmp(buf, "CONFIG_NO_HZ=y\n" ) == 0) { |
129 | LOG(ERROR) << "unable to determine jiffies/second: tickless kernel" ; |
130 | return -1; |
131 | } else if (strcmp(buf, "CONFIG_HZ=1000\n" ) == 0) { |
132 | hz = 1000; |
133 | } else if (strcmp(buf, "CONFIG_HZ=300\n" ) == 0) { |
134 | hz = 300; |
135 | } else if (strcmp(buf, "CONFIG_HZ=250\n" ) == 0) { |
136 | hz = 250; |
137 | } else if (strcmp(buf, "CONFIG_HZ=100\n" ) == 0) { |
138 | hz = 100; |
139 | } |
140 | } |
141 | |
142 | if (hz == -1) { |
143 | LOG(ERROR) << "unable to determine jiffies/second: no CONFIG_HZ setting " |
144 | "found in %s" |
145 | << configPath; |
146 | return -1; |
147 | } |
148 | |
149 | return hz; |
150 | } |
151 | #endif |
152 | |
153 | /** |
154 | * Determine how long this process has spent waiting to get scheduled on the |
155 | * CPU. |
156 | * |
157 | * Returns the number of nanoseconds spent waiting, or -1 if the amount of |
158 | * time cannot be determined. |
159 | */ |
160 | static nanoseconds getSchedTimeWaiting(pid_t tid) { |
161 | #ifndef __linux__ |
162 | (void)tid; |
163 | return nanoseconds(0); |
164 | #else |
165 | static int64_t timeUnits = determineSchedstatUnits(); |
166 | if (timeUnits < 0) { |
167 | // We couldn't figure out how many jiffies there are in a second. |
168 | // Don't bother reading the schedstat info if we can't interpret it. |
169 | return nanoseconds(0); |
170 | } |
171 | |
172 | int fd = -1; |
173 | try { |
174 | char schedstatFile[256]; |
175 | snprintf(schedstatFile, sizeof(schedstatFile), "/proc/%d/schedstat" , tid); |
176 | fd = open(schedstatFile, O_RDONLY); |
177 | if (fd < 0) { |
178 | throw std::runtime_error( |
179 | folly::to<string>("failed to open process schedstat file" , errno)); |
180 | } |
181 | |
182 | char buf[512]; |
183 | ssize_t bytesReadRet = read(fd, buf, sizeof(buf) - 1); |
184 | if (bytesReadRet <= 0) { |
185 | throw std::runtime_error( |
186 | folly::to<string>("failed to read process schedstat file" , errno)); |
187 | } |
188 | size_t bytesRead = size_t(bytesReadRet); |
189 | |
190 | if (buf[bytesRead - 1] != '\n') { |
191 | throw std::runtime_error("expected newline at end of schedstat data" ); |
192 | } |
193 | assert(bytesRead < sizeof(buf)); |
194 | buf[bytesRead] = '\0'; |
195 | |
196 | uint64_t activeJiffies = 0; |
197 | uint64_t waitingJiffies = 0; |
198 | uint64_t numTasks = 0; |
199 | int rc = sscanf( |
200 | buf, |
201 | "%" PRIu64 " %" PRIu64 " %" PRIu64 "\n" , |
202 | &activeJiffies, |
203 | &waitingJiffies, |
204 | &numTasks); |
205 | if (rc != 3) { |
206 | throw std::runtime_error("failed to parse schedstat data" ); |
207 | } |
208 | |
209 | close(fd); |
210 | return nanoseconds(waitingJiffies * timeUnits); |
211 | } catch (const std::runtime_error& e) { |
212 | if (fd >= 0) { |
213 | close(fd); |
214 | } |
215 | LOG(ERROR) << "error determining process wait time: %s" << e.what(); |
216 | return nanoseconds(0); |
217 | } |
218 | #endif |
219 | } |
220 | |
221 | void TimePoint::reset() { |
222 | // Remember the current time |
223 | timeStart_ = steady_clock::now(); |
224 | |
225 | // Remember how long this process has spent waiting to be scheduled |
226 | tid_ = getOSThreadID(); |
227 | timeWaiting_ = getSchedTimeWaiting(tid_); |
228 | |
229 | // In case it took a while to read the schedstat info, |
230 | // also record the time after the schedstat check |
231 | timeEnd_ = steady_clock::now(); |
232 | } |
233 | |
234 | std::ostream& operator<<(std::ostream& os, const TimePoint& timePoint) { |
235 | os << "TimePoint(" << timePoint.getTimeStart().time_since_epoch().count() |
236 | << ", " << timePoint.getTimeEnd().time_since_epoch().count() << ", " |
237 | << timePoint.getTimeWaiting().count() << ")" ; |
238 | return os; |
239 | } |
240 | |
241 | bool checkTimeout( |
242 | const TimePoint& start, |
243 | const TimePoint& end, |
244 | nanoseconds expected, |
245 | bool allowSmaller, |
246 | nanoseconds tolerance) { |
247 | auto elapsedTime = end.getTimeStart() - start.getTimeEnd(); |
248 | |
249 | if (!allowSmaller) { |
250 | // Timeouts should never fire before the time was up. |
251 | // Allow 1ms of wiggle room for rounding errors. |
252 | if (elapsedTime < (expected - milliseconds(1))) { |
253 | return false; |
254 | } |
255 | } |
256 | |
257 | // Check that the event fired within a reasonable time of the timout. |
258 | // |
259 | // If the system is under heavy load, our process may have had to wait for a |
260 | // while to be run. The time spent waiting for the processor shouldn't |
261 | // count against us, so exclude this time from the check. |
262 | nanoseconds timeExcluded; |
263 | if (end.getTid() != start.getTid()) { |
264 | // We can only correctly compute the amount of time waiting to be scheduled |
265 | // if both TimePoints were set in the same thread. |
266 | timeExcluded = nanoseconds(0); |
267 | } else { |
268 | timeExcluded = end.getTimeWaiting() - start.getTimeWaiting(); |
269 | assert(end.getTimeWaiting() >= start.getTimeWaiting()); |
270 | // Add a tolerance here due to precision issues on linux, see below note. |
271 | assert((elapsedTime + tolerance) >= timeExcluded); |
272 | } |
273 | |
274 | nanoseconds effectiveElapsedTime(0); |
275 | if (elapsedTime > timeExcluded) { |
276 | effectiveElapsedTime = elapsedTime - timeExcluded; |
277 | } |
278 | |
279 | // On x86 Linux, sleep calls generally have precision only to the nearest |
280 | // millisecond. The tolerance parameter lets users allow a few ms of slop. |
281 | auto overrun = effectiveElapsedTime - expected; |
282 | if (overrun > tolerance) { |
283 | return false; |
284 | } |
285 | |
286 | return true; |
287 | } |
288 | |
289 | } // namespace folly |
290 | |