1 | /* |
2 | * Copyright 2013-present Facebook, Inc. |
3 | * |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | * you may not use this file except in compliance with the License. |
6 | * You may obtain a copy of the License at |
7 | * |
8 | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | * |
10 | * Unless required by applicable law or agreed to in writing, software |
11 | * distributed under the License is distributed on an "AS IS" BASIS, |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | * See the License for the specific language governing permissions and |
14 | * limitations under the License. |
15 | */ |
16 | |
17 | // This is heavily inspired by the signal handler from google-glog |
18 | |
19 | #include <folly/experimental/symbolizer/SignalHandler.h> |
20 | |
21 | #include <signal.h> |
22 | #include <sys/types.h> |
23 | |
24 | #include <algorithm> |
25 | #include <atomic> |
26 | #include <cerrno> |
27 | #include <ctime> |
28 | #include <mutex> |
29 | #include <vector> |
30 | |
31 | #include <glog/logging.h> |
32 | |
33 | #include <folly/Conv.h> |
34 | #include <folly/ScopeGuard.h> |
35 | #include <folly/experimental/symbolizer/ElfCache.h> |
36 | #include <folly/experimental/symbolizer/Symbolizer.h> |
37 | #include <folly/portability/SysSyscall.h> |
38 | #include <folly/portability/Unistd.h> |
39 | |
40 | namespace folly { |
41 | namespace symbolizer { |
42 | |
43 | namespace { |
44 | |
45 | /** |
46 | * Fatal signal handler registry. |
47 | */ |
48 | class FatalSignalCallbackRegistry { |
49 | public: |
50 | FatalSignalCallbackRegistry(); |
51 | |
52 | void add(SignalCallback func); |
53 | void markInstalled(); |
54 | void run(); |
55 | |
56 | private: |
57 | std::atomic<bool> installed_; |
58 | std::mutex mutex_; |
59 | std::vector<SignalCallback> handlers_; |
60 | }; |
61 | |
62 | FatalSignalCallbackRegistry::FatalSignalCallbackRegistry() |
63 | : installed_(false) {} |
64 | |
65 | void FatalSignalCallbackRegistry::add(SignalCallback func) { |
66 | std::lock_guard<std::mutex> lock(mutex_); |
67 | CHECK(!installed_) << "FatalSignalCallbackRegistry::add may not be used " |
68 | "after installing the signal handlers." ; |
69 | handlers_.push_back(func); |
70 | } |
71 | |
72 | void FatalSignalCallbackRegistry::markInstalled() { |
73 | std::lock_guard<std::mutex> lock(mutex_); |
74 | CHECK(!installed_.exchange(true)) |
75 | << "FatalSignalCallbackRegistry::markInstalled must be called " |
76 | << "at most once" ; |
77 | } |
78 | |
79 | void FatalSignalCallbackRegistry::run() { |
80 | if (!installed_) { |
81 | return; |
82 | } |
83 | |
84 | for (auto& fn : handlers_) { |
85 | fn(); |
86 | } |
87 | } |
88 | |
89 | std::atomic<FatalSignalCallbackRegistry*> gFatalSignalCallbackRegistry{}; |
90 | |
91 | static FatalSignalCallbackRegistry* getFatalSignalCallbackRegistry() { |
92 | // Leak it so we don't have to worry about destruction order |
93 | static FatalSignalCallbackRegistry* fatalSignalCallbackRegistry = |
94 | new FatalSignalCallbackRegistry(); |
95 | |
96 | return fatalSignalCallbackRegistry; |
97 | } |
98 | |
// One entry per signal handled by the fatal signal handler.
//   number    - the signal number
//   name      - its printable name; nullptr marks the end of the table
//   oldAction - the disposition that was in place before our handler was
//               installed (filled in by installFatalSignalHandler)
struct FatalSignalEntry {
  int number;
  const char* name;
  struct sigaction oldAction;
};

FatalSignalEntry kFatalSignals[] = {
    {SIGSEGV, "SIGSEGV", {}},
    {SIGILL, "SIGILL", {}},
    {SIGFPE, "SIGFPE", {}},
    {SIGABRT, "SIGABRT", {}},
    {SIGBUS, "SIGBUS", {}},
    {SIGTERM, "SIGTERM", {}},
    {SIGQUIT, "SIGQUIT", {}},
    // Sentinel: iteration stops at the first entry whose name is null.
    {0, nullptr, {}},
};
113 | |
114 | void callPreviousSignalHandler(int signum) { |
115 | // Restore disposition to old disposition, then kill ourselves with the same |
116 | // signal. The signal will be blocked until we return from our handler, |
117 | // then it will invoke the default handler and abort. |
118 | for (auto p = kFatalSignals; p->name; ++p) { |
119 | if (p->number == signum) { |
120 | sigaction(signum, &p->oldAction, nullptr); |
121 | raise(signum); |
122 | return; |
123 | } |
124 | } |
125 | |
126 | // Not one of the signals we know about. Oh well. Reset to default. |
127 | struct sigaction sa; |
128 | memset(&sa, 0, sizeof(sa)); |
129 | sa.sa_handler = SIG_DFL; |
130 | sigaction(signum, &sa, nullptr); |
131 | raise(signum); |
132 | } |
133 | |
134 | // Note: not thread-safe, but that's okay, as we only let one thread |
135 | // in our signal handler at a time. |
136 | // |
137 | // Leak it so we don't have to worry about destruction order |
138 | // |
139 | // Initialized by installFatalSignalHandler |
140 | SafeStackTracePrinter* gStackTracePrinter; |
141 | |
142 | void printDec(uint64_t val) { |
143 | char buf[20]; |
144 | uint32_t n = uint64ToBufferUnsafe(val, buf); |
145 | gStackTracePrinter->print(StringPiece(buf, n)); |
146 | } |
147 | |
148 | const char kHexChars[] = "0123456789abcdef" ; |
149 | void printHex(uint64_t val) { |
150 | // TODO(tudorb): Add this to folly/Conv.h |
151 | char buf[2 + 2 * sizeof(uint64_t)]; // "0x" prefix, 2 digits for each byte |
152 | |
153 | char* end = buf + sizeof(buf); |
154 | char* p = end; |
155 | do { |
156 | *--p = kHexChars[val & 0x0f]; |
157 | val >>= 4; |
158 | } while (val != 0); |
159 | *--p = 'x'; |
160 | *--p = '0'; |
161 | |
162 | gStackTracePrinter->print(StringPiece(p, end)); |
163 | } |
164 | |
165 | void print(StringPiece sp) { |
166 | gStackTracePrinter->print(sp); |
167 | } |
168 | |
169 | void flush() { |
170 | gStackTracePrinter->flush(); |
171 | } |
172 | |
173 | void dumpTimeInfo() { |
174 | SCOPE_EXIT { |
175 | flush(); |
176 | }; |
177 | time_t now = time(nullptr); |
178 | print("*** Aborted at " ); |
179 | printDec(now); |
180 | print(" (Unix time, try 'date -d @" ); |
181 | printDec(now); |
182 | print("') ***\n" ); |
183 | } |
184 | |
/**
 * Describes the si_code of a SIGILL signal.
 *
 * @return a static description, or nullptr if the code is not recognized
 */
const char* sigill_reason(int si_code) {
  struct Reason {
    int code;
    const char* text;
  };
  static constexpr Reason kReasons[] = {
      {ILL_ILLOPC, "illegal opcode"},
      {ILL_ILLOPN, "illegal operand"},
      {ILL_ILLADR, "illegal addressing mode"},
      {ILL_ILLTRP, "illegal trap"},
      {ILL_PRVOPC, "privileged opcode"},
      {ILL_PRVREG, "privileged register"},
      {ILL_COPROC, "coprocessor error"},
      {ILL_BADSTK, "internal stack error"},
  };

  for (const auto& reason : kReasons) {
    if (reason.code == si_code) {
      return reason.text;
    }
  }
  return nullptr;
}
208 | |
/**
 * Describes the si_code of a SIGFPE signal.
 *
 * @return a static description, or nullptr if the code is not recognized
 */
const char* sigfpe_reason(int si_code) {
  struct Reason {
    int code;
    const char* text;
  };
  static constexpr Reason kReasons[] = {
      {FPE_INTDIV, "integer divide by zero"},
      {FPE_INTOVF, "integer overflow"},
      {FPE_FLTDIV, "floating-point divide by zero"},
      {FPE_FLTOVF, "floating-point overflow"},
      {FPE_FLTUND, "floating-point underflow"},
      {FPE_FLTRES, "floating-point inexact result"},
      {FPE_FLTINV, "floating-point invalid operation"},
      {FPE_FLTSUB, "subscript out of range"},
  };

  for (const auto& reason : kReasons) {
    if (reason.code == si_code) {
      return reason.text;
    }
  }
  return nullptr;
}
232 | |
/**
 * Describes the si_code of a SIGSEGV signal.
 *
 * @return a static description, or nullptr if the code is not recognized
 */
const char* sigsegv_reason(int si_code) {
  if (si_code == SEGV_MAPERR) {
    return "address not mapped to object";
  }
  if (si_code == SEGV_ACCERR) {
    return "invalid permissions for mapped object";
  }
  return nullptr;
}
244 | |
/**
 * Describes the si_code of a SIGBUS signal.
 *
 * MCEERR_AR and MCEERR_AO are listed in sigaction(2) but are not in the
 * headers, so they are not handled here.
 *
 * @return a static description, or nullptr if the code is not recognized
 */
const char* sigbus_reason(int si_code) {
  if (si_code == BUS_ADRALN) {
    return "invalid address alignment";
  }
  if (si_code == BUS_ADRERR) {
    return "nonexistent physical address";
  }
  if (si_code == BUS_OBJERR) {
    return "object-specific hardware error";
  }
  return nullptr;
}
260 | |
/**
 * Describes the si_code of a SIGTRAP signal.
 *
 * TRAP_BRANCH and TRAP_HWBKPT are listed in sigaction(2) but are not in the
 * headers, so they are not handled here.
 *
 * @return a static description, or nullptr if the code is not recognized
 */
const char* sigtrap_reason(int si_code) {
  if (si_code == TRAP_BRKPT) {
    return "process breakpoint";
  }
  if (si_code == TRAP_TRACE) {
    return "process trace trap";
  }
  return nullptr;
}
274 | |
/**
 * Describes the si_code of a SIGCHLD signal.
 *
 * @return a static description, or nullptr if the code is not recognized
 */
const char* sigchld_reason(int si_code) {
  struct Reason {
    int code;
    const char* text;
  };
  static constexpr Reason kReasons[] = {
      {CLD_EXITED, "child has exited"},
      {CLD_KILLED, "child was killed"},
      {CLD_DUMPED, "child terminated abnormally"},
      {CLD_TRAPPED, "traced child has trapped"},
      {CLD_STOPPED, "child has stopped"},
      {CLD_CONTINUED, "stopped child has continued"},
  };

  for (const auto& reason : kReasons) {
    if (reason.code == si_code) {
      return reason.text;
    }
  }
  return nullptr;
}
294 | |
/**
 * Describes the si_code of a SIGIO signal.
 *
 * @return a static description, or nullptr if the code is not recognized
 */
const char* sigio_reason(int si_code) {
  struct Reason {
    int code;
    const char* text;
  };
  static constexpr Reason kReasons[] = {
      {POLL_IN, "data input available"},
      {POLL_OUT, "output buffers available"},
      {POLL_MSG, "input message available"},
      {POLL_ERR, "I/O error"},
      {POLL_PRI, "high priority input available"},
      {POLL_HUP, "device disconnected"},
  };

  for (const auto& reason : kReasons) {
    if (reason.code == si_code) {
      return reason.text;
    }
  }
  return nullptr;
}
314 | |
315 | const char* signal_reason(int signum, int si_code) { |
316 | switch (signum) { |
317 | case SIGILL: |
318 | return sigill_reason(si_code); |
319 | case SIGFPE: |
320 | return sigfpe_reason(si_code); |
321 | case SIGSEGV: |
322 | return sigsegv_reason(si_code); |
323 | case SIGBUS: |
324 | return sigbus_reason(si_code); |
325 | case SIGTRAP: |
326 | return sigtrap_reason(si_code); |
327 | case SIGCHLD: |
328 | return sigchld_reason(si_code); |
329 | case SIGIO: |
330 | return sigio_reason(si_code); // aka SIGPOLL |
331 | |
332 | default: |
333 | return nullptr; |
334 | } |
335 | } |
336 | |
337 | void dumpSignalInfo(int signum, siginfo_t* siginfo) { |
338 | SCOPE_EXIT { |
339 | flush(); |
340 | }; |
341 | // Get the signal name, if possible. |
342 | const char* name = nullptr; |
343 | for (auto p = kFatalSignals; p->name; ++p) { |
344 | if (p->number == signum) { |
345 | name = p->name; |
346 | break; |
347 | } |
348 | } |
349 | |
350 | print("*** Signal " ); |
351 | printDec(signum); |
352 | if (name) { |
353 | print(" (" ); |
354 | print(name); |
355 | print(")" ); |
356 | } |
357 | |
358 | print(" (" ); |
359 | printHex(reinterpret_cast<uint64_t>(siginfo->si_addr)); |
360 | print(") received by PID " ); |
361 | printDec(getpid()); |
362 | print(" (pthread TID " ); |
363 | printHex((uint64_t)pthread_self()); |
364 | print(") (linux TID " ); |
365 | printDec(syscall(__NR_gettid)); |
366 | |
367 | // Kernel-sourced signals don't give us useful info for pid/uid. |
368 | if (siginfo->si_code != SI_KERNEL) { |
369 | print(") (maybe from PID " ); |
370 | printDec(siginfo->si_pid); |
371 | print(", UID " ); |
372 | printDec(siginfo->si_uid); |
373 | } |
374 | |
375 | auto reason = signal_reason(signum, siginfo->si_code); |
376 | |
377 | if (reason != nullptr) { |
378 | print(") (code: " ); |
379 | print(reason); |
380 | } |
381 | |
382 | print("), stack trace: ***\n" ); |
383 | } |
384 | |
// Sentinel meaning "no thread is currently in the signal handler".
//
// On Linux, pthread_t is a pointer, so 0 is never a valid thread id.
// POSIX defines PTHREAD_NULL for this purpose, but that's not available.
constexpr pthread_t kInvalidThreadId = 0;

// Id of the thread currently inside the signal handler, or kInvalidThreadId.
std::atomic<pthread_t> gSignalThread{kInvalidThreadId};

// Set once the signal handler has been re-entered on the thread that
// already owns it.
std::atomic<bool> gInRecursiveSignalHandler{false};
393 | |
394 | // Here be dragons. |
395 | void innerSignalHandler(int signum, siginfo_t* info, void* /* uctx */) { |
396 | // First, let's only let one thread in here at a time. |
397 | pthread_t myId = pthread_self(); |
398 | |
399 | pthread_t prevSignalThread = kInvalidThreadId; |
400 | while (!gSignalThread.compare_exchange_strong(prevSignalThread, myId)) { |
401 | if (pthread_equal(prevSignalThread, myId)) { |
402 | // First time here. Try to dump the stack trace without symbolization. |
403 | // If we still fail, well, we're mightily screwed, so we do nothing the |
404 | // next time around. |
405 | if (!gInRecursiveSignalHandler.exchange(true)) { |
406 | print("Entered fatal signal handler recursively. We're in trouble.\n" ); |
407 | gStackTracePrinter->printStackTrace(false); // no symbolization |
408 | } |
409 | return; |
410 | } |
411 | |
412 | // Wait a while, try again. |
413 | timespec ts; |
414 | ts.tv_sec = 0; |
415 | ts.tv_nsec = 100L * 1000 * 1000; // 100ms |
416 | nanosleep(&ts, nullptr); |
417 | |
418 | prevSignalThread = kInvalidThreadId; |
419 | } |
420 | |
421 | dumpTimeInfo(); |
422 | dumpSignalInfo(signum, info); |
423 | gStackTracePrinter->printStackTrace(true); // with symbolization |
424 | |
425 | // Run user callbacks |
426 | auto callbacks = gFatalSignalCallbackRegistry.load(std::memory_order_acquire); |
427 | if (callbacks) { |
428 | callbacks->run(); |
429 | } |
430 | } |
431 | |
432 | void signalHandler(int signum, siginfo_t* info, void* uctx) { |
433 | int savedErrno = errno; |
434 | SCOPE_EXIT { |
435 | flush(); |
436 | errno = savedErrno; |
437 | }; |
438 | innerSignalHandler(signum, info, uctx); |
439 | |
440 | gSignalThread = kInvalidThreadId; |
441 | // Kill ourselves with the previous handler. |
442 | callPreviousSignalHandler(signum); |
443 | } |
444 | |
445 | } // namespace |
446 | |
447 | void addFatalSignalCallback(SignalCallback cb) { |
448 | getFatalSignalCallbackRegistry()->add(cb); |
449 | } |
450 | |
451 | void installFatalSignalCallbacks() { |
452 | getFatalSignalCallbackRegistry()->markInstalled(); |
453 | } |
454 | |
455 | namespace { |
456 | |
// Set by the first call to installFatalSignalHandler so that later calls
// are no-ops. Initialized explicitly: before C++20 a default-constructed
// std::atomic is formally uninitialized, and the other atomics in this file
// are initialized explicitly as well.
std::atomic<bool> gAlreadyInstalled{false};
458 | |
// Alternate signal stacks of at most this many bytes are considered small.
// 8931 is known to cause the signal handler to stack overflow during
// symbolization even for a simple one-liner "kill(getpid(), SIGTERM)".
constexpr size_t kSmallSigAltStackSize = 8931;

/**
 * Reports whether the calling thread has an alternate signal stack that is
 * enabled and no larger than kSmallSigAltStackSize.
 *
 * @return false if sigaltstack fails, if the alternate stack is disabled,
 *         or if it is larger than the threshold; true otherwise
 */
bool isSmallSigAltStackEnabled() {
  stack_t current;
  const bool queried = (sigaltstack(nullptr, &current) == 0);
  return queried && (current.ss_flags & SS_DISABLE) == 0 &&
      current.ss_size <= kSmallSigAltStackSize;
}
474 | |
475 | } // namespace |
476 | |
477 | void installFatalSignalHandler() { |
478 | if (gAlreadyInstalled.exchange(true)) { |
479 | // Already done. |
480 | return; |
481 | } |
482 | |
483 | // make sure gFatalSignalCallbackRegistry is created before we |
484 | // install the fatal signal handler |
485 | gFatalSignalCallbackRegistry.store( |
486 | getFatalSignalCallbackRegistry(), std::memory_order_release); |
487 | |
488 | // If a small sigaltstack is enabled (ex. Rust stdlib might use sigaltstack |
489 | // to set a small stack), the default SafeStackTracePrinter would likely |
490 | // stack overflow. Replace it with the unsafe self-allocate printer. |
491 | bool useUnsafePrinter = isSmallSigAltStackEnabled(); |
492 | if (useUnsafePrinter) { |
493 | gStackTracePrinter = new UnsafeSelfAllocateStackTracePrinter(); |
494 | } else { |
495 | gStackTracePrinter = new SafeStackTracePrinter(); |
496 | } |
497 | |
498 | struct sigaction sa; |
499 | memset(&sa, 0, sizeof(sa)); |
500 | if (useUnsafePrinter) { |
501 | // The signal handler is not async-signal-safe. Block all signals to |
502 | // make it safer. But it's still unsafe. |
503 | sigfillset(&sa.sa_mask); |
504 | } else { |
505 | sigemptyset(&sa.sa_mask); |
506 | } |
507 | // By default signal handlers are run on the signaled thread's stack. |
508 | // In case of stack overflow running the SIGSEGV signal handler on |
509 | // the same stack leads to another SIGSEGV and crashes the program. |
510 | // Use SA_ONSTACK, so alternate stack is used (only if configured via |
511 | // sigaltstack). |
512 | // Golang also requires SA_ONSTACK. See: |
513 | // https://golang.org/pkg/os/signal/#hdr-Go_programs_that_use_cgo_or_SWIG |
514 | sa.sa_flags |= SA_SIGINFO | SA_ONSTACK; |
515 | sa.sa_sigaction = &signalHandler; |
516 | |
517 | for (auto p = kFatalSignals; p->name; ++p) { |
518 | CHECK_ERR(sigaction(p->number, &sa, &p->oldAction)); |
519 | } |
520 | } |
521 | } // namespace symbolizer |
522 | } // namespace folly |
523 | |