1/********************************************************************
2 * Copyright (c) 2013 - 2014, Pivotal Inc.
3 * All rights reserved.
4 *
5 * Author: Zhanwei Wang
6 ********************************************************************/
7/********************************************************************
8 * 2014 -
9 * open source under Apache License Version 2.0
10 ********************************************************************/
11/**
12 * Licensed to the Apache Software Foundation (ASF) under one
13 * or more contributor license agreements. See the NOTICE file
14 * distributed with this work for additional information
15 * regarding copyright ownership. The ASF licenses this file
16 * to you under the Apache License, Version 2.0 (the
17 * "License"); you may not use this file except in compliance
18 * with the License. You may obtain a copy of the License at
19 *
20 * http://www.apache.org/licenses/LICENSE-2.0
21 *
22 * Unless required by applicable law or agreed to in writing, software
23 * distributed under the License is distributed on an "AS IS" BASIS,
24 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
25 * See the License for the specific language governing permissions and
26 * limitations under the License.
27 */
28#include "platform.h"
29
30#include "StackPrinter.h"
31
32#include <cassert>
33#include <cxxabi.h>
34#include <dlfcn.h>
35#include <execinfo.h>
36#include <sstream>
37#include <string>
38#include <vector>
39
40namespace Hdfs {
41namespace Internal {
42
43static void ATTRIBUTE_NOINLINE GetStack(int skip, int maxDepth,
44 std::vector<void *> & stack) {
45 std::ostringstream ss;
46 ++skip; //current frame.
47 stack.resize(maxDepth + skip);
48 int size;
49 size = backtrace(&stack[0], maxDepth + skip);
50 size = size - skip;
51
52 if (size < 0) {
53 stack.resize(0);
54 return;
55 }
56
57 stack.erase(stack.begin(), stack.begin() + skip);
58 stack.resize(size);
59}
60
// Convert a mangled C++ symbol name into its human readable form.
//
// symbol - NUL-terminated mangled name; may be NULL.
//
// Returns the demangled name on success. If "symbol" is not a valid
// mangled name it is returned unchanged, and a NULL "symbol" yields an
// empty string. Throws std::bad_alloc when the demangler reports that it
// ran out of memory (status -1).
std::string DemangleSymbol(const char * symbol) {
    // __cxa_demangle() reports -3 for a NULL name; building a std::string
    // from that NULL pointer would be undefined behaviour, so bail out.
    if (!symbol) {
        return std::string();
    }

    int status = 0;
    char * name = abi::__cxa_demangle(symbol, 0, 0, &status);

    if (status == -1) { // Memory allocation failure inside the demangler.
        free(name);
        throw std::bad_alloc();
    }

    if (status != 0 || !name) { // Not a mangled name or invalid argument.
        free(name);
        return symbol;
    }

    std::string retval;

    try {
        retval = name;
    } catch (...) {
        // Do not leak the malloc()ed buffer if the copy itself fails.
        free(name);
        throw;
    }

    free(name);
    return retval;
}
90
91#if defined(__ELF__)
92
93#include <elf.h>
94#include <errno.h>
95#include <fcntl.h>
96#include <limits.h>
97#include <link.h> // For ElfW() macro.
98#include <stdint.h>
99#include <stdio.h>
100#include <stdlib.h>
101#include <stddef.h>
102#include <string.h>
103#include <sys/stat.h>
104#include <sys/types.h>
105#include <unistd.h>
106
// Evaluates "call" again and again for as long as it fails with EINTR.
#define NO_INTR(call) do {} while ((call) < 0 && errno == EINTR)

// Fill "buf" with up to "count" bytes taken from file descriptor "fd",
// transparently continuing after short reads and EINTR. Returns the
// number of bytes actually stored (less than "count" only when EOF was
// hit), or -1 if read() failed for any reason other than EINTR.
static ssize_t ReadPersistent(const int fd, void * buf, const size_t count) {
    assert(fd >= 0);
    char * const dest = reinterpret_cast<char *>(buf);
    const ssize_t wanted = static_cast<ssize_t>(count);
    ssize_t total = 0;

    while (total < wanted) {
        ssize_t got;
        NO_INTR(got = read(fd, dest + total, count - total));

        if (got < 0) {
            return -1; // Genuine read error.
        }

        if (got == 0) {
            return total; // End of file.
        }

        total += got;
    }

    return total;
}
135
136// Read up to "count" bytes from "offset" in the file pointed by file
137// descriptor "fd" into the buffer starting at "buf". On success,
138// return the number of bytes read. Otherwise, return -1.
139static ssize_t ReadFromOffset(const int fd, void * buf,
140 const size_t count, const off_t offset) {
141 off_t off = lseek(fd, offset, SEEK_SET);
142
143 if (off == (off_t) - 1) {
144 return -1;
145 }
146
147 return ReadPersistent(fd, buf, count);
148}
149
150// Try reading exactly "count" bytes from "offset" bytes in a file
151// pointed by "fd" into the buffer starting at "buf" while handling
152// short reads and EINTR. On success, return true. Otherwise, return
153// false.
154static bool ReadFromOffsetExact(const int fd, void * buf,
155 const size_t count, const off_t offset) {
156 ssize_t len = ReadFromOffset(fd, buf, count, offset);
157 return len == static_cast<ssize_t>(count);
158}
159
160// Returns elf_header.e_type if the file pointed by fd is an ELF binary.
161static int FileGetElfType(const int fd) {
162 ElfW(Ehdr) elf_header;
163
164 if (!ReadFromOffsetExact(fd, &elf_header, sizeof(elf_header), 0)) {
165 return -1;
166 }
167
168 if (memcmp(elf_header.e_ident, ELFMAG, SELFMAG) != 0) {
169 return -1;
170 }
171
172 return elf_header.e_type;
173}
174
175// Read the section headers in the given ELF binary, and if a section
176// of the specified type is found, set the output to this section header
177// and return true. Otherwise, return false.
178// To keep stack consumption low, we would like this function to not get
179// inlined.
180static bool
181GetSectionHeaderByType(const int fd, ElfW(Half) sh_num, const off_t sh_offset,
182 ElfW(Word) type, ElfW(Shdr) *out) {
183 // Read at most 16 section headers at a time to save read calls.
184 ElfW(Shdr) buf[16];
185
186 for (int i = 0; i < sh_num;) {
187 const ssize_t num_bytes_left = (sh_num - i) * sizeof(buf[0]);
188 const ssize_t num_bytes_to_read =
189 (sizeof(buf) > static_cast<size_t>(num_bytes_left)) ? num_bytes_left : sizeof(buf);
190 const ssize_t len = ReadFromOffset(fd, buf, num_bytes_to_read,
191 sh_offset + i * sizeof(buf[0]));
192 assert(len % sizeof(buf[0]) == 0);
193 const ssize_t num_headers_in_buf = len / sizeof(buf[0]);
194
195 for (int j = 0; j < num_headers_in_buf; ++j) {
196 if (buf[j].sh_type == type) {
197 *out = buf[j];
198 return true;
199 }
200 }
201
202 i += num_headers_in_buf;
203 }
204
205 return false;
206}
207
208// There is no particular reason to limit section name to 63 characters,
209// but there has (as yet) been no need for anything longer either.
210const int kMaxSectionNameLen = 64;
211
212// name_len should include terminating '\0'.
213bool GetSectionHeaderByName(int fd, const char * name, size_t name_len,
214 ElfW(Shdr) *out) {
215 ElfW(Ehdr) elf_header;
216
217 if (!ReadFromOffsetExact(fd, &elf_header, sizeof(elf_header), 0)) {
218 return false;
219 }
220
221 ElfW(Shdr) shstrtab;
222 off_t shstrtab_offset = (elf_header.e_shoff +
223 elf_header.e_shentsize * elf_header.e_shstrndx);
224
225 if (!ReadFromOffsetExact(fd, &shstrtab, sizeof(shstrtab), shstrtab_offset)) {
226 return false;
227 }
228
229 for (int i = 0; i < elf_header.e_shnum; ++i) {
230 off_t section_header_offset = (elf_header.e_shoff +
231 elf_header.e_shentsize * i);
232
233 if (!ReadFromOffsetExact(fd, out, sizeof(*out), section_header_offset)) {
234 return false;
235 }
236
237 char header_name[kMaxSectionNameLen];
238
239 if (sizeof(header_name) < name_len) {
240 // No point in even trying.
241 return false;
242 }
243
244 off_t name_offset = shstrtab.sh_offset + out->sh_name;
245 ssize_t n_read = ReadFromOffset(fd, &header_name, name_len, name_offset);
246
247 if (n_read == -1) {
248 return false;
249 } else if (n_read != static_cast<ssize_t>(name_len)) {
250 // Short read -- name could be at end of file.
251 continue;
252 }
253
254 if (memcmp(header_name, name, name_len) == 0) {
255 return true;
256 }
257 }
258
259 return false;
260}
261
262// Read a symbol table and look for the symbol containing the
263// pc. Iterate over symbols in a symbol table and look for the symbol
264// containing "pc". On success, return true and write the symbol name
265// to out. Otherwise, return false.
266// To keep stack consumption low, we would like this function to not get
267// inlined.
268static bool
269FindSymbol(uint64_t pc, const int fd, char * out, int out_size,
270 uint64_t symbol_offset, const ElfW(Shdr) *strtab,
271 const ElfW(Shdr) *symtab) {
272 if (symtab == NULL) {
273 return false;
274 }
275
276 const int num_symbols = symtab->sh_size / symtab->sh_entsize;
277
278 for (int i = 0; i < num_symbols;) {
279 off_t offset = symtab->sh_offset + i * symtab->sh_entsize;
280 // If we are reading Elf64_Sym's, we want to limit this array to
281 // 32 elements (to keep stack consumption low), otherwise we can
282 // have a 64 element Elf32_Sym array.
283#if __WORDSIZE == 64
284#define NUM_SYMBOLS 32
285#else
286#define NUM_SYMBOLS 64
287#endif
288 // Read at most NUM_SYMBOLS symbols at once to save read() calls.
289 ElfW(Sym) buf[NUM_SYMBOLS];
290 const ssize_t len = ReadFromOffset(fd, &buf, sizeof(buf), offset);
291 assert(len % sizeof(buf[0]) == 0);
292 const ssize_t num_symbols_in_buf = len / sizeof(buf[0]);
293
294 for (int j = 0; j < num_symbols_in_buf; ++j) {
295 const ElfW(Sym)& symbol = buf[j];
296 uint64_t start_address = symbol.st_value;
297 start_address += symbol_offset;
298 uint64_t end_address = start_address + symbol.st_size;
299
300 if (symbol.st_value != 0 && // Skip null value symbols.
301 symbol.st_shndx != 0 &&// Skip undefined symbols.
302 start_address <= pc && pc < end_address) {
303 ssize_t len1 = ReadFromOffset(fd, out, out_size,
304 strtab->sh_offset + symbol.st_name);
305
306 if (len1 <= 0 || memchr(out, '\0', out_size) == NULL) {
307 return false;
308 }
309
310 return true; // Obtained the symbol name.
311 }
312 }
313
314 i += num_symbols_in_buf;
315 }
316
317 return false;
318}
319
320// Get the symbol name of "pc" from the file pointed by "fd". Process
321// both regular and dynamic symbol tables if necessary. On success,
322// write the symbol name to "out" and return true. Otherwise, return
323// false.
324static bool GetSymbolFromObjectFile(const int fd, uint64_t pc,
325 char * out, int out_size,
326 uint64_t map_start_address) {
327 // Read the ELF header.
328 ElfW(Ehdr) elf_header;
329
330 if (!ReadFromOffsetExact(fd, &elf_header, sizeof(elf_header), 0)) {
331 return false;
332 }
333
334 uint64_t symbol_offset = 0;
335
336 if (elf_header.e_type == ET_DYN) { // DSO needs offset adjustment.
337 symbol_offset = map_start_address;
338 }
339
340 ElfW(Shdr) symtab, strtab;
341
342 // Consult a regular symbol table first.
343 if (!GetSectionHeaderByType(fd, elf_header.e_shnum, elf_header.e_shoff,
344 SHT_SYMTAB, &symtab)) {
345 return false;
346 }
347
348 if (!ReadFromOffsetExact(fd, &strtab, sizeof(strtab), elf_header.e_shoff +
349 symtab.sh_link * sizeof(symtab))) {
350 return false;
351 }
352
353 if (FindSymbol(pc, fd, out, out_size, symbol_offset,
354 &strtab, &symtab)) {
355 return true; // Found the symbol in a regular symbol table.
356 }
357
358 // If the symbol is not found, then consult a dynamic symbol table.
359 if (!GetSectionHeaderByType(fd, elf_header.e_shnum, elf_header.e_shoff,
360 SHT_DYNSYM, &symtab)) {
361 return false;
362 }
363
364 if (!ReadFromOffsetExact(fd, &strtab, sizeof(strtab), elf_header.e_shoff +
365 symtab.sh_link * sizeof(symtab))) {
366 return false;
367 }
368
369 if (FindSymbol(pc, fd, out, out_size, symbol_offset,
370 &strtab, &symtab)) {
371 return true; // Found the symbol in a dynamic symbol table.
372 }
373
374 return false;
375}
376
377namespace {
// Thin wrapper around a file descriptor so that the file descriptor
// gets closed for sure when the wrapper goes out of scope. A negative
// descriptor is accepted and simply never closed. Not copyable.
struct FileDescriptor {
    const int fd_;
    explicit FileDescriptor(int fd) : fd_(fd) {}
    ~FileDescriptor() {
        if (fd_ >= 0) {
            // Deliberately a single attempt. On Linux the descriptor is
            // released even when close() fails with EINTR, so retrying
            // could close a descriptor that another thread has just been
            // handed with the same number (see close(2)).
            close(fd_);
        }
    }
    // Returns the wrapped descriptor; ownership stays with the wrapper.
    int get() const {
        return fd_;
    }

private:
    explicit FileDescriptor(const FileDescriptor &);
    void operator=(const FileDescriptor &);
};
396
397// Helper class for reading lines from file.
398//
399// Note: we don't use ProcMapsIterator since the object is big (it has
400// a 5k array member) and uses async-unsafe functions such as sscanf()
401// and snprintf().
402class LineReader {
403public:
404 explicit LineReader(int fd, char * buf, int buf_len) : fd_(fd),
405 buf_(buf), buf_len_(buf_len), bol_(buf), eol_(buf), eod_(buf) {
406 }
407
408 // Read '\n'-terminated line from file. On success, modify "bol"
409 // and "eol", then return true. Otherwise, return false.
410 //
411 // Note: if the last line doesn't end with '\n', the line will be
412 // dropped. It's an intentional behavior to make the code simple.
413 bool ReadLine(const char ** bol, const char ** eol) {
414 if (BufferIsEmpty()) { // First time.
415 const ssize_t num_bytes = ReadPersistent(fd_, buf_, buf_len_);
416
417 if (num_bytes <= 0) { // EOF or error.
418 return false;
419 }
420
421 eod_ = buf_ + num_bytes;
422 bol_ = buf_;
423 } else {
424 bol_ = eol_ + 1; // Advance to the next line in the buffer.
425 assert(bol_ <= eod_);// "bol_" can point to "eod_".
426
427 if (!HasCompleteLine()) {
428 const int incomplete_line_length = eod_ - bol_;
429 // Move the trailing incomplete line to the beginning.
430 memmove(buf_, bol_, incomplete_line_length);
431 // Read text from file and append it.
432 char * const append_pos = buf_ + incomplete_line_length;
433 const int capacity_left = buf_len_ - incomplete_line_length;
434 const ssize_t num_bytes = ReadPersistent(fd_, append_pos,
435 capacity_left);
436
437 if (num_bytes <= 0) { // EOF or error.
438 return false;
439 }
440
441 eod_ = append_pos + num_bytes;
442 bol_ = buf_;
443 }
444 }
445
446 eol_ = FindLineFeed();
447
448 if (eol_ == NULL) { // '\n' not found. Malformed line.
449 return false;
450 }
451
452 *eol_ = '\0'; // Replace '\n' with '\0'.
453 *bol = bol_;
454 *eol = eol_;
455 return true;
456 }
457
458 // Beginning of line.
459 const char * bol() {
460 return bol_;
461 }
462
463 // End of line.
464 const char * eol() {
465 return eol_;
466 }
467
468private:
469 explicit LineReader(const LineReader &);
470 void operator=(const LineReader &);
471
472 char * FindLineFeed() {
473 return reinterpret_cast<char *>(memchr(bol_, '\n', eod_ - bol_));
474 }
475
476 bool BufferIsEmpty() {
477 return buf_ == eod_;
478 }
479
480 bool HasCompleteLine() {
481 return !BufferIsEmpty() && FindLineFeed() != NULL;
482 }
483
484 const int fd_;
485 char * const buf_;
486 const int buf_len_;
487 char * bol_;
488 char * eol_;
489 const char * eod_; // End of data in "buf_".
490};
491} // namespace
492
// Parse the run of hexadecimal digits that begins at "start" and store
// its value in "*hex" (0 when there is no digit at all). Parsing stops at
// the first non-hex character or at "end", whichever comes first, and a
// pointer to that position is returned.
static char * GetHex(const char * start, const char * end, uint64_t * hex) {
    uint64_t value = 0;
    const char * cur = start;

    while (cur < end) {
        const int c = *cur;
        int digit;

        if (c >= '0' && c <= '9') {
            digit = c - '0';
        } else if (c >= 'A' && c <= 'F') {
            digit = c - 'A' + 10;
        } else if (c >= 'a' && c <= 'f') {
            digit = c - 'a' + 10;
        } else {
            break; // First character that is not a hex digit.
        }

        value = (value << 4) | digit;
        ++cur;
    }

    *hex = value;
    assert(cur <= end);
    return const_cast<char *>(cur);
}
513
514// Search for the object file (from /proc/self/maps) that contains
515// the specified pc. If found, open this file and return the file handle,
516// and also set start_address to the start address of where this object
517// file is mapped to in memory. Otherwise, return -1.
518static int
519OpenObjectFileContainingPcAndGetStartAddress(uint64_t pc,
520 uint64_t & start_address) {
521 int object_fd;
522 // Open /proc/self/maps.
523 int maps_fd;
524 NO_INTR(maps_fd = open("/proc/self/maps", O_RDONLY));
525 FileDescriptor wrapped_maps_fd(maps_fd);
526
527 if (wrapped_maps_fd.get() < 0) {
528 return -1;
529 }
530
531 // Iterate over maps and look for the map containing the pc. Then
532 // look into the symbol tables inside.
533 char buf[1024];// Big enough for line of sane /proc/self/maps
534 LineReader reader(wrapped_maps_fd.get(), buf, sizeof(buf));
535
536 while (true) {
537 const char * cursor;
538 const char * eol;
539
540 if (!reader.ReadLine(&cursor, &eol)) { // EOF or malformed line.
541 return -1;
542 }
543
544 // Start parsing line in /proc/self/maps. Here is an example:
545 //
546 // 08048000-0804c000 r-xp 00000000 08:01 2142121 /bin/cat
547 //
548 // We want start address (08048000), end address (0804c000), flags
549 // (r-xp) and file name (/bin/cat).
550 // Read start address.
551 cursor = GetHex(cursor, eol, &start_address);
552
553 if (cursor == eol || *cursor != '-') {
554 return -1; // Malformed line.
555 }
556
557 ++cursor; // Skip '-'.
558 // Read end address.
559 uint64_t end_address;
560 cursor = GetHex(cursor, eol, &end_address);
561
562 if (cursor == eol || *cursor != ' ') {
563 return -1; // Malformed line.
564 }
565
566 ++cursor; // Skip ' '.
567
568 // Check start and end addresses.
569 if (!(start_address <= pc && pc < end_address)) {
570 continue; // We skip this map. PC isn't in this map.
571 }
572
573 // Read flags. Skip flags until we encounter a space or eol.
574 const char * const flags_start = cursor;
575
576 while (cursor < eol && *cursor != ' ') {
577 ++cursor;
578 }
579
580 // We expect at least four letters for flags (ex. "r-xp").
581 if (cursor == eol || cursor < flags_start + 4) {
582 return -1; // Malformed line.
583 }
584
585 // Check flags. We are only interested in "r-x" maps.
586 if (memcmp(flags_start, "r-x", 3) != 0) { // Not a "r-x" map.
587 continue;// We skip this map.
588 }
589
590 ++cursor; // Skip ' '.
591 // Skip to file name. "cursor" now points to file offset. We need to
592 // skip at least three spaces for file offset, dev, and inode.
593 int num_spaces = 0;
594
595 while (cursor < eol) {
596 if (*cursor == ' ') {
597 ++num_spaces;
598 } else if (num_spaces >= 3) {
599 // The first non-space character after skipping three spaces
600 // is the beginning of the file name.
601 break;
602 }
603
604 ++cursor;
605 }
606
607 if (cursor == eol) {
608 return -1; // Malformed line.
609 }
610
611 // Finally, "cursor" now points to file name of our interest.
612 NO_INTR(object_fd = open(cursor, O_RDONLY));
613
614 if (object_fd < 0) {
615 return -1;
616 }
617
618 return object_fd;
619 }
620}
621
622static const std::string SymbolizeAndDemangle(void * pc) {
623 std::vector<char> buffer(1024);
624 std::ostringstream ss;
625 uint64_t pc0 = reinterpret_cast<uintptr_t>(pc);
626 uint64_t start_address = 0;
627 int object_fd = OpenObjectFileContainingPcAndGetStartAddress(pc0,
628 start_address);
629
630 if (object_fd == -1) {
631 return DEFAULT_STACK_PREFIX"Unknown";
632 }
633
634 FileDescriptor wrapped_object_fd(object_fd);
635 int elf_type = FileGetElfType(wrapped_object_fd.get());
636
637 if (elf_type == -1) {
638 return DEFAULT_STACK_PREFIX"Unknown";
639 }
640
641 if (!GetSymbolFromObjectFile(wrapped_object_fd.get(), pc0,
642 &buffer[0], buffer.size(), start_address)) {
643 return DEFAULT_STACK_PREFIX"Unknown";
644 }
645
646 ss << DEFAULT_STACK_PREFIX << DemangleSymbol(&buffer[0]);
647 return ss.str();
648}
649
650#elif defined(OS_MACOSX) && defined(HAVE_DLADDR)
651
652static const std::string SymbolizeAndDemangle(void * pc) {
653 Dl_info info;
654 std::ostringstream ss;
655
656 if (dladdr(pc, &info) && info.dli_sname) {
657 ss << DEFAULT_STACK_PREFIX << DemangleSymbol(info.dli_sname);
658 } else {
659 ss << DEFAULT_STACK_PREFIX << "Unknown";
660 }
661
662 return ss.str();
663}
664
665#endif
666
667const std::string PrintStack(int skip, int maxDepth) {
668 std::ostringstream ss;
669 std::vector<void *> stack;
670 GetStack(skip + 1, maxDepth, stack);
671
672 for (size_t i = 0; i < stack.size(); ++i) {
673 ss << SymbolizeAndDemangle(stack[i]) << std::endl;
674 }
675
676 return ss.str();
677}
678
679}
680}
681
682