1// Copyright (c) 2011 Google Inc.
2// All rights reserved.
3//
4// Redistribution and use in source and binary forms, with or without
5// modification, are permitted provided that the following conditions are
6// met:
7//
8// * Redistributions of source code must retain the above copyright
9// notice, this list of conditions and the following disclaimer.
10// * Redistributions in binary form must reproduce the above
11// copyright notice, this list of conditions and the following disclaimer
12// in the documentation and/or other materials provided with the
13// distribution.
14// * Neither the name of Google Inc. nor the names of its
15// contributors may be used to endorse or promote products derived from
16// this software without specific prior written permission.
17//
18// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
30// Restructured in 2009 by: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com>
31
32// dump_symbols.cc: implement google_breakpad::WriteSymbolFile:
33// Find all the debugging info in a file and dump it as a Breakpad symbol file.
34
35#include "common/linux/dump_symbols.h"
36
37#include <assert.h>
38#include <elf.h>
39#include <errno.h>
40#include <fcntl.h>
41#include <limits.h>
42#include <link.h>
43#include <stdint.h>
44#include <stdio.h>
45#include <stdlib.h>
46#include <string.h>
47#include <sys/mman.h>
48#include <sys/stat.h>
49#include <unistd.h>
50
51#include <iostream>
52#include <set>
53#include <string>
54#include <utility>
55#include <vector>
56
57#include "common/dwarf/bytereader-inl.h"
58#include "common/dwarf/dwarf2diehandler.h"
59#include "common/dwarf_cfi_to_module.h"
60#include "common/dwarf_cu_to_module.h"
61#include "common/dwarf_line_to_module.h"
62#include "common/dwarf_range_list_handler.h"
63#include "common/linux/crc32.h"
64#include "common/linux/eintr_wrapper.h"
65#include "common/linux/elfutils.h"
66#include "common/linux/elfutils-inl.h"
67#include "common/linux/elf_symbols_to_module.h"
68#include "common/linux/file_id.h"
69#include "common/memory_allocator.h"
70#include "common/module.h"
71#include "common/path_helper.h"
72#include "common/scoped_ptr.h"
73#ifndef NO_STABS_SUPPORT
74#include "common/stabs_reader.h"
75#include "common/stabs_to_module.h"
76#endif
77#include "common/using_std_string.h"
78
79// This namespace contains helper functions.
80namespace {
81
82using google_breakpad::DumpOptions;
83using google_breakpad::DwarfCFIToModule;
84using google_breakpad::DwarfCUToModule;
85using google_breakpad::DwarfLineToModule;
86using google_breakpad::DwarfRangeListHandler;
87using google_breakpad::ElfClass;
88using google_breakpad::ElfClass32;
89using google_breakpad::ElfClass64;
90using google_breakpad::elf::FileID;
91using google_breakpad::FindElfSectionByName;
92using google_breakpad::GetOffset;
93using google_breakpad::IsValidElf;
94using google_breakpad::elf::kDefaultBuildIdSize;
95using google_breakpad::Module;
96using google_breakpad::PageAllocator;
97#ifndef NO_STABS_SUPPORT
98using google_breakpad::StabsToModule;
99#endif
100using google_breakpad::scoped_ptr;
101using google_breakpad::wasteful_vector;
102
103// Define AARCH64 ELF architecture if host machine does not include this define.
104#ifndef EM_AARCH64
105#define EM_AARCH64 183
106#endif
107
//
// FDWrapper
//
// Scoped owner of a file descriptor: the descriptor is closed when the
// wrapper is destroyed, unless ownership was given up with release().
// The value -1 means "no descriptor" and is never closed.
//
// Copying is disabled, because two wrappers holding the same descriptor
// would each close it.
//
class FDWrapper {
 public:
  // Takes ownership of |fd|.
  explicit FDWrapper(int fd) :
    fd_(fd) {}
  ~FDWrapper() {
    if (fd_ != -1)
      close(fd_);
  }
  // Returns the wrapped descriptor; ownership stays with the wrapper.
  int get() const {
    return fd_;
  }
  // Gives up ownership: returns the descriptor and leaves the wrapper
  // holding -1, so the destructor will not close anything.
  int release() {
    int fd = fd_;
    fd_ = -1;
    return fd;
  }
 private:
  // Declared but deliberately not defined: the wrapper is not copyable.
  FDWrapper(const FDWrapper&);
  FDWrapper& operator=(const FDWrapper&);

  int fd_;
};
132
//
// MmapWrapper
//
// Scoped owner of a memory-mapped region: the region registered with set()
// is unmapped when the wrapper is destroyed, unless release() was called
// first.
//
// Copying is disabled, because two wrappers describing the same region
// would each unmap it.
//
class MmapWrapper {
 public:
  // Starts out empty; every member has a defined value from the start.
  MmapWrapper() : is_set_(false), base_(NULL), size_(0) {}
  ~MmapWrapper() {
    if (is_set_ && base_ != NULL) {
      assert(size_ > 0);
      munmap(base_, size_);
    }
  }
  // Records the region [mapped_address, mapped_address + mapped_size) as
  // owned by this wrapper. A region recorded by an earlier call is simply
  // forgotten, not unmapped.
  void set(void* mapped_address, size_t mapped_size) {
    is_set_ = true;
    base_ = mapped_address;
    size_ = mapped_size;
  }
  // Gives up ownership without unmapping. Must only be called after set().
  void release() {
    assert(is_set_);
    is_set_ = false;
    base_ = NULL;
    size_ = 0;
  }

 private:
  // Declared but deliberately not defined: the wrapper is not copyable.
  MmapWrapper(const MmapWrapper&);
  MmapWrapper& operator=(const MmapWrapper&);

  bool is_set_;   // True between set() and release().
  void* base_;    // Start of the owned region, or NULL.
  size_t size_;   // Length of the owned region in bytes.
};
164
// Return the preferred loading address of the binary described by the
// |nheader| entries of |program_headers|: the virtual address of the first
// PT_LOAD segment, or 0 when no PT_LOAD segment is present.
template<typename ElfClass>
typename ElfClass::Addr GetLoadingAddress(
    const typename ElfClass::Phdr* program_headers,
    int nheader) {
  typedef typename ElfClass::Phdr Phdr;

  // Non-PIC executables (e_type == ET_EXEC) load at the start address of
  // their first PT_LOAD segment; ELF requires segments to be sorted by
  // load address, so the first match is the one wanted. For PIC
  // executables and dynamic libraries (e_type == ET_DYN) that address is
  // normally zero.
  const Phdr* segment = program_headers;
  for (int remaining = nheader; remaining > 0; --remaining, ++segment) {
    if (segment->p_type == PT_LOAD)
      return segment->p_vaddr;
  }
  return 0;
}
184
185// Find the set of address ranges for all PT_LOAD segments.
186template <typename ElfClass>
187vector<Module::Range> GetPtLoadSegmentRanges(
188 const typename ElfClass::Phdr* program_headers,
189 int nheader) {
190 typedef typename ElfClass::Phdr Phdr;
191 vector<Module::Range> ranges;
192
193 for (int i = 0; i < nheader; ++i) {
194 const Phdr& header = program_headers[i];
195 if (header.p_type == PT_LOAD) {
196 ranges.push_back(Module::Range(header.p_vaddr, header.p_memsz));
197 }
198 }
199 return ranges;
200}
201
202#ifndef NO_STABS_SUPPORT
203template<typename ElfClass>
204bool LoadStabs(const typename ElfClass::Ehdr* elf_header,
205 const typename ElfClass::Shdr* stab_section,
206 const typename ElfClass::Shdr* stabstr_section,
207 const bool big_endian,
208 Module* module) {
209 // A callback object to handle data from the STABS reader.
210 StabsToModule handler(module);
211 // Find the addresses of the STABS data, and create a STABS reader object.
212 // On Linux, STABS entries always have 32-bit values, regardless of the
213 // address size of the architecture whose code they're describing, and
214 // the strings are always "unitized".
215 const uint8_t* stabs =
216 GetOffset<ElfClass, uint8_t>(elf_header, stab_section->sh_offset);
217 const uint8_t* stabstr =
218 GetOffset<ElfClass, uint8_t>(elf_header, stabstr_section->sh_offset);
219 google_breakpad::StabsReader reader(stabs, stab_section->sh_size,
220 stabstr, stabstr_section->sh_size,
221 big_endian, 4, true, &handler);
222 // Read the STABS data, and do post-processing.
223 if (!reader.Process())
224 return false;
225 handler.Finalize();
226 return true;
227}
228#endif // NO_STABS_SUPPORT
229
230// A range handler that accepts rangelist data parsed by
231// google_breakpad::RangeListReader and populates a range vector (typically
232// owned by a function) with the results.
233class DumperRangesHandler : public DwarfCUToModule::RangesHandler {
234 public:
235 DumperRangesHandler(google_breakpad::ByteReader* reader) :
236 reader_(reader) { }
237
238 bool ReadRanges(
239 enum google_breakpad::DwarfForm form, uint64_t data,
240 google_breakpad::RangeListReader::CURangesInfo* cu_info,
241 vector<Module::Range>* ranges) {
242 DwarfRangeListHandler handler(ranges);
243 google_breakpad::RangeListReader range_list_reader(reader_, cu_info,
244 &handler);
245 return range_list_reader.ReadRanges(form, data);
246 }
247
248 private:
249 google_breakpad::ByteReader* reader_;
250};
251
252// A line-to-module loader that accepts line number info parsed by
253// google_breakpad::LineInfo and populates a Module and a line vector
254// with the results.
255class DumperLineToModule: public DwarfCUToModule::LineToModuleHandler {
256 public:
257 // Create a line-to-module converter using BYTE_READER.
258 explicit DumperLineToModule(google_breakpad::ByteReader* byte_reader)
259 : byte_reader_(byte_reader) { }
260 void StartCompilationUnit(const string& compilation_dir) {
261 compilation_dir_ = compilation_dir;
262 }
263 void ReadProgram(const uint8_t* program,
264 uint64_t length,
265 const uint8_t* string_section,
266 uint64_t string_section_length,
267 const uint8_t* line_string_section,
268 uint64_t line_string_section_length,
269 Module* module,
270 std::vector<Module::Line>* lines,
271 std::map<uint32_t, Module::File*>* files) {
272 DwarfLineToModule handler(module, compilation_dir_, lines, files);
273 google_breakpad::LineInfo parser(program, length, byte_reader_,
274 string_section, string_section_length,
275 line_string_section,
276 line_string_section_length,
277 &handler);
278 parser.Start();
279 }
280 private:
281 string compilation_dir_;
282 google_breakpad::ByteReader* byte_reader_;
283};
284
285template<typename ElfClass>
286bool LoadDwarf(const string& dwarf_filename,
287 const typename ElfClass::Ehdr* elf_header,
288 const bool big_endian,
289 bool handle_inter_cu_refs,
290 bool handle_inline,
291 Module* module) {
292 typedef typename ElfClass::Shdr Shdr;
293
294 const google_breakpad::Endianness endianness = big_endian ?
295 google_breakpad::ENDIANNESS_BIG : google_breakpad::ENDIANNESS_LITTLE;
296 google_breakpad::ByteReader byte_reader(endianness);
297
298 // Construct a context for this file.
299 DwarfCUToModule::FileContext file_context(dwarf_filename,
300 module,
301 handle_inter_cu_refs);
302
303 // Build a map of the ELF file's sections.
304 const Shdr* sections =
305 GetOffset<ElfClass, Shdr>(elf_header, elf_header->e_shoff);
306 int num_sections = elf_header->e_shnum;
307 const Shdr* section_names = sections + elf_header->e_shstrndx;
308 for (int i = 0; i < num_sections; i++) {
309 const Shdr* section = &sections[i];
310 string name = GetOffset<ElfClass, char>(elf_header,
311 section_names->sh_offset) +
312 section->sh_name;
313 const uint8_t* contents = GetOffset<ElfClass, uint8_t>(elf_header,
314 section->sh_offset);
315 file_context.AddSectionToSectionMap(name, contents, section->sh_size);
316 }
317
318 // .debug_ranges and .debug_rnglists reader
319 DumperRangesHandler ranges_handler(&byte_reader);
320
321 // Parse all the compilation units in the .debug_info section.
322 DumperLineToModule line_to_module(&byte_reader);
323 google_breakpad::SectionMap::const_iterator debug_info_entry =
324 file_context.section_map().find(".debug_info");
325 assert(debug_info_entry != file_context.section_map().end());
326 const std::pair<const uint8_t*, uint64_t>& debug_info_section =
327 debug_info_entry->second;
328 // This should never have been called if the file doesn't have a
329 // .debug_info section.
330 assert(debug_info_section.first);
331 uint64_t debug_info_length = debug_info_section.second;
332 for (uint64_t offset = 0; offset < debug_info_length;) {
333 // Make a handler for the root DIE that populates MODULE with the
334 // data that was found.
335 DwarfCUToModule::WarningReporter reporter(dwarf_filename, offset);
336 DwarfCUToModule root_handler(&file_context, &line_to_module,
337 &ranges_handler, &reporter, handle_inline);
338 // Make a Dwarf2Handler that drives the DIEHandler.
339 google_breakpad::DIEDispatcher die_dispatcher(&root_handler);
340 // Make a DWARF parser for the compilation unit at OFFSET.
341 google_breakpad::CompilationUnit reader(dwarf_filename,
342 file_context.section_map(),
343 offset,
344 &byte_reader,
345 &die_dispatcher);
346 // Process the entire compilation unit; get the offset of the next.
347 offset += reader.Start();
348 }
349 return true;
350}
351
352// Fill REGISTER_NAMES with the register names appropriate to the
353// machine architecture given in HEADER, indexed by the register
354// numbers used in DWARF call frame information. Return true on
355// success, or false if HEADER's machine architecture is not
356// supported.
357template<typename ElfClass>
358bool DwarfCFIRegisterNames(const typename ElfClass::Ehdr* elf_header,
359 std::vector<string>* register_names) {
360 switch (elf_header->e_machine) {
361 case EM_386:
362 *register_names = DwarfCFIToModule::RegisterNames::I386();
363 return true;
364 case EM_ARM:
365 *register_names = DwarfCFIToModule::RegisterNames::ARM();
366 return true;
367 case EM_AARCH64:
368 *register_names = DwarfCFIToModule::RegisterNames::ARM64();
369 return true;
370 case EM_MIPS:
371 *register_names = DwarfCFIToModule::RegisterNames::MIPS();
372 return true;
373 case EM_X86_64:
374 *register_names = DwarfCFIToModule::RegisterNames::X86_64();
375 return true;
376 default:
377 return false;
378 }
379}
380
381template<typename ElfClass>
382bool LoadDwarfCFI(const string& dwarf_filename,
383 const typename ElfClass::Ehdr* elf_header,
384 const char* section_name,
385 const typename ElfClass::Shdr* section,
386 const bool eh_frame,
387 const typename ElfClass::Shdr* got_section,
388 const typename ElfClass::Shdr* text_section,
389 const bool big_endian,
390 Module* module) {
391 // Find the appropriate set of register names for this file's
392 // architecture.
393 std::vector<string> register_names;
394 if (!DwarfCFIRegisterNames<ElfClass>(elf_header, &register_names)) {
395 fprintf(stderr, "%s: unrecognized ELF machine architecture '%d';"
396 " cannot convert DWARF call frame information\n",
397 dwarf_filename.c_str(), elf_header->e_machine);
398 return false;
399 }
400
401 const google_breakpad::Endianness endianness = big_endian ?
402 google_breakpad::ENDIANNESS_BIG : google_breakpad::ENDIANNESS_LITTLE;
403
404 // Find the call frame information and its size.
405 const uint8_t* cfi =
406 GetOffset<ElfClass, uint8_t>(elf_header, section->sh_offset);
407 size_t cfi_size = section->sh_size;
408
409 // Plug together the parser, handler, and their entourages.
410 DwarfCFIToModule::Reporter module_reporter(dwarf_filename, section_name);
411 DwarfCFIToModule handler(module, register_names, &module_reporter);
412 google_breakpad::ByteReader byte_reader(endianness);
413
414 byte_reader.SetAddressSize(ElfClass::kAddrSize);
415
416 // Provide the base addresses for .eh_frame encoded pointers, if
417 // possible.
418 byte_reader.SetCFIDataBase(section->sh_addr, cfi);
419 if (got_section)
420 byte_reader.SetDataBase(got_section->sh_addr);
421 if (text_section)
422 byte_reader.SetTextBase(text_section->sh_addr);
423
424 google_breakpad::CallFrameInfo::Reporter dwarf_reporter(dwarf_filename,
425 section_name);
426 google_breakpad::CallFrameInfo parser(cfi, cfi_size,
427 &byte_reader, &handler, &dwarf_reporter,
428 eh_frame);
429 parser.Start();
430 return true;
431}
432
433bool LoadELF(const string& obj_file, MmapWrapper* map_wrapper,
434 void** elf_header) {
435 int obj_fd = open(obj_file.c_str(), O_RDONLY);
436 if (obj_fd < 0) {
437 fprintf(stderr, "Failed to open ELF file '%s': %s\n",
438 obj_file.c_str(), strerror(errno));
439 return false;
440 }
441 FDWrapper obj_fd_wrapper(obj_fd);
442 struct stat st;
443 if (fstat(obj_fd, &st) != 0 && st.st_size <= 0) {
444 fprintf(stderr, "Unable to fstat ELF file '%s': %s\n",
445 obj_file.c_str(), strerror(errno));
446 return false;
447 }
448 void* obj_base = mmap(NULL, st.st_size,
449 PROT_READ | PROT_WRITE, MAP_PRIVATE, obj_fd, 0);
450 if (obj_base == MAP_FAILED) {
451 fprintf(stderr, "Failed to mmap ELF file '%s': %s\n",
452 obj_file.c_str(), strerror(errno));
453 return false;
454 }
455 map_wrapper->set(obj_base, st.st_size);
456 *elf_header = obj_base;
457 if (!IsValidElf(*elf_header)) {
458 fprintf(stderr, "Not a valid ELF file: %s\n", obj_file.c_str());
459 return false;
460 }
461 return true;
462}
463
// Determine the byte order recorded in ELF_HEADER. On success, store true
// in *BIG_ENDIAN for big-endian data or false for little-endian data and
// return true. For any other data encoding, print a diagnostic to stderr,
// leave *BIG_ENDIAN untouched and return false.
template<typename ElfClass>
bool ElfEndianness(const typename ElfClass::Ehdr* elf_header,
                   bool* big_endian) {
  const int encoding = elf_header->e_ident[EI_DATA];
  switch (encoding) {
    case ELFDATA2LSB:
      *big_endian = false;
      return true;
    case ELFDATA2MSB:
      *big_endian = true;
      return true;
    default:
      break;
  }

  fprintf(stderr, "bad data encoding in ELF header: %d\n", encoding);
  return false;
}
481
// Resolve |right_path| to an absolute path and report whether it is the
// same string as |left_abspath|, which must already be absolute. Returns
// false when |right_path| cannot be resolved.
bool IsSameFile(const char* left_abspath, const string& right_path) {
  char resolved_right[PATH_MAX];
  const char* resolved = realpath(right_path.c_str(), resolved_right);
  return resolved != NULL && strcmp(left_abspath, resolved_right) == 0;
}
490
491// Read the .gnu_debuglink and get the debug file name. If anything goes
492// wrong, return an empty string.
493string ReadDebugLink(const uint8_t* debuglink,
494 const size_t debuglink_size,
495 const bool big_endian,
496 const string& obj_file,
497 const std::vector<string>& debug_dirs) {
498 // Include '\0' + CRC32 (4 bytes).
499 size_t debuglink_len = strlen(reinterpret_cast<const char*>(debuglink)) + 5;
500 debuglink_len = 4 * ((debuglink_len + 3) / 4); // Round up to 4 bytes.
501
502 // Sanity check.
503 if (debuglink_len != debuglink_size) {
504 fprintf(stderr, "Mismatched .gnu_debuglink string / section size: "
505 "%zx %zx\n", debuglink_len, debuglink_size);
506 return string();
507 }
508
509 char obj_file_abspath[PATH_MAX];
510 if (!realpath(obj_file.c_str(), obj_file_abspath)) {
511 fprintf(stderr, "Cannot resolve absolute path for %s\n", obj_file.c_str());
512 return string();
513 }
514
515 std::vector<string> searched_paths;
516 string debuglink_path;
517 std::vector<string>::const_iterator it;
518 for (it = debug_dirs.begin(); it < debug_dirs.end(); ++it) {
519 const string& debug_dir = *it;
520 debuglink_path = debug_dir + "/" +
521 reinterpret_cast<const char*>(debuglink);
522
523 // There is the annoying case of /path/to/foo.so having foo.so as the
524 // debug link file name. Thus this may end up opening /path/to/foo.so again,
525 // and there is a small chance of the two files having the same CRC.
526 if (IsSameFile(obj_file_abspath, debuglink_path))
527 continue;
528
529 searched_paths.push_back(debug_dir);
530 int debuglink_fd = open(debuglink_path.c_str(), O_RDONLY);
531 if (debuglink_fd < 0)
532 continue;
533
534 FDWrapper debuglink_fd_wrapper(debuglink_fd);
535
536 // The CRC is the last 4 bytes in |debuglink|.
537 const google_breakpad::Endianness endianness = big_endian ?
538 google_breakpad::ENDIANNESS_BIG : google_breakpad::ENDIANNESS_LITTLE;
539 google_breakpad::ByteReader byte_reader(endianness);
540 uint32_t expected_crc =
541 byte_reader.ReadFourBytes(&debuglink[debuglink_size - 4]);
542
543 uint32_t actual_crc = 0;
544 while (true) {
545 const size_t kReadSize = 4096;
546 char buf[kReadSize];
547 ssize_t bytes_read = HANDLE_EINTR(read(debuglink_fd, &buf, kReadSize));
548 if (bytes_read < 0) {
549 fprintf(stderr, "Error reading debug ELF file %s.\n",
550 debuglink_path.c_str());
551 return string();
552 }
553 if (bytes_read == 0)
554 break;
555 actual_crc = google_breakpad::UpdateCrc32(actual_crc, buf, bytes_read);
556 }
557 if (actual_crc != expected_crc) {
558 fprintf(stderr, "Error reading debug ELF file - CRC32 mismatch: %s\n",
559 debuglink_path.c_str());
560 continue;
561 }
562
563 // Found debug file.
564 return debuglink_path;
565 }
566
567 // Not found case.
568 fprintf(stderr, "Failed to find debug ELF file for '%s' after trying:\n",
569 obj_file.c_str());
570 for (it = searched_paths.begin(); it < searched_paths.end(); ++it) {
571 const string& debug_dir = *it;
572 fprintf(stderr, " %s/%s\n", debug_dir.c_str(), debuglink);
573 }
574 return string();
575}
576
//
// LoadSymbolsInfo
//
// Holds the state between the two calls to LoadSymbols() in case it's necessary
// to follow the .gnu_debuglink section and load debug information from a
// different file.
//
template<typename ElfClass>
class LoadSymbolsInfo {
 public:
  typedef typename ElfClass::Addr Addr;

  // |dbg_dirs| is kept by reference, not copied; it must outlive this
  // object.
  explicit LoadSymbolsInfo(const std::vector<string>& dbg_dirs) :
    debug_dirs_(dbg_dirs),
    has_loading_addr_(false),
    loading_addr_(0) {}

  // Keeps track of which sections have been loaded so sections don't
  // accidentally get loaded twice from two different files. A repeated
  // section is only reported on stderr.
  void LoadedSection(const string& section) {
    if (loaded_sections_.count(section) == 0) {
      loaded_sections_.insert(section);
    } else {
      fprintf(stderr, "Section %s has already been loaded.\n",
              section.c_str());
    }
  }

  // The ELF file and linked debug file are expected to have the same preferred
  // loading address. The first call records |addr| and |filename|; every
  // later call checks |addr| against the recorded value and, on a mismatch,
  // prints a diagnostic and asserts.
  void set_loading_addr(Addr addr, const string& filename) {
    if (!has_loading_addr_) {
      loading_addr_ = addr;
      loaded_file_ = filename;
      // Mark the recorded address as valid; without this, the comparison
      // below could never run.
      has_loading_addr_ = true;
      return;
    }

    if (addr != loading_addr_) {
      fprintf(stderr,
              "ELF file '%s' and debug ELF file '%s' "
              "have different load addresses.\n",
              loaded_file_.c_str(), filename.c_str());
      assert(false);
    }
  }

  // Setters and getters
  const std::vector<string>& debug_dirs() const {
    return debug_dirs_;
  }

  string debuglink_file() const {
    return debuglink_file_;
  }
  void set_debuglink_file(string file) {
    debuglink_file_ = file;
  }

 private:
  const std::vector<string>& debug_dirs_;  // Directories in which to
                                           // search for the debug ELF file.

  string debuglink_file_;  // Full path to the debug ELF file.

  bool has_loading_addr_;  // Indicate if LOADING_ADDR_ is valid.

  Addr loading_addr_;  // Saves the preferred loading address from the
                       // first call to LoadSymbols().

  string loaded_file_;  // Name of the file loaded from the first call to
                        // LoadSymbols().

  std::set<string> loaded_sections_;  // Tracks the Loaded ELF sections
                                      // between calls to LoadSymbols().
};
651
652template<typename ElfClass>
653bool LoadSymbols(const string& obj_file,
654 const bool big_endian,
655 const typename ElfClass::Ehdr* elf_header,
656 const bool read_gnu_debug_link,
657 LoadSymbolsInfo<ElfClass>* info,
658 const DumpOptions& options,
659 Module* module) {
660 typedef typename ElfClass::Addr Addr;
661 typedef typename ElfClass::Phdr Phdr;
662 typedef typename ElfClass::Shdr Shdr;
663
664 Addr loading_addr = GetLoadingAddress<ElfClass>(
665 GetOffset<ElfClass, Phdr>(elf_header, elf_header->e_phoff),
666 elf_header->e_phnum);
667 module->SetLoadAddress(loading_addr);
668 info->set_loading_addr(loading_addr, obj_file);
669
670 // Allow filtering of extraneous debug information in partitioned libraries.
671 // Such libraries contain debug information for all libraries extracted from
672 // the same combined library, implying extensive duplication.
673 vector<Module::Range> address_ranges = GetPtLoadSegmentRanges<ElfClass>(
674 GetOffset<ElfClass, Phdr>(elf_header, elf_header->e_phoff),
675 elf_header->e_phnum);
676 module->SetAddressRanges(address_ranges);
677
678 const Shdr* sections =
679 GetOffset<ElfClass, Shdr>(elf_header, elf_header->e_shoff);
680 const Shdr* section_names = sections + elf_header->e_shstrndx;
681 const char* names =
682 GetOffset<ElfClass, char>(elf_header, section_names->sh_offset);
683 const char* names_end = names + section_names->sh_size;
684 bool found_debug_info_section = false;
685 bool found_usable_info = false;
686
687 if ((options.symbol_data & SYMBOLS_AND_FILES) ||
688 (options.symbol_data & INLINES)) {
689#ifndef NO_STABS_SUPPORT
690 // Look for STABS debugging information, and load it if present.
691 const Shdr* stab_section =
692 FindElfSectionByName<ElfClass>(".stab", SHT_PROGBITS,
693 sections, names, names_end,
694 elf_header->e_shnum);
695 if (stab_section) {
696 const Shdr* stabstr_section = stab_section->sh_link + sections;
697 if (stabstr_section) {
698 found_debug_info_section = true;
699 found_usable_info = true;
700 info->LoadedSection(".stab");
701 if (!LoadStabs<ElfClass>(elf_header, stab_section, stabstr_section,
702 big_endian, module)) {
703 fprintf(stderr, "%s: \".stab\" section found, but failed to load"
704 " STABS debugging information\n", obj_file.c_str());
705 }
706 }
707 }
708#endif // NO_STABS_SUPPORT
709
710 // See if there are export symbols available.
711 const Shdr* symtab_section =
712 FindElfSectionByName<ElfClass>(".symtab", SHT_SYMTAB,
713 sections, names, names_end,
714 elf_header->e_shnum);
715 const Shdr* strtab_section =
716 FindElfSectionByName<ElfClass>(".strtab", SHT_STRTAB,
717 sections, names, names_end,
718 elf_header->e_shnum);
719 if (symtab_section && strtab_section) {
720 info->LoadedSection(".symtab");
721
722 const uint8_t* symtab =
723 GetOffset<ElfClass, uint8_t>(elf_header,
724 symtab_section->sh_offset);
725 const uint8_t* strtab =
726 GetOffset<ElfClass, uint8_t>(elf_header,
727 strtab_section->sh_offset);
728 bool result =
729 ELFSymbolsToModule(symtab,
730 symtab_section->sh_size,
731 strtab,
732 strtab_section->sh_size,
733 big_endian,
734 ElfClass::kAddrSize,
735 module);
736 found_usable_info = found_usable_info || result;
737 } else {
738 // Look in dynsym only if full symbol table was not available.
739 const Shdr* dynsym_section =
740 FindElfSectionByName<ElfClass>(".dynsym", SHT_DYNSYM,
741 sections, names, names_end,
742 elf_header->e_shnum);
743 const Shdr* dynstr_section =
744 FindElfSectionByName<ElfClass>(".dynstr", SHT_STRTAB,
745 sections, names, names_end,
746 elf_header->e_shnum);
747 if (dynsym_section && dynstr_section) {
748 info->LoadedSection(".dynsym");
749
750 const uint8_t* dynsyms =
751 GetOffset<ElfClass, uint8_t>(elf_header,
752 dynsym_section->sh_offset);
753 const uint8_t* dynstrs =
754 GetOffset<ElfClass, uint8_t>(elf_header,
755 dynstr_section->sh_offset);
756 bool result =
757 ELFSymbolsToModule(dynsyms,
758 dynsym_section->sh_size,
759 dynstrs,
760 dynstr_section->sh_size,
761 big_endian,
762 ElfClass::kAddrSize,
763 module);
764 found_usable_info = found_usable_info || result;
765 }
766 }
767
768 // Only Load .debug_info after loading symbol table to avoid duplicate
769 // PUBLIC records.
770 // Look for DWARF debugging information, and load it if present.
771 const Shdr* dwarf_section =
772 FindElfSectionByName<ElfClass>(".debug_info", SHT_PROGBITS,
773 sections, names, names_end,
774 elf_header->e_shnum);
775
776 // .debug_info section type is SHT_PROGBITS for mips on pnacl toolchains,
777 // but MIPS_DWARF for regular gnu toolchains, so both need to be checked
778 if (elf_header->e_machine == EM_MIPS && !dwarf_section) {
779 dwarf_section =
780 FindElfSectionByName<ElfClass>(".debug_info", SHT_MIPS_DWARF,
781 sections, names, names_end,
782 elf_header->e_shnum);
783 }
784
785 if (dwarf_section) {
786 found_debug_info_section = true;
787 found_usable_info = true;
788 info->LoadedSection(".debug_info");
789 if (!LoadDwarf<ElfClass>(obj_file, elf_header, big_endian,
790 options.handle_inter_cu_refs,
791 options.symbol_data & INLINES, module)) {
792 fprintf(stderr, "%s: \".debug_info\" section found, but failed to load "
793 "DWARF debugging information\n", obj_file.c_str());
794 }
795 }
796 }
797
798 if (options.symbol_data & CFI) {
799 // Dwarf Call Frame Information (CFI) is actually independent from
800 // the other DWARF debugging information, and can be used alone.
801 const Shdr* dwarf_cfi_section =
802 FindElfSectionByName<ElfClass>(".debug_frame", SHT_PROGBITS,
803 sections, names, names_end,
804 elf_header->e_shnum);
805
806 // .debug_frame section type is SHT_PROGBITS for mips on pnacl toolchains,
807 // but MIPS_DWARF for regular gnu toolchains, so both need to be checked
808 if (elf_header->e_machine == EM_MIPS && !dwarf_cfi_section) {
809 dwarf_cfi_section =
810 FindElfSectionByName<ElfClass>(".debug_frame", SHT_MIPS_DWARF,
811 sections, names, names_end,
812 elf_header->e_shnum);
813 }
814
815 if (dwarf_cfi_section) {
816 // Ignore the return value of this function; even without call frame
817 // information, the other debugging information could be perfectly
818 // useful.
819 info->LoadedSection(".debug_frame");
820 bool result =
821 LoadDwarfCFI<ElfClass>(obj_file, elf_header, ".debug_frame",
822 dwarf_cfi_section, false, 0, 0, big_endian,
823 module);
824 found_usable_info = found_usable_info || result;
825 }
826
827 // Linux C++ exception handling information can also provide
828 // unwinding data.
829 const Shdr* eh_frame_section =
830 FindElfSectionByName<ElfClass>(".eh_frame", SHT_PROGBITS,
831 sections, names, names_end,
832 elf_header->e_shnum);
833 if (eh_frame_section) {
834 // Pointers in .eh_frame data may be relative to the base addresses of
835 // certain sections. Provide those sections if present.
836 const Shdr* got_section =
837 FindElfSectionByName<ElfClass>(".got", SHT_PROGBITS,
838 sections, names, names_end,
839 elf_header->e_shnum);
840 const Shdr* text_section =
841 FindElfSectionByName<ElfClass>(".text", SHT_PROGBITS,
842 sections, names, names_end,
843 elf_header->e_shnum);
844 info->LoadedSection(".eh_frame");
845 // As above, ignore the return value of this function.
846 bool result =
847 LoadDwarfCFI<ElfClass>(obj_file, elf_header, ".eh_frame",
848 eh_frame_section, true,
849 got_section, text_section, big_endian, module);
850 found_usable_info = found_usable_info || result;
851 }
852 }
853
854 if (!found_debug_info_section) {
855 fprintf(stderr, "%s: file contains no debugging information"
856 " (no \".stab\" or \".debug_info\" sections)\n",
857 obj_file.c_str());
858
859 // Failed, but maybe there's a .gnu_debuglink section?
860 if (read_gnu_debug_link) {
861 const Shdr* gnu_debuglink_section
862 = FindElfSectionByName<ElfClass>(".gnu_debuglink", SHT_PROGBITS,
863 sections, names,
864 names_end, elf_header->e_shnum);
865 if (gnu_debuglink_section) {
866 if (!info->debug_dirs().empty()) {
867 const uint8_t* debuglink_contents =
868 GetOffset<ElfClass, uint8_t>(elf_header,
869 gnu_debuglink_section->sh_offset);
870 string debuglink_file =
871 ReadDebugLink(debuglink_contents,
872 gnu_debuglink_section->sh_size,
873 big_endian,
874 obj_file,
875 info->debug_dirs());
876 info->set_debuglink_file(debuglink_file);
877 } else {
878 fprintf(stderr, ".gnu_debuglink section found in '%s', "
879 "but no debug path specified.\n", obj_file.c_str());
880 }
881 } else {
882 fprintf(stderr, "%s does not contain a .gnu_debuglink section.\n",
883 obj_file.c_str());
884 }
885 } else {
886 // Return true if some usable information was found, since the caller
887 // doesn't want to use .gnu_debuglink.
888 return found_usable_info;
889 }
890
891 // No debug info was found, let the user try again with .gnu_debuglink
892 // if present.
893 return false;
894 }
895
896 return true;
897}
898
// Return the breakpad symbol file identifier for the machine architecture
// recorded in ELF_HEADER, or NULL if the architecture is not one of the
// known ones.
template<typename ElfClass>
const char* ElfArchitecture(const typename ElfClass::Ehdr* elf_header) {
  typedef typename ElfClass::Half Half;

  // Known e_machine values and the identifier used for each.
  struct MachineName {
    Half machine;
    const char* identifier;
  };
  static const MachineName kKnownMachines[] = {
    { EM_386,     "x86" },
    { EM_ARM,     "arm" },
    { EM_AARCH64, "arm64" },
    { EM_MIPS,    "mips" },
    { EM_PPC64,   "ppc64" },
    { EM_PPC,     "ppc" },
    { EM_S390,    "s390" },
    { EM_SPARC,   "sparc" },
    { EM_SPARCV9, "sparcv9" },
    { EM_X86_64,  "x86_64" },
  };

  const Half arch = elf_header->e_machine;
  const size_t known_count = sizeof(kKnownMachines) / sizeof(kKnownMachines[0]);
  for (size_t index = 0; index < known_count; ++index) {
    if (kKnownMachines[index].machine == arch)
      return kKnownMachines[index].identifier;
  }
  return NULL;
}
919
920template<typename ElfClass>
921bool SanitizeDebugFile(const typename ElfClass::Ehdr* debug_elf_header,
922 const string& debuglink_file,
923 const string& obj_filename,
924 const char* obj_file_architecture,
925 const bool obj_file_is_big_endian) {
926 const char* debug_architecture =
927 ElfArchitecture<ElfClass>(debug_elf_header);
928 if (!debug_architecture) {
929 fprintf(stderr, "%s: unrecognized ELF machine architecture: %d\n",
930 debuglink_file.c_str(), debug_elf_header->e_machine);
931 return false;
932 }
933 if (strcmp(obj_file_architecture, debug_architecture)) {
934 fprintf(stderr, "%s with ELF machine architecture %s does not match "
935 "%s with ELF architecture %s\n",
936 debuglink_file.c_str(), debug_architecture,
937 obj_filename.c_str(), obj_file_architecture);
938 return false;
939 }
940 bool debug_big_endian;
941 if (!ElfEndianness<ElfClass>(debug_elf_header, &debug_big_endian))
942 return false;
943 if (debug_big_endian != obj_file_is_big_endian) {
944 fprintf(stderr, "%s and %s does not match in endianness\n",
945 obj_filename.c_str(), debuglink_file.c_str());
946 return false;
947 }
948 return true;
949}
950
951template<typename ElfClass>
952bool InitModuleForElfClass(const typename ElfClass::Ehdr* elf_header,
953 const string& obj_filename,
954 const string& obj_os,
955 scoped_ptr<Module>& module) {
956 PageAllocator allocator;
957 wasteful_vector<uint8_t> identifier(&allocator, kDefaultBuildIdSize);
958 if (!FileID::ElfFileIdentifierFromMappedFile(elf_header, identifier)) {
959 fprintf(stderr, "%s: unable to generate file identifier\n",
960 obj_filename.c_str());
961 return false;
962 }
963
964 const char* architecture = ElfArchitecture<ElfClass>(elf_header);
965 if (!architecture) {
966 fprintf(stderr, "%s: unrecognized ELF machine architecture: %d\n",
967 obj_filename.c_str(), elf_header->e_machine);
968 return false;
969 }
970
971 char name_buf[NAME_MAX] = {};
972 std::string name = google_breakpad::ElfFileSoNameFromMappedFile(
973 elf_header, name_buf, sizeof(name_buf))
974 ? name_buf
975 : google_breakpad::BaseName(obj_filename);
976
977 // Add an extra "0" at the end. PDB files on Windows have an 'age'
978 // number appended to the end of the file identifier; this isn't
979 // really used or necessary on other platforms, but be consistent.
980 string id = FileID::ConvertIdentifierToUUIDString(identifier) + "0";
981 // This is just the raw Build ID in hex.
982 string code_id = FileID::ConvertIdentifierToString(identifier);
983
984 module.reset(new Module(name, obj_os, architecture, id, code_id));
985
986 return true;
987}
988
989template<typename ElfClass>
990bool ReadSymbolDataElfClass(const typename ElfClass::Ehdr* elf_header,
991 const string& obj_filename,
992 const string& obj_os,
993 const std::vector<string>& debug_dirs,
994 const DumpOptions& options,
995 Module** out_module) {
996 typedef typename ElfClass::Ehdr Ehdr;
997
998 *out_module = NULL;
999
1000 scoped_ptr<Module> module;
1001 if (!InitModuleForElfClass<ElfClass>(elf_header, obj_filename, obj_os,
1002 module)) {
1003 return false;
1004 }
1005
1006 // Figure out what endianness this file is.
1007 bool big_endian;
1008 if (!ElfEndianness<ElfClass>(elf_header, &big_endian))
1009 return false;
1010
1011 LoadSymbolsInfo<ElfClass> info(debug_dirs);
1012 if (!LoadSymbols<ElfClass>(obj_filename, big_endian, elf_header,
1013 !debug_dirs.empty(), &info,
1014 options, module.get())) {
1015 const string debuglink_file = info.debuglink_file();
1016 if (debuglink_file.empty())
1017 return false;
1018
1019 // Load debuglink ELF file.
1020 fprintf(stderr, "Found debugging info in %s\n", debuglink_file.c_str());
1021 MmapWrapper debug_map_wrapper;
1022 Ehdr* debug_elf_header = NULL;
1023 if (!LoadELF(debuglink_file, &debug_map_wrapper,
1024 reinterpret_cast<void**>(&debug_elf_header)) ||
1025 !SanitizeDebugFile<ElfClass>(debug_elf_header, debuglink_file,
1026 obj_filename,
1027 module->architecture().c_str(),
1028 big_endian)) {
1029 return false;
1030 }
1031
1032 if (!LoadSymbols<ElfClass>(debuglink_file, big_endian,
1033 debug_elf_header, false, &info,
1034 options, module.get())) {
1035 return false;
1036 }
1037 }
1038
1039 *out_module = module.release();
1040 return true;
1041}
1042
1043} // namespace
1044
1045namespace google_breakpad {
1046
1047// Not explicitly exported, but not static so it can be used in unit tests.
1048bool ReadSymbolDataInternal(const uint8_t* obj_file,
1049 const string& obj_filename,
1050 const string& obj_os,
1051 const std::vector<string>& debug_dirs,
1052 const DumpOptions& options,
1053 Module** module) {
1054 if (!IsValidElf(obj_file)) {
1055 fprintf(stderr, "Not a valid ELF file: %s\n", obj_filename.c_str());
1056 return false;
1057 }
1058
1059 int elfclass = ElfClass(obj_file);
1060 if (elfclass == ELFCLASS32) {
1061 return ReadSymbolDataElfClass<ElfClass32>(
1062 reinterpret_cast<const Elf32_Ehdr*>(obj_file), obj_filename, obj_os,
1063 debug_dirs, options, module);
1064 }
1065 if (elfclass == ELFCLASS64) {
1066 return ReadSymbolDataElfClass<ElfClass64>(
1067 reinterpret_cast<const Elf64_Ehdr*>(obj_file), obj_filename, obj_os,
1068 debug_dirs, options, module);
1069 }
1070
1071 return false;
1072}
1073
1074bool WriteSymbolFile(const string& load_path,
1075 const string& obj_file,
1076 const string& obj_os,
1077 const std::vector<string>& debug_dirs,
1078 const DumpOptions& options,
1079 std::ostream& sym_stream) {
1080 Module* module;
1081 if (!ReadSymbolData(load_path, obj_file, obj_os, debug_dirs, options,
1082 &module))
1083 return false;
1084
1085 bool result = module->Write(sym_stream, options.symbol_data);
1086 delete module;
1087 return result;
1088}
1089
1090// Read the selected object file's debugging information, and write out the
1091// header only to |stream|. Return true on success; if an error occurs, report
1092// it and return false.
1093bool WriteSymbolFileHeader(const string& load_path,
1094 const string& obj_file,
1095 const string& obj_os,
1096 std::ostream& sym_stream) {
1097 MmapWrapper map_wrapper;
1098 void* elf_header = NULL;
1099 if (!LoadELF(load_path, &map_wrapper, &elf_header)) {
1100 fprintf(stderr, "Could not load ELF file: %s\n", obj_file.c_str());
1101 return false;
1102 }
1103
1104 if (!IsValidElf(elf_header)) {
1105 fprintf(stderr, "Not a valid ELF file: %s\n", obj_file.c_str());
1106 return false;
1107 }
1108
1109 int elfclass = ElfClass(elf_header);
1110 scoped_ptr<Module> module;
1111 if (elfclass == ELFCLASS32) {
1112 if (!InitModuleForElfClass<ElfClass32>(
1113 reinterpret_cast<const Elf32_Ehdr*>(elf_header), obj_file, obj_os,
1114 module)) {
1115 fprintf(stderr, "Failed to load ELF module: %s\n", obj_file.c_str());
1116 return false;
1117 }
1118 } else if (elfclass == ELFCLASS64) {
1119 if (!InitModuleForElfClass<ElfClass64>(
1120 reinterpret_cast<const Elf64_Ehdr*>(elf_header), obj_file, obj_os,
1121 module)) {
1122 fprintf(stderr, "Failed to load ELF module: %s\n", obj_file.c_str());
1123 return false;
1124 }
1125 } else {
1126 fprintf(stderr, "Unsupported module file: %s\n", obj_file.c_str());
1127 return false;
1128 }
1129
1130 return module->Write(sym_stream, ALL_SYMBOL_DATA);
1131}
1132
1133bool ReadSymbolData(const string& load_path,
1134 const string& obj_file,
1135 const string& obj_os,
1136 const std::vector<string>& debug_dirs,
1137 const DumpOptions& options,
1138 Module** module) {
1139 MmapWrapper map_wrapper;
1140 void* elf_header = NULL;
1141 if (!LoadELF(load_path, &map_wrapper, &elf_header))
1142 return false;
1143
1144 return ReadSymbolDataInternal(reinterpret_cast<uint8_t*>(elf_header),
1145 obj_file, obj_os, debug_dirs, options, module);
1146}
1147
1148} // namespace google_breakpad
1149