1 | // Copyright 2005 Google Inc. All Rights Reserved. |
2 | // Author: chatham@google.com (Andrew Chatham) |
3 | // Author: satorux@google.com (Satoru Takabayashi) |
4 | // |
5 | // Code for reading in ELF files. |
6 | // |
7 | // For information on the ELF format, see |
8 | // http://www.x86.org/ftp/manuals/tools/elf.pdf |
9 | // |
10 | // I also liked: |
11 | // http://www.caldera.com/developers/gabi/1998-04-29/contents.html |
12 | // |
13 | // A note about types: When dealing with the file format, we use types |
14 | // like Elf32_Word, but in the public interfaces we treat all |
15 | // addresses as uint64. As a result, we should be able to symbolize |
16 | // 64-bit binaries from a 32-bit process (which we don't do, |
17 | // anyway). size_t should therefore be avoided, except where required |
18 | // by things like mmap(). |
19 | // |
20 | // Although most of this code can deal with arbitrary ELF files of |
21 | // either word size, the public ElfReader interface only examines |
22 | // files loaded into the current address space, which must all match |
23 | // the machine's native word size. This code cannot handle ELF files |
24 | // with a non-native byte ordering. |
25 | // |
26 | // TODO(chatham): It would be nice if we could accomplish this task |
27 | // without using malloc(), so we could use it as the process is dying. |
28 | |
29 | #ifndef _GNU_SOURCE |
30 | #define _GNU_SOURCE // needed for pread() |
31 | #endif |
32 | |
33 | #include <fcntl.h> |
34 | #include <limits.h> |
35 | #include <string.h> |
36 | #include <sys/mman.h> |
37 | #include <sys/stat.h> |
38 | #include <sys/types.h> |
39 | #include <unistd.h> |
40 | |
41 | #include <algorithm> |
42 | #include <map> |
43 | #include <string> |
44 | #include <vector> |
45 | // TODO(saugustine): Add support for compressed debug. |
46 | // Also need to add configure tests for zlib. |
47 | //#include "zlib.h" |
48 | |
49 | #include "third_party/musl/include/elf.h" |
50 | #include "elf_reader.h" |
51 | #include "common/using_std_string.h" |
52 | |
53 | // EM_AARCH64 is not defined by elf.h of GRTE v3 on x86. |
54 | // TODO(dougkwan): Remove this when v17 is retired. |
55 | #if !defined(EM_AARCH64) |
56 | #define EM_AARCH64 183 /* ARM AARCH64 */ |
57 | #endif |
58 | |
59 | // Map Linux macros to their Apple equivalents. |
60 | #if __APPLE__ |
61 | #ifndef __LITTLE_ENDIAN |
62 | #define __LITTLE_ENDIAN __ORDER_LITTLE_ENDIAN__ |
63 | #endif // __LITTLE_ENDIAN |
64 | #ifndef __BIG_ENDIAN |
65 | #define __BIG_ENDIAN __ORDER_BIG_ENDIAN__ |
66 | #endif // __BIG_ENDIAN |
67 | #ifndef __BYTE_ORDER |
68 | #define __BYTE_ORDER __BYTE_ORDER__ |
69 | #endif // __BYTE_ORDER |
70 | #endif // __APPLE__ |
71 | |
72 | // TODO(dthomson): Can be removed once all Java code is using the Google3 |
73 | // launcher. We need to avoid processing PLT functions as it causes memory |
74 | // fragmentation in malloc, which is fixed in tcmalloc - and if the Google3 |
75 | // launcher is used the JVM will then use tcmalloc. b/13735638 |
76 | //DEFINE_bool(elfreader_process_dynsyms, true, |
77 | // "Activate PLT function processing"); |
78 | |
79 | using std::vector; |
80 | |
81 | namespace { |
82 | |
// ARM symbol values carry the Thumb indicator in bit |kARMThumbBitOffset|,
// which is the least significant bit.
const int kARMThumbBitOffset = 0;

// Returns |symbol_table_value| with the Thumb indicator bit cleared, i.e.
// the true, aligned address of an ARM Thumb symbol.
template <typename T>
T AdjustARMThumbSymbolValue(const T& symbol_table_value) {
  const T thumb_bit = static_cast<T>(1) << kARMThumbBitOffset;
  return symbol_table_value & ~thumb_bit;
}
91 | |
// Sections consulted when resolving PLT entries to symbol names.
const char kElfPLTSectionName[] = ".plt";
const char kElfDynSymSectionName[] = ".dynsym";
const char kElfPLTRelSectionName[] = ".rel.plt";    // Entries are Rel structs.
const char kElfPLTRelaSectionName[] = ".rela.plt";  // Entries are Rela structs.

// Per-architecture PLT layout, in bytes: the size of the special PLT0
// entry, followed by the size of one PLT function.
const int kX86PLT0Size = 0x10;
const int kX86PLTCodeSize = 0x10;

const int kARMPLT0Size = 0x14;
const int kARMPLTCodeSize = 0xc;

const int kAARCH64PLT0Size = 0x20;
const int kAARCH64PLTCodeSize = 0x10;

// Appended to a symbol name when it must be explicitly identified as a PLT
// function.
const char kPLTFunctionSuffix[] = "@plt";
108 | |
109 | } // namespace |
110 | |
111 | namespace google_breakpad { |
112 | |
113 | template <class ElfArch> class ElfReaderImpl; |
114 | |
115 | // 32-bit and 64-bit ELF files are processed exactly the same, except |
116 | // for various field sizes. Elf32 and Elf64 encompass all of the |
117 | // differences between the two formats, and all format-specific code |
118 | // in this file is templated on one of them. |
119 | class Elf32 { |
120 | public: |
121 | typedef Elf32_Ehdr Ehdr; |
122 | typedef Elf32_Shdr Shdr; |
123 | typedef Elf32_Phdr Phdr; |
124 | typedef Elf32_Word Word; |
125 | typedef Elf32_Sym Sym; |
126 | typedef Elf32_Rel Rel; |
127 | typedef Elf32_Rela Rela; |
128 | |
129 | // What should be in the EI_CLASS header. |
130 | static const int kElfClass = ELFCLASS32; |
131 | |
132 | // Given a symbol pointer, return the binding type (eg STB_WEAK). |
133 | static char Bind(const Elf32_Sym* sym) { |
134 | return ELF32_ST_BIND(sym->st_info); |
135 | } |
136 | // Given a symbol pointer, return the symbol type (eg STT_FUNC). |
137 | static char Type(const Elf32_Sym* sym) { |
138 | return ELF32_ST_TYPE(sym->st_info); |
139 | } |
140 | |
141 | // Extract the symbol index from the r_info field of a relocation. |
142 | static int r_sym(const Elf32_Word r_info) { |
143 | return ELF32_R_SYM(r_info); |
144 | } |
145 | }; |
146 | |
147 | |
148 | class Elf64 { |
149 | public: |
150 | typedef Elf64_Ehdr Ehdr; |
151 | typedef Elf64_Shdr Shdr; |
152 | typedef Elf64_Phdr Phdr; |
153 | typedef Elf64_Word Word; |
154 | typedef Elf64_Sym Sym; |
155 | typedef Elf64_Rel Rel; |
156 | typedef Elf64_Rela Rela; |
157 | |
158 | // What should be in the EI_CLASS header. |
159 | static const int kElfClass = ELFCLASS64; |
160 | |
161 | static char Bind(const Elf64_Sym* sym) { |
162 | return ELF64_ST_BIND(sym->st_info); |
163 | } |
164 | static char Type(const Elf64_Sym* sym) { |
165 | return ELF64_ST_TYPE(sym->st_info); |
166 | } |
167 | static int r_sym(const Elf64_Xword r_info) { |
168 | return ELF64_R_SYM(r_info); |
169 | } |
170 | }; |
171 | |
172 | |
// ElfSectionReader mmaps a section of an ELF file ("section" is ELF
// terminology). The ElfReaderImpl object providing the section header
// must exist for the lifetime of this object.
//
// The motivation for mmaping individual sections of the file is that
// many Google executables are large enough when unstripped that we
// have to worry about running out of virtual address space.
//
// For compressed sections we have no choice but to allocate memory.
//
// contents() is NULL when the section is SHT_NOBITS, is empty, or could
// not be mapped.
template<class ElfArch>
class ElfSectionReader {
 public:
  // Maps the section described by |section_header| read-only from |fd|.
  // The header is copied into this object.  |name| and |path| are not
  // used by the current implementation (the code that would inspect
  // |name| for compressed sections is disabled).
  ElfSectionReader(const char* name, const std::string& path, int fd,
                   const typename ElfArch::Shdr& section_header)
      : contents_aligned_(NULL),
        contents_(NULL),
        size_aligned_(0),
        section_size_(0),
        header_(section_header) {
    // Back up to the beginning of the page we're interested in, because
    // the file offset handed to mmap() must be page-aligned.
    const size_t additional = header_.sh_offset % getpagesize();
    const size_t offset_aligned = header_.sh_offset - additional;
    section_size_ = header_.sh_size;
    size_aligned_ = section_size_ + additional;
    // If the section has been stripped or is empty, do not attempt
    // to process its contents.
    if (header_.sh_type == SHT_NOBITS || header_.sh_size == 0)
      return;
    void* const mapping = mmap(NULL, size_aligned_, PROT_READ, MAP_SHARED,
                               fd, offset_aligned);
    // mmap() reports failure with MAP_FAILED, not NULL.  Leave both
    // pointers NULL in that case so that clients see "no contents" and the
    // destructor does not try to unmap an invalid address.
    if (mapping == MAP_FAILED)
      return;
    contents_aligned_ = mapping;
    // Set where the offset really should begin.
    contents_ = reinterpret_cast<char*>(contents_aligned_) +
                (header_.sh_offset - offset_aligned);

    // Check for and handle any compressed contents.
    //if (strncmp(name, ".zdebug_", strlen(".zdebug_")) == 0)
    //  DecompressZlibContents();
    // TODO(saugustine): Add support for proposed elf-section flag
    // "SHF_COMPRESS".
  }

  // Releases the mapping if there is one; otherwise frees |contents_|,
  // which is NULL unless it was heap-allocated (no visible code path
  // allocates it while decompression support is disabled).
  ~ElfSectionReader() {
    if (contents_aligned_ != NULL)
      munmap(contents_aligned_, size_aligned_);
    else
      delete[] contents_;
  }

  // Return the section header for this section.
  typename ElfArch::Shdr const& header() const { return header_; }

  // Return memory at the given offset within this section.  No bounds
  // checking is done, and the result is meaningless when contents() is
  // NULL.
  const char* GetOffset(typename ElfArch::Word bytes) const {
    return contents_ + bytes;
  }

  // Start of the section data, or NULL (see the class comment).
  const char* contents() const { return contents_; }
  // Size of the section in bytes, as given by sh_size in the header.
  size_t section_size() const { return section_size_; }

 private:
  // page-aligned file contents
  void* contents_aligned_;
  // contents as usable by the client. For non-compressed sections,
  // pointer within contents_aligned_ to where the section data
  // begins; for compressed sections, pointer to the decompressed
  // data.
  char* contents_;
  // size of contents_aligned_
  size_t size_aligned_;
  // size of contents.
  size_t section_size_;
  // Copy of the section header this reader was constructed from.
  const typename ElfArch::Shdr header_;
};
244 | |
245 | // An iterator over symbols in a given section. It handles walking |
246 | // through the entries in the specified section and mapping symbol |
247 | // entries to their names in the appropriate string table (in |
248 | // another section). |
249 | template<class ElfArch> |
250 | class SymbolIterator { |
251 | public: |
252 | SymbolIterator(ElfReaderImpl<ElfArch>* reader, |
253 | typename ElfArch::Word section_type) |
254 | : symbol_section_(reader->GetSectionByType(section_type)), |
255 | string_section_(NULL), |
256 | num_symbols_in_section_(0), |
257 | symbol_within_section_(0) { |
258 | |
259 | // If this section type doesn't exist, leave |
260 | // num_symbols_in_section_ as zero, so this iterator is already |
261 | // done(). |
262 | if (symbol_section_ != NULL) { |
263 | num_symbols_in_section_ = symbol_section_->header().sh_size / |
264 | symbol_section_->header().sh_entsize; |
265 | |
266 | // Symbol sections have sh_link set to the section number of |
267 | // the string section containing the symbol names. |
268 | string_section_ = reader->GetSection(symbol_section_->header().sh_link); |
269 | } |
270 | } |
271 | |
272 | // Return true iff we have passed all symbols in this section. |
273 | bool done() const { |
274 | return symbol_within_section_ >= num_symbols_in_section_; |
275 | } |
276 | |
277 | // Advance to the next symbol in this section. |
278 | // REQUIRES: !done() |
279 | void Next() { ++symbol_within_section_; } |
280 | |
281 | // Return a pointer to the current symbol. |
282 | // REQUIRES: !done() |
283 | const typename ElfArch::Sym* GetSymbol() const { |
284 | return reinterpret_cast<const typename ElfArch::Sym*>( |
285 | symbol_section_->GetOffset(symbol_within_section_ * |
286 | symbol_section_->header().sh_entsize)); |
287 | } |
288 | |
289 | // Return the name of the current symbol, NULL if it has none. |
290 | // REQUIRES: !done() |
291 | const char* GetSymbolName() const { |
292 | int name_offset = GetSymbol()->st_name; |
293 | if (name_offset == 0) |
294 | return NULL; |
295 | return string_section_->GetOffset(name_offset); |
296 | } |
297 | |
298 | int GetCurrentSymbolIndex() const { |
299 | return symbol_within_section_; |
300 | } |
301 | |
302 | private: |
303 | const ElfSectionReader<ElfArch>* const symbol_section_; |
304 | const ElfSectionReader<ElfArch>* string_section_; |
305 | int num_symbols_in_section_; |
306 | int symbol_within_section_; |
307 | }; |
308 | |
309 | |
// Copied from strings/strutil.h.  Per chatham,
// this library should not depend on strings.

// Returns true iff |str| ends with |suffix|.  An empty |suffix| matches
// every string.  Lengths are kept as size_type so they are never narrowed
// to int.
static inline bool MyHasSuffixString(const std::string& str,
                                     const std::string& suffix) {
  const std::string::size_type len = str.length();
  const std::string::size_type suflen = suffix.length();
  return suflen <= len && str.compare(len - suflen, suflen, suffix) == 0;
}
318 | |
319 | |
320 | // ElfReader loads an ELF binary and can provide information about its |
321 | // contents. It is most useful for matching addresses to function |
322 | // names. It does not understand debugging formats (eg dwarf2), so it |
323 | // can't print line numbers. It takes a path to an elf file and a |
324 | // readable file descriptor for that file, which it does not assume |
325 | // ownership of. |
326 | template<class ElfArch> |
327 | class ElfReaderImpl { |
328 | public: |
329 | explicit ElfReaderImpl(const string& path, int fd) |
330 | : path_(path), |
331 | fd_(fd), |
332 | section_headers_(NULL), |
333 | program_headers_(NULL), |
334 | opd_section_(NULL), |
335 | base_for_text_(0), |
336 | plts_supported_(false), |
337 | plt_code_size_(0), |
338 | plt0_size_(0), |
339 | visited_relocation_entries_(false) { |
340 | string error; |
341 | is_dwp_ = MyHasSuffixString(path, ".dwp" ); |
342 | ParseHeaders(fd, path); |
343 | // Currently we need some extra information for PowerPC64 binaries |
344 | // including a way to read the .opd section for function descriptors and a |
345 | // way to find the linked base for function symbols. |
346 | if (header_.e_machine == EM_PPC64) { |
347 | // "opd_section_" must always be checked for NULL before use. |
348 | opd_section_ = GetSectionInfoByName(".opd" , &opd_info_); |
349 | for (unsigned int k = 0u; k < GetNumSections(); ++k) { |
350 | const char* name = GetSectionName(section_headers_[k].sh_name); |
351 | if (strncmp(name, ".text" , strlen(".text" )) == 0) { |
352 | base_for_text_ = |
353 | section_headers_[k].sh_addr - section_headers_[k].sh_offset; |
354 | break; |
355 | } |
356 | } |
357 | } |
358 | // Turn on PLTs. |
359 | if (header_.e_machine == EM_386 || header_.e_machine == EM_X86_64) { |
360 | plt_code_size_ = kX86PLTCodeSize; |
361 | plt0_size_ = kX86PLT0Size; |
362 | plts_supported_ = true; |
363 | } else if (header_.e_machine == EM_ARM) { |
364 | plt_code_size_ = kARMPLTCodeSize; |
365 | plt0_size_ = kARMPLT0Size; |
366 | plts_supported_ = true; |
367 | } else if (header_.e_machine == EM_AARCH64) { |
368 | plt_code_size_ = kAARCH64PLTCodeSize; |
369 | plt0_size_ = kAARCH64PLT0Size; |
370 | plts_supported_ = true; |
371 | } |
372 | } |
373 | |
374 | ~ElfReaderImpl() { |
375 | for (unsigned int i = 0u; i < sections_.size(); ++i) |
376 | delete sections_[i]; |
377 | delete [] section_headers_; |
378 | delete [] program_headers_; |
379 | } |
380 | |
381 | // Examine the headers of the file and return whether the file looks |
382 | // like an ELF file for this architecture. Takes an already-open |
383 | // file descriptor for the candidate file, reading in the prologue |
384 | // to see if the ELF file appears to match the current |
385 | // architecture. If error is non-NULL, it will be set with a reason |
386 | // in case of failure. |
387 | static bool IsArchElfFile(int fd, string* error) { |
388 | unsigned char [EI_NIDENT]; |
389 | if (pread(fd, header, sizeof(header), 0) != sizeof(header)) { |
390 | if (error != NULL) *error = "Could not read header" ; |
391 | return false; |
392 | } |
393 | |
394 | if (memcmp(header, ELFMAG, SELFMAG) != 0) { |
395 | if (error != NULL) *error = "Missing ELF magic" ; |
396 | return false; |
397 | } |
398 | |
399 | if (header[EI_CLASS] != ElfArch::kElfClass) { |
400 | if (error != NULL) *error = "Different word size" ; |
401 | return false; |
402 | } |
403 | |
404 | int endian = 0; |
405 | if (header[EI_DATA] == ELFDATA2LSB) |
406 | endian = __LITTLE_ENDIAN; |
407 | else if (header[EI_DATA] == ELFDATA2MSB) |
408 | endian = __BIG_ENDIAN; |
409 | if (endian != __BYTE_ORDER) { |
410 | if (error != NULL) *error = "Different byte order" ; |
411 | return false; |
412 | } |
413 | |
414 | return true; |
415 | } |
416 | |
417 | // Return true if we can use this symbol in Address-to-Symbol map. |
418 | bool CanUseSymbol(const char* name, const typename ElfArch::Sym* sym) { |
419 | // For now we only save FUNC and NOTYPE symbols. For now we just |
420 | // care about functions, but some functions written in assembler |
421 | // don't have a proper ELF type attached to them, so we store |
422 | // NOTYPE symbols as well. The remaining significant type is |
423 | // OBJECT (eg global variables), which represent about 25% of |
424 | // the symbols in a typical google3 binary. |
425 | if (ElfArch::Type(sym) != STT_FUNC && |
426 | ElfArch::Type(sym) != STT_NOTYPE) { |
427 | return false; |
428 | } |
429 | |
430 | // Target specific filtering. |
431 | switch (header_.e_machine) { |
432 | case EM_AARCH64: |
433 | case EM_ARM: |
434 | // Filter out '$x' special local symbols used by tools |
435 | return name[0] != '$' || ElfArch::Bind(sym) != STB_LOCAL; |
436 | case EM_X86_64: |
437 | // Filter out read-only constants like .LC123. |
438 | return name[0] != '.' || ElfArch::Bind(sym) != STB_LOCAL; |
439 | default: |
440 | return true; |
441 | } |
442 | } |
443 | |
444 | // Iterate over the symbols in a section, either SHT_DYNSYM or |
445 | // SHT_SYMTAB. Add all symbols to the given SymbolMap. |
446 | /* |
447 | void GetSymbolPositions(SymbolMap* symbols, |
448 | typename ElfArch::Word section_type, |
449 | uint64_t mem_offset, |
450 | uint64_t file_offset) { |
451 | // This map is used to filter out "nested" functions. |
452 | // See comment below. |
453 | AddrToSymMap addr_to_sym_map; |
454 | for (SymbolIterator<ElfArch> it(this, section_type); |
455 | !it.done(); it.Next()) { |
456 | const char* name = it.GetSymbolName(); |
457 | if (name == NULL) |
458 | continue; |
459 | const typename ElfArch::Sym* sym = it.GetSymbol(); |
460 | if (CanUseSymbol(name, sym)) { |
461 | const int sec = sym->st_shndx; |
462 | |
463 | // We don't support special section indices. The most common |
464 | // is SHN_ABS, for absolute symbols used deep in the bowels of |
465 | // glibc. Also ignore any undefined symbols. |
466 | if (sec == SHN_UNDEF || |
467 | (sec >= SHN_LORESERVE && sec <= SHN_HIRESERVE)) { |
468 | continue; |
469 | } |
470 | |
471 | const typename ElfArch::Shdr& hdr = section_headers_[sec]; |
472 | |
473 | // Adjust for difference between where we expected to mmap |
474 | // this section, and where it was actually mmapped. |
475 | const int64_t expected_base = hdr.sh_addr - hdr.sh_offset; |
476 | const int64_t real_base = mem_offset - file_offset; |
477 | const int64_t adjust = real_base - expected_base; |
478 | |
479 | uint64_t start = sym->st_value + adjust; |
480 | |
481 | // Adjust function symbols for PowerPC64 by dereferencing and adjusting |
482 | // the function descriptor to get the function address. |
483 | if (header_.e_machine == EM_PPC64 && ElfArch::Type(sym) == STT_FUNC) { |
484 | const uint64_t opd_addr = |
485 | AdjustPPC64FunctionDescriptorSymbolValue(sym->st_value); |
486 | // Only adjust the returned value if the function address was found. |
487 | if (opd_addr != sym->st_value) { |
488 | const int64_t adjust_function_symbols = |
489 | real_base - base_for_text_; |
490 | start = opd_addr + adjust_function_symbols; |
491 | } |
492 | } |
493 | |
494 | addr_to_sym_map.push_back(std::make_pair(start, sym)); |
495 | } |
496 | } |
497 | std::sort(addr_to_sym_map.begin(), addr_to_sym_map.end(), &AddrToSymSorter); |
498 | addr_to_sym_map.erase(std::unique(addr_to_sym_map.begin(), |
499 | addr_to_sym_map.end(), &AddrToSymEquals), |
500 | addr_to_sym_map.end()); |
501 | |
502 | // Squeeze out any "nested functions". |
503 | // Nested functions are not allowed in C, but libc plays tricks. |
504 | // |
505 | // For example, here is disassembly of /lib64/tls/libc-2.3.5.so: |
506 | // 0x00000000000aa380 <read+0>: cmpl $0x0,0x2781b9(%rip) |
507 | // 0x00000000000aa387 <read+7>: jne 0xaa39b <read+27> |
508 | // 0x00000000000aa389 <__read_nocancel+0>: mov $0x0,%rax |
509 | // 0x00000000000aa390 <__read_nocancel+7>: syscall |
510 | // 0x00000000000aa392 <__read_nocancel+9>: cmp $0xfffffffffffff001,%rax |
511 | // 0x00000000000aa398 <__read_nocancel+15>: jae 0xaa3ef <read+111> |
512 | // 0x00000000000aa39a <__read_nocancel+17>: retq |
513 | // 0x00000000000aa39b <read+27>: sub $0x28,%rsp |
514 | // 0x00000000000aa39f <read+31>: mov %rdi,0x8(%rsp) |
515 | // ... |
516 | // Without removing __read_nocancel, symbolizer will return NULL |
517 | // given e.g. 0xaa39f (because the lower bound is __read_nocancel, |
518 | // but 0xaa39f is beyond its end. |
519 | if (addr_to_sym_map.empty()) { |
520 | return; |
521 | } |
522 | const ElfSectionReader<ElfArch>* const symbol_section = |
523 | this->GetSectionByType(section_type); |
524 | const ElfSectionReader<ElfArch>* const string_section = |
525 | this->GetSection(symbol_section->header().sh_link); |
526 | |
527 | typename AddrToSymMap::iterator curr = addr_to_sym_map.begin(); |
528 | // Always insert the first symbol. |
529 | symbols->AddSymbol(string_section->GetOffset(curr->second->st_name), |
530 | curr->first, curr->second->st_size); |
531 | typename AddrToSymMap::iterator prev = curr++; |
532 | for (; curr != addr_to_sym_map.end(); ++curr) { |
533 | const uint64_t prev_addr = prev->first; |
534 | const uint64_t curr_addr = curr->first; |
535 | const typename ElfArch::Sym* const prev_sym = prev->second; |
536 | const typename ElfArch::Sym* const curr_sym = curr->second; |
537 | if (prev_addr + prev_sym->st_size <= curr_addr || |
538 | // The next condition is true if two symbols overlap like this: |
539 | // |
540 | // Previous symbol |----------------------------| |
541 | // Current symbol |-------------------------------| |
542 | // |
543 | // These symbols are not found in google3 codebase, but in |
544 | // jdk1.6.0_01_gg1/jre/lib/i386/server/libjvm.so. |
545 | // |
546 | // 0619e040 00000046 t CardTableModRefBS::write_region_work() |
547 | // 0619e070 00000046 t CardTableModRefBS::write_ref_array_work() |
548 | // |
549 | // We allow overlapped symbols rather than ignore these. |
550 | // Due to the way SymbolMap::GetSymbolAtPosition() works, |
551 | // lookup for any address in [curr_addr, curr_addr + its size) |
552 | // (e.g. 0619e071) will produce the current symbol, |
553 | // which is the desired outcome. |
554 | prev_addr + prev_sym->st_size < curr_addr + curr_sym->st_size) { |
555 | const char* name = string_section->GetOffset(curr_sym->st_name); |
556 | symbols->AddSymbol(name, curr_addr, curr_sym->st_size); |
557 | prev = curr; |
558 | } else { |
559 | // Current symbol is "nested" inside previous one like this: |
560 | // |
561 | // Previous symbol |----------------------------| |
562 | // Current symbol |---------------------| |
563 | // |
564 | // This happens within glibc, e.g. __read_nocancel is nested |
565 | // "inside" __read. Ignore "inner" symbol. |
566 | //DCHECK_LE(curr_addr + curr_sym->st_size, |
567 | // prev_addr + prev_sym->st_size); |
568 | ; |
569 | } |
570 | } |
571 | } |
572 | */ |
573 | |
574 | void VisitSymbols(typename ElfArch::Word section_type, |
575 | ElfReader::SymbolSink* sink) { |
576 | VisitSymbols(section_type, sink, -1, -1, false); |
577 | } |
578 | |
579 | void VisitSymbols(typename ElfArch::Word section_type, |
580 | ElfReader::SymbolSink* sink, |
581 | int symbol_binding, |
582 | int symbol_type, |
583 | bool get_raw_symbol_values) { |
584 | for (SymbolIterator<ElfArch> it(this, section_type); |
585 | !it.done(); it.Next()) { |
586 | const char* name = it.GetSymbolName(); |
587 | if (!name) continue; |
588 | const typename ElfArch::Sym* sym = it.GetSymbol(); |
589 | if ((symbol_binding < 0 || ElfArch::Bind(sym) == symbol_binding) && |
590 | (symbol_type < 0 || ElfArch::Type(sym) == symbol_type)) { |
591 | typename ElfArch::Sym symbol = *sym; |
592 | // Add a PLT symbol in addition to the main undefined symbol. |
593 | // Only do this for SHT_DYNSYM, because PLT symbols are dynamic. |
594 | int symbol_index = it.GetCurrentSymbolIndex(); |
595 | // TODO(dthomson): Can be removed once all Java code is using the |
596 | // Google3 launcher. |
597 | if (section_type == SHT_DYNSYM && |
598 | static_cast<unsigned int>(symbol_index) < symbols_plt_offsets_.size() && |
599 | symbols_plt_offsets_[symbol_index] != 0) { |
600 | string plt_name = string(name) + kPLTFunctionSuffix; |
601 | if (plt_function_names_[symbol_index].empty()) { |
602 | plt_function_names_[symbol_index] = plt_name; |
603 | } else if (plt_function_names_[symbol_index] != plt_name) { |
604 | ; |
605 | } |
606 | sink->AddSymbol(plt_function_names_[symbol_index].c_str(), |
607 | symbols_plt_offsets_[it.GetCurrentSymbolIndex()], |
608 | plt_code_size_); |
609 | } |
610 | if (!get_raw_symbol_values) |
611 | AdjustSymbolValue(&symbol); |
612 | sink->AddSymbol(name, symbol.st_value, symbol.st_size); |
613 | } |
614 | } |
615 | } |
616 | |
617 | void VisitRelocationEntries() { |
618 | if (visited_relocation_entries_) { |
619 | return; |
620 | } |
621 | visited_relocation_entries_ = true; |
622 | |
623 | if (!plts_supported_) { |
624 | return; |
625 | } |
626 | // First determine if PLTs exist. If not, then there is nothing to do. |
627 | ElfReader::SectionInfo plt_section_info; |
628 | const char* plt_section = |
629 | GetSectionInfoByName(kElfPLTSectionName, &plt_section_info); |
630 | if (!plt_section) { |
631 | return; |
632 | } |
633 | if (plt_section_info.size == 0) { |
634 | return; |
635 | } |
636 | |
637 | // The PLTs could be referenced by either a Rel or Rela (Rel with Addend) |
638 | // section. |
639 | ElfReader::SectionInfo rel_section_info; |
640 | ElfReader::SectionInfo rela_section_info; |
641 | const char* rel_section = |
642 | GetSectionInfoByName(kElfPLTRelSectionName, &rel_section_info); |
643 | const char* rela_section = |
644 | GetSectionInfoByName(kElfPLTRelaSectionName, &rela_section_info); |
645 | |
646 | const typename ElfArch::Rel* rel = |
647 | reinterpret_cast<const typename ElfArch::Rel*>(rel_section); |
648 | const typename ElfArch::Rela* rela = |
649 | reinterpret_cast<const typename ElfArch::Rela*>(rela_section); |
650 | |
651 | if (!rel_section && !rela_section) { |
652 | return; |
653 | } |
654 | |
655 | // Use either Rel or Rela section, depending on which one exists. |
656 | size_t section_size = rel_section ? rel_section_info.size |
657 | : rela_section_info.size; |
658 | size_t entry_size = rel_section ? sizeof(typename ElfArch::Rel) |
659 | : sizeof(typename ElfArch::Rela); |
660 | |
661 | // Determine the number of entries in the dynamic symbol table. |
662 | ElfReader::SectionInfo dynsym_section_info; |
663 | const char* dynsym_section = |
664 | GetSectionInfoByName(kElfDynSymSectionName, &dynsym_section_info); |
665 | // The dynsym section might not exist, or it might be empty. In either case |
666 | // there is nothing to be done so return. |
667 | if (!dynsym_section || dynsym_section_info.size == 0) { |
668 | return; |
669 | } |
670 | size_t num_dynamic_symbols = |
671 | dynsym_section_info.size / dynsym_section_info.entsize; |
672 | symbols_plt_offsets_.resize(num_dynamic_symbols, 0); |
673 | |
674 | // TODO(dthomson): Can be removed once all Java code is using the |
675 | // Google3 launcher. |
676 | // Make storage room for PLT function name strings. |
677 | plt_function_names_.resize(num_dynamic_symbols); |
678 | |
679 | for (size_t i = 0; i < section_size / entry_size; ++i) { |
680 | // Determine symbol index from the |r_info| field. |
681 | int sym_index = ElfArch::r_sym(rel_section ? rel[i].r_info |
682 | : rela[i].r_info); |
683 | if (static_cast<unsigned int>(sym_index) >= symbols_plt_offsets_.size()) { |
684 | continue; |
685 | } |
686 | symbols_plt_offsets_[sym_index] = |
687 | plt_section_info.addr + plt0_size_ + i * plt_code_size_; |
688 | } |
689 | } |
690 | |
691 | // Return an ElfSectionReader for the first section of the given |
692 | // type by iterating through all section headers. Returns NULL if |
693 | // the section type is not found. |
694 | const ElfSectionReader<ElfArch>* GetSectionByType( |
695 | typename ElfArch::Word section_type) { |
696 | for (unsigned int k = 0u; k < GetNumSections(); ++k) { |
697 | if (section_headers_[k].sh_type == section_type) { |
698 | return GetSection(k); |
699 | } |
700 | } |
701 | return NULL; |
702 | } |
703 | |
704 | // Return the name of section "shndx". Returns NULL if the section |
705 | // is not found. |
706 | const char* GetSectionNameByIndex(int shndx) { |
707 | return GetSectionName(section_headers_[shndx].sh_name); |
708 | } |
709 | |
710 | // Return a pointer to section "shndx", and store the size in |
711 | // "size". Returns NULL if the section is not found. |
712 | const char* GetSectionContentsByIndex(int shndx, size_t* size) { |
713 | const ElfSectionReader<ElfArch>* section = GetSection(shndx); |
714 | if (section != NULL) { |
715 | *size = section->section_size(); |
716 | return section->contents(); |
717 | } |
718 | return NULL; |
719 | } |
720 | |
721 | // Return a pointer to the first section of the given name by |
722 | // iterating through all section headers, and store the size in |
723 | // "size". Returns NULL if the section name is not found. |
724 | const char* GetSectionContentsByName(const string& section_name, |
725 | size_t* size) { |
726 | for (unsigned int k = 0u; k < GetNumSections(); ++k) { |
727 | // When searching for sections in a .dwp file, the sections |
728 | // we're looking for will always be at the end of the section |
729 | // table, so reverse the direction of iteration. |
730 | int shndx = is_dwp_ ? GetNumSections() - k - 1 : k; |
731 | const char* name = GetSectionName(section_headers_[shndx].sh_name); |
732 | if (name != NULL && ElfReader::SectionNamesMatch(section_name, name)) { |
733 | const ElfSectionReader<ElfArch>* section = GetSection(shndx); |
734 | if (section == NULL) { |
735 | return NULL; |
736 | } else { |
737 | *size = section->section_size(); |
738 | return section->contents(); |
739 | } |
740 | } |
741 | } |
742 | return NULL; |
743 | } |
744 | |
745 | // This is like GetSectionContentsByName() but it returns a lot of extra |
746 | // information about the section. |
747 | const char* GetSectionInfoByName(const string& section_name, |
748 | ElfReader::SectionInfo* info) { |
749 | for (unsigned int k = 0u; k < GetNumSections(); ++k) { |
750 | // When searching for sections in a .dwp file, the sections |
751 | // we're looking for will always be at the end of the section |
752 | // table, so reverse the direction of iteration. |
753 | int shndx = is_dwp_ ? GetNumSections() - k - 1 : k; |
754 | const char* name = GetSectionName(section_headers_[shndx].sh_name); |
755 | if (name != NULL && ElfReader::SectionNamesMatch(section_name, name)) { |
756 | const ElfSectionReader<ElfArch>* section = GetSection(shndx); |
757 | if (section == NULL) { |
758 | return NULL; |
759 | } else { |
760 | info->type = section->header().sh_type; |
761 | info->flags = section->header().sh_flags; |
762 | info->addr = section->header().sh_addr; |
763 | info->offset = section->header().sh_offset; |
764 | info->size = section->header().sh_size; |
765 | info->link = section->header().sh_link; |
766 | info->info = section->header().sh_info; |
767 | info->addralign = section->header().sh_addralign; |
768 | info->entsize = section->header().sh_entsize; |
769 | return section->contents(); |
770 | } |
771 | } |
772 | } |
773 | return NULL; |
774 | } |
775 | |
776 | // p_vaddr of the first PT_LOAD segment (if any), or 0 if no PT_LOAD |
777 | // segments are present. This is the address an ELF image was linked |
778 | // (by static linker) to be loaded at. Usually (but not always) 0 for |
779 | // shared libraries and position-independent executables. |
780 | uint64_t VaddrOfFirstLoadSegment() const { |
781 | // Relocatable objects (of type ET_REL) do not have LOAD segments. |
782 | if (header_.e_type == ET_REL) { |
783 | return 0; |
784 | } |
785 | for (int i = 0; i < GetNumProgramHeaders(); ++i) { |
786 | if (program_headers_[i].p_type == PT_LOAD) { |
787 | return program_headers_[i].p_vaddr; |
788 | } |
789 | } |
790 | return 0; |
791 | } |
792 | |
793 | // According to the LSB ("ELF special sections"), sections with debug |
794 | // info are prefixed by ".debug". The names are not specified, but they |
795 | // look like ".debug_line", ".debug_info", etc. |
796 | bool HasDebugSections() { |
797 | // Debug sections are likely to be near the end, so reverse the |
798 | // direction of iteration. |
799 | for (int k = GetNumSections() - 1; k >= 0; --k) { |
800 | const char* name = GetSectionName(section_headers_[k].sh_name); |
801 | if (strncmp(name, ".debug" , strlen(".debug" )) == 0) return true; |
802 | if (strncmp(name, ".zdebug" , strlen(".zdebug" )) == 0) return true; |
803 | } |
804 | return false; |
805 | } |
806 | |
807 | bool IsDynamicSharedObject() const { |
808 | return header_.e_type == ET_DYN; |
809 | } |
810 | |
811 | // Return the number of sections. |
812 | uint64_t GetNumSections() const { |
813 | if (HasManySections()) |
814 | return first_section_header_.sh_size; |
815 | return header_.e_shnum; |
816 | } |
817 | |
818 | private: |
  // Sequence of (uint64_t key, symbol pointer) entries.  AddrToSymSorter and
  // AddrToSymEquals order and compare entries by the uint64_t key alone;
  // judging by the name, the key is presumably a symbol address.
  typedef vector<pair<uint64_t, const typename ElfArch::Sym*> > AddrToSymMap;
820 | |
821 | static bool AddrToSymSorter(const typename AddrToSymMap::value_type& lhs, |
822 | const typename AddrToSymMap::value_type& rhs) { |
823 | return lhs.first < rhs.first; |
824 | } |
825 | |
826 | static bool AddrToSymEquals(const typename AddrToSymMap::value_type& lhs, |
827 | const typename AddrToSymMap::value_type& rhs) { |
828 | return lhs.first == rhs.first; |
829 | } |
830 | |
831 | // Does this ELF file have too many sections to fit in the program header? |
832 | bool HasManySections() const { |
833 | return header_.e_shnum == SHN_UNDEF; |
834 | } |
835 | |
836 | // Return the number of program headers. |
837 | int () const { |
838 | if (HasManySections() && header_.e_phnum == 0xffff && |
839 | first_section_header_.sh_info != 0) |
840 | return first_section_header_.sh_info; |
841 | return header_.e_phnum; |
842 | } |
843 | |
844 | // Return the index of the string table. |
845 | int GetStringTableIndex() const { |
846 | if (HasManySections()) { |
847 | if (header_.e_shstrndx == 0xffff) |
848 | return first_section_header_.sh_link; |
849 | else if (header_.e_shstrndx >= GetNumSections()) |
850 | return 0; |
851 | } |
852 | return header_.e_shstrndx; |
853 | } |
854 | |
855 | // Given an offset into the section header string table, return the |
856 | // section name. |
857 | const char* GetSectionName(typename ElfArch::Word sh_name) { |
858 | const ElfSectionReader<ElfArch>* shstrtab = |
859 | GetSection(GetStringTableIndex()); |
860 | if (shstrtab != NULL) { |
861 | return shstrtab->GetOffset(sh_name); |
862 | } |
863 | return NULL; |
864 | } |
865 | |
866 | // Return an ElfSectionReader for the given section. The reader will |
867 | // be freed when this object is destroyed. |
868 | const ElfSectionReader<ElfArch>* GetSection(int num) { |
869 | const char* name; |
870 | // Hard-coding the name for the section-name string table prevents |
871 | // infinite recursion. |
872 | if (num == GetStringTableIndex()) |
873 | name = ".shstrtab" ; |
874 | else |
875 | name = GetSectionNameByIndex(num); |
876 | ElfSectionReader<ElfArch>*& reader = sections_[num]; |
877 | if (reader == NULL) |
878 | reader = new ElfSectionReader<ElfArch>(name, path_, fd_, |
879 | section_headers_[num]); |
880 | return reader; |
881 | } |
882 | |
883 | // Parse out the overall header information from the file and assert |
884 | // that it looks sane. This contains information like the magic |
885 | // number and target architecture. |
886 | bool (int fd, const string& path) { |
887 | // Read in the global ELF header. |
888 | if (pread(fd, &header_, sizeof(header_), 0) != sizeof(header_)) { |
889 | return false; |
890 | } |
891 | |
892 | // Must be an executable, dynamic shared object or relocatable object |
893 | if (header_.e_type != ET_EXEC && |
894 | header_.e_type != ET_DYN && |
895 | header_.e_type != ET_REL) { |
896 | return false; |
897 | } |
898 | // Need a section header. |
899 | if (header_.e_shoff == 0) { |
900 | return false; |
901 | } |
902 | |
903 | if (header_.e_shnum == SHN_UNDEF) { |
904 | // The number of sections in the program header is only a 16-bit value. In |
905 | // the event of overflow (greater than SHN_LORESERVE sections), e_shnum |
906 | // will read SHN_UNDEF and the true number of section header table entries |
907 | // is found in the sh_size field of the first section header. |
908 | // See: http://www.sco.com/developers/gabi/2003-12-17/ch4.sheader.html |
909 | if (pread(fd, &first_section_header_, sizeof(first_section_header_), |
910 | header_.e_shoff) != sizeof(first_section_header_)) { |
911 | return false; |
912 | } |
913 | } |
914 | |
915 | // Dynamically allocate enough space to store the section headers |
916 | // and read them out of the file. |
917 | const int = |
918 | GetNumSections() * sizeof(*section_headers_); |
919 | section_headers_ = new typename ElfArch::Shdr[section_headers_size]; |
920 | if (pread(fd, section_headers_, section_headers_size, header_.e_shoff) != |
921 | section_headers_size) { |
922 | return false; |
923 | } |
924 | |
925 | // Dynamically allocate enough space to store the program headers |
926 | // and read them out of the file. |
927 | //const int program_headers_size = |
928 | // GetNumProgramHeaders() * sizeof(*program_headers_); |
929 | program_headers_ = new typename ElfArch::Phdr[GetNumProgramHeaders()]; |
930 | |
931 | // Presize the sections array for efficiency. |
932 | sections_.resize(GetNumSections(), NULL); |
933 | return true; |
934 | } |
935 | |
936 | // Given the "value" of a function descriptor return the address of the |
937 | // function (i.e. the dereferenced value). Otherwise return "value". |
938 | uint64_t AdjustPPC64FunctionDescriptorSymbolValue(uint64_t value) { |
939 | if (opd_section_ != NULL && |
940 | opd_info_.addr <= value && |
941 | value < opd_info_.addr + opd_info_.size) { |
942 | uint64_t offset = value - opd_info_.addr; |
943 | return (*reinterpret_cast<const uint64_t*>(opd_section_ + offset)); |
944 | } |
945 | return value; |
946 | } |
947 | |
948 | void AdjustSymbolValue(typename ElfArch::Sym* sym) { |
949 | switch (header_.e_machine) { |
950 | case EM_ARM: |
951 | // For ARM architecture, if the LSB of the function symbol offset is set, |
952 | // it indicates a Thumb function. This bit should not be taken literally. |
953 | // Clear it. |
954 | if (ElfArch::Type(sym) == STT_FUNC) |
955 | sym->st_value = AdjustARMThumbSymbolValue(sym->st_value); |
956 | break; |
957 | case EM_386: |
958 | // No adjustment needed for Intel x86 architecture. However, explicitly |
959 | // define this case as we use it quite often. |
960 | break; |
961 | case EM_PPC64: |
962 | // PowerPC64 currently has function descriptors as part of the ABI. |
963 | // Function symbols need to be adjusted accordingly. |
964 | if (ElfArch::Type(sym) == STT_FUNC) |
965 | sym->st_value = AdjustPPC64FunctionDescriptorSymbolValue(sym->st_value); |
966 | break; |
967 | default: |
968 | break; |
969 | } |
970 | } |
971 | |
972 | friend class SymbolIterator<ElfArch>; |
973 | |
974 | // The file we're reading. |
975 | const string path_; |
976 | // Open file descriptor for path_. Not owned by this object. |
977 | const int fd_; |
978 | |
979 | // The global header of the ELF file. |
980 | typename ElfArch::Ehdr ; |
981 | |
982 | // The header of the first section. This may be used to supplement the ELF |
983 | // file header. |
984 | typename ElfArch::Shdr ; |
985 | |
986 | // Array of GetNumSections() section headers, allocated when we read |
987 | // in the global header. |
988 | typename ElfArch::Shdr* ; |
989 | |
990 | // Array of GetNumProgramHeaders() program headers, allocated when we read |
991 | // in the global header. |
992 | typename ElfArch::Phdr* ; |
993 | |
994 | // An array of pointers to ElfSectionReaders. Sections are |
995 | // mmaped as they're needed and not released until this object is |
996 | // destroyed. |
997 | vector<ElfSectionReader<ElfArch>*> sections_; |
998 | |
999 | // For PowerPC64 we need to keep track of function descriptors when looking up |
1000 | // values for funtion symbols values. Function descriptors are kept in the |
1001 | // .opd section and are dereferenced to find the function address. |
1002 | ElfReader::SectionInfo opd_info_; |
1003 | const char* opd_section_; // Must be checked for NULL before use. |
1004 | int64_t base_for_text_; |
1005 | |
1006 | // Read PLT-related sections for the current architecture. |
1007 | bool plts_supported_; |
1008 | // Code size of each PLT function for the current architecture. |
1009 | size_t plt_code_size_; |
1010 | // Size of the special first entry in the .plt section that calls the runtime |
1011 | // loader resolution routine, and that all other entries jump to when doing |
1012 | // lazy symbol binding. |
1013 | size_t plt0_size_; |
1014 | |
1015 | // Maps a dynamic symbol index to a PLT offset. |
1016 | // The vector entry index is the dynamic symbol index. |
1017 | std::vector<uint64_t> symbols_plt_offsets_; |
1018 | |
1019 | // Container for PLT function name strings. These strings are passed by |
1020 | // reference to SymbolSink::AddSymbol() so they need to be stored somewhere. |
1021 | std::vector<string> plt_function_names_; |
1022 | |
1023 | bool visited_relocation_entries_; |
1024 | |
1025 | // True if this is a .dwp file. |
1026 | bool is_dwp_; |
1027 | }; |
1028 | |
1029 | ElfReader::ElfReader(const string& path) |
1030 | : path_(path), fd_(-1), impl32_(NULL), impl64_(NULL) { |
1031 | // linux 2.6.XX kernel can show deleted files like this: |
1032 | // /var/run/nscd/dbYLJYaE (deleted) |
1033 | // and the kernel-supplied vdso and vsyscall mappings like this: |
1034 | // [vdso] |
1035 | // [vsyscall] |
1036 | if (MyHasSuffixString(path, " (deleted)" )) |
1037 | return; |
1038 | if (path == "[vdso]" ) |
1039 | return; |
1040 | if (path == "[vsyscall]" ) |
1041 | return; |
1042 | |
1043 | fd_ = open(path.c_str(), O_RDONLY); |
1044 | } |
1045 | |
1046 | ElfReader::~ElfReader() { |
1047 | if (fd_ != -1) |
1048 | close(fd_); |
1049 | if (impl32_ != NULL) |
1050 | delete impl32_; |
1051 | if (impl64_ != NULL) |
1052 | delete impl64_; |
1053 | } |
1054 | |
1055 | |
// The only word-size specific part of this file is IsNativeElfFile().
// NATIVE_ELF_ARCH selects the ElfArch that matches the width of
// unsigned long on the build target: Elf32 when ULONG_MAX is 0xffffffff,
// Elf64 when it is 0xffffffffffffffff.  Any other width is rejected at
// compile time.
#if ULONG_MAX == 0xffffffff
#define NATIVE_ELF_ARCH Elf32
#elif ULONG_MAX == 0xffffffffffffffff
#define NATIVE_ELF_ARCH Elf64
#else
#error "Invalid word size"
#endif
1064 | |
1065 | template <typename ElfArch> |
1066 | static bool IsElfFile(const int fd, const string& path) { |
1067 | if (fd < 0) |
1068 | return false; |
1069 | if (!ElfReaderImpl<ElfArch>::IsArchElfFile(fd, NULL)) { |
1070 | // No error message here. IsElfFile gets called many times. |
1071 | return false; |
1072 | } |
1073 | return true; |
1074 | } |
1075 | |
1076 | bool ElfReader::IsNativeElfFile() const { |
1077 | return IsElfFile<NATIVE_ELF_ARCH>(fd_, path_); |
1078 | } |
1079 | |
1080 | bool ElfReader::IsElf32File() const { |
1081 | return IsElfFile<Elf32>(fd_, path_); |
1082 | } |
1083 | |
1084 | bool ElfReader::IsElf64File() const { |
1085 | return IsElfFile<Elf64>(fd_, path_); |
1086 | } |
1087 | |
1088 | /* |
1089 | void ElfReader::AddSymbols(SymbolMap* symbols, |
1090 | uint64_t mem_offset, uint64_t file_offset, |
1091 | uint64_t length) { |
1092 | if (fd_ < 0) |
1093 | return; |
1094 | // TODO(chatham): Actually use the information about file offset and |
1095 | // the length of the mapped section. On some machines the data |
1096 | // section gets mapped as executable, and we'll end up reading the |
1097 | // file twice and getting some of the offsets wrong. |
1098 | if (IsElf32File()) { |
1099 | GetImpl32()->GetSymbolPositions(symbols, SHT_SYMTAB, |
1100 | mem_offset, file_offset); |
1101 | GetImpl32()->GetSymbolPositions(symbols, SHT_DYNSYM, |
1102 | mem_offset, file_offset); |
1103 | } else if (IsElf64File()) { |
1104 | GetImpl64()->GetSymbolPositions(symbols, SHT_SYMTAB, |
1105 | mem_offset, file_offset); |
1106 | GetImpl64()->GetSymbolPositions(symbols, SHT_DYNSYM, |
1107 | mem_offset, file_offset); |
1108 | } |
1109 | } |
1110 | */ |
1111 | |
1112 | void ElfReader::VisitSymbols(ElfReader::SymbolSink* sink) { |
1113 | VisitSymbols(sink, -1, -1); |
1114 | } |
1115 | |
1116 | void ElfReader::VisitSymbols(ElfReader::SymbolSink* sink, |
1117 | int symbol_binding, |
1118 | int symbol_type) { |
1119 | VisitSymbols(sink, symbol_binding, symbol_type, false); |
1120 | } |
1121 | |
1122 | void ElfReader::VisitSymbols(ElfReader::SymbolSink* sink, |
1123 | int symbol_binding, |
1124 | int symbol_type, |
1125 | bool get_raw_symbol_values) { |
1126 | if (IsElf32File()) { |
1127 | GetImpl32()->VisitRelocationEntries(); |
1128 | GetImpl32()->VisitSymbols(SHT_SYMTAB, sink, symbol_binding, symbol_type, |
1129 | get_raw_symbol_values); |
1130 | GetImpl32()->VisitSymbols(SHT_DYNSYM, sink, symbol_binding, symbol_type, |
1131 | get_raw_symbol_values); |
1132 | } else if (IsElf64File()) { |
1133 | GetImpl64()->VisitRelocationEntries(); |
1134 | GetImpl64()->VisitSymbols(SHT_SYMTAB, sink, symbol_binding, symbol_type, |
1135 | get_raw_symbol_values); |
1136 | GetImpl64()->VisitSymbols(SHT_DYNSYM, sink, symbol_binding, symbol_type, |
1137 | get_raw_symbol_values); |
1138 | } |
1139 | } |
1140 | |
1141 | uint64_t ElfReader::VaddrOfFirstLoadSegment() { |
1142 | if (IsElf32File()) { |
1143 | return GetImpl32()->VaddrOfFirstLoadSegment(); |
1144 | } else if (IsElf64File()) { |
1145 | return GetImpl64()->VaddrOfFirstLoadSegment(); |
1146 | } else { |
1147 | return 0; |
1148 | } |
1149 | } |
1150 | |
1151 | const char* ElfReader::GetSectionName(int shndx) { |
1152 | if (shndx < 0 || static_cast<unsigned int>(shndx) >= GetNumSections()) return NULL; |
1153 | if (IsElf32File()) { |
1154 | return GetImpl32()->GetSectionNameByIndex(shndx); |
1155 | } else if (IsElf64File()) { |
1156 | return GetImpl64()->GetSectionNameByIndex(shndx); |
1157 | } else { |
1158 | return NULL; |
1159 | } |
1160 | } |
1161 | |
1162 | uint64_t ElfReader::GetNumSections() { |
1163 | if (IsElf32File()) { |
1164 | return GetImpl32()->GetNumSections(); |
1165 | } else if (IsElf64File()) { |
1166 | return GetImpl64()->GetNumSections(); |
1167 | } else { |
1168 | return 0; |
1169 | } |
1170 | } |
1171 | |
1172 | const char* ElfReader::GetSectionByIndex(int shndx, size_t* size) { |
1173 | if (IsElf32File()) { |
1174 | return GetImpl32()->GetSectionContentsByIndex(shndx, size); |
1175 | } else if (IsElf64File()) { |
1176 | return GetImpl64()->GetSectionContentsByIndex(shndx, size); |
1177 | } else { |
1178 | return NULL; |
1179 | } |
1180 | } |
1181 | |
1182 | const char* ElfReader::GetSectionByName(const string& section_name, |
1183 | size_t* size) { |
1184 | if (IsElf32File()) { |
1185 | return GetImpl32()->GetSectionContentsByName(section_name, size); |
1186 | } else if (IsElf64File()) { |
1187 | return GetImpl64()->GetSectionContentsByName(section_name, size); |
1188 | } else { |
1189 | return NULL; |
1190 | } |
1191 | } |
1192 | |
1193 | const char* ElfReader::GetSectionInfoByName(const string& section_name, |
1194 | SectionInfo* info) { |
1195 | if (IsElf32File()) { |
1196 | return GetImpl32()->GetSectionInfoByName(section_name, info); |
1197 | } else if (IsElf64File()) { |
1198 | return GetImpl64()->GetSectionInfoByName(section_name, info); |
1199 | } else { |
1200 | return NULL; |
1201 | } |
1202 | } |
1203 | |
1204 | bool ElfReader::SectionNamesMatch(const string& name, const string& sh_name) { |
1205 | if ((name.find(".debug_" , 0) == 0) && (sh_name.find(".zdebug_" , 0) == 0)) { |
1206 | const string name_suffix(name, strlen(".debug_" )); |
1207 | const string sh_name_suffix(sh_name, strlen(".zdebug_" )); |
1208 | return name_suffix == sh_name_suffix; |
1209 | } |
1210 | return name == sh_name; |
1211 | } |
1212 | |
1213 | bool ElfReader::IsDynamicSharedObject() { |
1214 | if (IsElf32File()) { |
1215 | return GetImpl32()->IsDynamicSharedObject(); |
1216 | } else if (IsElf64File()) { |
1217 | return GetImpl64()->IsDynamicSharedObject(); |
1218 | } else { |
1219 | return false; |
1220 | } |
1221 | } |
1222 | |
1223 | ElfReaderImpl<Elf32>* ElfReader::GetImpl32() { |
1224 | if (impl32_ == NULL) { |
1225 | impl32_ = new ElfReaderImpl<Elf32>(path_, fd_); |
1226 | } |
1227 | return impl32_; |
1228 | } |
1229 | |
1230 | ElfReaderImpl<Elf64>* ElfReader::GetImpl64() { |
1231 | if (impl64_ == NULL) { |
1232 | impl64_ = new ElfReaderImpl<Elf64>(path_, fd_); |
1233 | } |
1234 | return impl64_; |
1235 | } |
1236 | |
1237 | // Return true if file is an ELF binary of ElfArch, with unstripped |
1238 | // debug info (debug_only=true) or symbol table (debug_only=false). |
1239 | // Otherwise, return false. |
1240 | template <typename ElfArch> |
1241 | static bool IsNonStrippedELFBinaryImpl(const string& path, const int fd, |
1242 | bool debug_only) { |
1243 | if (!ElfReaderImpl<ElfArch>::IsArchElfFile(fd, NULL)) return false; |
1244 | ElfReaderImpl<ElfArch> elf_reader(path, fd); |
1245 | return debug_only ? |
1246 | elf_reader.HasDebugSections() |
1247 | : (elf_reader.GetSectionByType(SHT_SYMTAB) != NULL); |
1248 | } |
1249 | |
1250 | // Helper for the IsNon[Debug]StrippedELFBinary functions. |
1251 | static bool IsNonStrippedELFBinaryHelper(const string& path, |
1252 | bool debug_only) { |
1253 | const int fd = open(path.c_str(), O_RDONLY); |
1254 | if (fd == -1) { |
1255 | return false; |
1256 | } |
1257 | |
1258 | if (IsNonStrippedELFBinaryImpl<Elf32>(path, fd, debug_only) || |
1259 | IsNonStrippedELFBinaryImpl<Elf64>(path, fd, debug_only)) { |
1260 | close(fd); |
1261 | return true; |
1262 | } |
1263 | close(fd); |
1264 | return false; |
1265 | } |
1266 | |
1267 | bool ElfReader::IsNonStrippedELFBinary(const string& path) { |
1268 | return IsNonStrippedELFBinaryHelper(path, false); |
1269 | } |
1270 | |
1271 | bool ElfReader::IsNonDebugStrippedELFBinary(const string& path) { |
1272 | return IsNonStrippedELFBinaryHelper(path, true); |
1273 | } |
1274 | } // namespace google_breakpad |
1275 | |