1// -*- mode: C++ -*-
2
3// Copyright (c) 2010 Google Inc. All Rights Reserved.
4//
5// Redistribution and use in source and binary forms, with or without
6// modification, are permitted provided that the following conditions are
7// met:
8//
9// * Redistributions of source code must retain the above copyright
10// notice, this list of conditions and the following disclaimer.
11// * Redistributions in binary form must reproduce the above
12// copyright notice, this list of conditions and the following disclaimer
13// in the documentation and/or other materials provided with the
14// distribution.
15// * Neither the name of Google Inc. nor the names of its
16// contributors may be used to endorse or promote products derived from
17// this software without specific prior written permission.
18//
19// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31// CFI reader author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com>
32
// This file contains definitions related to the DWARF2/3 reader and
// its handler interfaces.
// The DWARF2/3 specification can be found at
// http://dwarf.freestandards.org and should be considered required
// reading if you wish to modify the implementation.
// Only a cursory attempt is made to explain terminology that is
// used here, as it is much better explained in the standard documents.
40#ifndef COMMON_DWARF_DWARF2READER_H__
41#define COMMON_DWARF_DWARF2READER_H__
42
43#include <assert.h>
44#include <stdint.h>
45
46#include <list>
47#include <map>
48#include <string>
49#include <utility>
50#include <vector>
51#include <memory>
52
53#include "common/dwarf/bytereader.h"
54#include "common/dwarf/dwarf2enums.h"
55#include "common/dwarf/types.h"
56#include "common/using_std_string.h"
57#include "common/dwarf/elf_reader.h"
58
59namespace google_breakpad {
60struct LineStateMachine;
61class Dwarf2Handler;
62class LineInfoHandler;
63class DwpReader;
64
// Maps the name of a section to a pair holding a pointer to the section's
// data and the size of the section.
using SectionMap = std::map<string, std::pair<const uint8_t*, uint64_t>>;
68
69// Abstract away the difference between elf and mach-o section names.
70// Elf-names use ".section_name, mach-o uses "__section_name". Pass "name" in
71// the elf form, ".section_name".
72const SectionMap::const_iterator GetSectionByName(const SectionMap&
73 sections, const char* name);
74
75// Most of the time, this struct functions as a simple attribute and form pair.
76// However, Dwarf5 DW_FORM_implicit_const means that a form may have its value
77// in line in the abbrev table, and that value must be associated with the
78// pair until the attr's value is needed.
79struct AttrForm {
80 AttrForm(enum DwarfAttribute attr, enum DwarfForm form, uint64_t value) :
81 attr_(attr), form_(form), value_(value) { }
82
83 enum DwarfAttribute attr_;
84 enum DwarfForm form_;
85 uint64_t value_;
86};
87typedef std::list<AttrForm> AttributeList;
88typedef AttributeList::iterator AttributeIterator;
89typedef AttributeList::const_iterator ConstAttributeIterator;
90
// In-memory form of a DWARF line info header.
struct LineInfoHeader {
  uint64_t total_length;
  uint16_t version;
  uint64_t prologue_length;
  // "insn" stands for instruction.
  uint8_t min_insn_length;
  // "stmt" stands for statement.
  bool default_is_stmt;
  int8_t line_base;
  uint8_t line_range;
  uint8_t opcode_base;
  // Held through a pointer so that signalsafe_addr2line is able to use this
  // structure without heap allocation problems.
  std::vector<unsigned char>* std_opcode_lengths;
};
104
105class LineInfo {
106 public:
107
108 // Initializes a .debug_line reader. Buffer and buffer length point
109 // to the beginning and length of the line information to read.
110 // Reader is a ByteReader class that has the endianness set
111 // properly.
112 LineInfo(const uint8_t* buffer, uint64_t buffer_length,
113 ByteReader* reader, const uint8_t* string_buffer,
114 size_t string_buffer_length, const uint8_t* line_string_buffer,
115 size_t line_string_buffer_length, LineInfoHandler* handler);
116
117 virtual ~LineInfo() {
118 if (header_.std_opcode_lengths) {
119 delete header_.std_opcode_lengths;
120 }
121 }
122
123 // Start processing line info, and calling callbacks in the handler.
124 // Consumes the line number information for a single compilation unit.
125 // Returns the number of bytes processed.
126 uint64_t Start();
127
128 // Process a single line info opcode at START using the state
129 // machine at LSM. Return true if we should define a line using the
130 // current state of the line state machine. Place the length of the
131 // opcode in LEN.
132 // If LSM_PASSES_PC is non-NULL, this function also checks if the lsm
133 // passes the address of PC. In other words, LSM_PASSES_PC will be
134 // set to true, if the following condition is met.
135 //
136 // lsm's old address < PC <= lsm's new address
137 static bool ProcessOneOpcode(ByteReader* reader,
138 LineInfoHandler* handler,
139 const struct LineInfoHeader& header,
140 const uint8_t* start,
141 struct LineStateMachine* lsm,
142 size_t* len,
143 uintptr pc,
144 bool* lsm_passes_pc);
145
146 private:
147 // Reads the DWARF2/3 header for this line info.
148 void ReadHeader();
149
150 // Reads the DWARF2/3 line information
151 void ReadLines();
152
153 // Read the DWARF5 types and forms for the file and directory tables.
154 void ReadTypesAndForms(const uint8_t** lineptr, uint32_t* content_types,
155 uint32_t* content_forms, uint32_t max_types,
156 uint32_t* format_count);
157
158 // Read a row from the dwarf5 LineInfo file table.
159 void ReadFileRow(const uint8_t** lineptr, const uint32_t* content_types,
160 const uint32_t* content_forms, uint32_t row,
161 uint32_t format_count);
162
163 // Read and return the data at *lineptr according to form. Advance
164 // *lineptr appropriately.
165 uint64_t ReadUnsignedData(uint32_t form, const uint8_t** lineptr);
166
167 // Read and return the data at *lineptr according to form. Advance
168 // *lineptr appropriately.
169 const char* ReadStringForm(uint32_t form, const uint8_t** lineptr);
170
171 // The associated handler to call processing functions in
172 LineInfoHandler* handler_;
173
174 // The associated ByteReader that handles endianness issues for us
175 ByteReader* reader_;
176
177 // A DWARF line info header. This is not the same size as in the actual file,
178 // as the one in the file may have a 32 bit or 64 bit lengths
179
180 struct LineInfoHeader header_;
181
182 // buffer is the buffer for our line info, starting at exactly where
183 // the line info to read is. after_header is the place right after
184 // the end of the line information header.
185 const uint8_t* buffer_;
186#ifndef NDEBUG
187 uint64_t buffer_length_;
188#endif
189 // Convenience pointers into .debug_str and .debug_line_str. These exactly
190 // correspond to those in the compilation unit.
191 const uint8_t* string_buffer_;
192#ifndef NDEBUG
193 uint64_t string_buffer_length_;
194#endif
195 const uint8_t* line_string_buffer_;
196#ifndef NDEBUG
197 uint64_t line_string_buffer_length_;
198#endif
199
200 const uint8_t* after_header_;
201};
202
// The interface through which the line info reader talks to its client.
// The reader calls these virtual functions for interesting events that
// happen while reading line info. Every default implementation does
// nothing.
class LineInfoHandler {
 public:
  LineInfoHandler() = default;

  virtual ~LineInfoHandler() = default;

  // Called when we define a directory. NAME is the directory name,
  // DIR_NUM is the directory number.
  virtual void DefineDir(const string& /* name */, uint32_t /* dir_num */) {}

  // Called when we define a filename. NAME is the filename. FILE_NUM is
  // the file number, which is -1 if the file index is the next index after
  // the last numbered index (this happens when files are dynamically
  // defined by the line program). DIR_NUM is the directory index for the
  // directory name of this file, MOD_TIME is the modification time of the
  // file, and LENGTH is the length of the file.
  virtual void DefineFile(const string& /* name */, int32_t /* file_num */,
                          uint32_t /* dir_num */, uint64_t /* mod_time */,
                          uint64_t /* length */) {}

  // Called when the line info reader has a new line, address pair ready
  // for us. ADDRESS is the address of the code, LENGTH is the length of
  // its machine code in bytes, FILE_NUM is the file number containing the
  // code, LINE_NUM is the line number in that file for the code, and
  // COLUMN_NUM is the column number the code starts at, if we know it
  // (0 otherwise).
  virtual void AddLine(uint64_t /* address */, uint64_t /* length */,
                       uint32_t /* file_num */, uint32_t /* line_num */,
                       uint32_t /* column_num */) {}
};
238
// Receiver for the ranges found in a range list. Both default
// implementations do nothing.
class RangeListHandler {
 public:
  RangeListHandler() = default;

  virtual ~RangeListHandler() = default;

  // Add a range.
  virtual void AddRange(uint64_t /* begin */, uint64_t /* end */) {}

  // Finish processing the range list.
  virtual void Finish() {}
};
251
252class RangeListReader {
253 public:
254 // Reading a range list requires quite a bit of information
255 // from the compilation unit. Package it conveniently.
256 struct CURangesInfo {
257 CURangesInfo() :
258 version_(0), base_address_(0), ranges_base_(0),
259 buffer_(nullptr), size_(0), addr_buffer_(nullptr),
260 addr_buffer_size_(0), addr_base_(0) { }
261
262 uint16_t version_;
263 // Ranges base address. Ordinarily the CU's low_pc.
264 uint64_t base_address_;
265 // Offset into .debug_rnglists for this CU's rangelists.
266 uint64_t ranges_base_;
267 // Contents of either .debug_ranges or .debug_rnglists.
268 const uint8_t* buffer_;
269 uint64_t size_;
270 // Contents of .debug_addr. This cu's contribution starts at
271 // addr_base_
272 const uint8_t* addr_buffer_;
273 uint64_t addr_buffer_size_;
274 uint64_t addr_base_;
275 };
276
277 RangeListReader(ByteReader* reader, CURangesInfo* cu_info,
278 RangeListHandler* handler) :
279 reader_(reader), cu_info_(cu_info), handler_(handler),
280 offset_array_(0) { }
281
282 // Read ranges from cu_info as specified by form and data.
283 bool ReadRanges(enum DwarfForm form, uint64_t data);
284
285 private:
286 // Read dwarf4 .debug_ranges at offset.
287 bool ReadDebugRanges(uint64_t offset);
288 // Read dwarf5 .debug_rngslist at offset.
289 bool ReadDebugRngList(uint64_t offset);
290
291 // Convenience functions to handle the mechanics of reading entries in the
292 // ranges section.
293 uint64_t ReadULEB(uint64_t offset, uint64_t* value) {
294 size_t len;
295 *value = reader_->ReadUnsignedLEB128(cu_info_->buffer_ + offset, &len);
296 return len;
297 }
298
299 uint64_t ReadAddress(uint64_t offset, uint64_t* value) {
300 *value = reader_->ReadAddress(cu_info_->buffer_ + offset);
301 return reader_->AddressSize();
302 }
303
304 // Read the address at this CU's addr_index in the .debug_addr section.
305 uint64_t GetAddressAtIndex(uint64_t addr_index) {
306 assert(cu_info_->addr_buffer_ != nullptr);
307 uint64_t offset =
308 cu_info_->addr_base_ + addr_index * reader_->AddressSize();
309 assert(offset < cu_info_->addr_buffer_size_);
310 return reader_->ReadAddress(cu_info_->addr_buffer_ + offset);
311 }
312
313 ByteReader* reader_;
314 CURangesInfo* cu_info_;
315 RangeListHandler* handler_;
316 uint64_t offset_array_;
317};
318
319// This class is the main interface between the reader and the
320// client. The virtual functions inside this get called for
321// interesting events that happen during DWARF2 reading.
322// The default implementation skips everything.
323class Dwarf2Handler {
324 public:
325 Dwarf2Handler() { }
326
327 virtual ~Dwarf2Handler() { }
328
329 // Start to process a compilation unit at OFFSET from the beginning of the
330 // .debug_info section. Return false if you would like to skip this
331 // compilation unit.
332 virtual bool StartCompilationUnit(uint64_t offset, uint8_t address_size,
333 uint8_t offset_size, uint64_t cu_length,
334 uint8_t dwarf_version) { return false; }
335
336 // When processing a skeleton compilation unit, resulting from a split
337 // DWARF compilation, once the skeleton debug info has been read,
338 // the reader will call this function to ask the client if it needs
339 // the full debug info from the .dwo or .dwp file. Return true if
340 // you need it, or false to skip processing the split debug info.
341 virtual bool NeedSplitDebugInfo() { return true; }
342
343 // Start to process a split compilation unit at OFFSET from the beginning of
344 // the debug_info section in the .dwp/.dwo file. Return false if you would
345 // like to skip this compilation unit.
346 virtual bool StartSplitCompilationUnit(uint64_t offset,
347 uint64_t cu_length) { return false; }
348
349 // Start to process a DIE at OFFSET from the beginning of the .debug_info
350 // section. Return false if you would like to skip this DIE.
351 virtual bool StartDIE(uint64_t offset, enum DwarfTag tag) { return false; }
352
353 // Called when we have an attribute with unsigned data to give to our
354 // handler. The attribute is for the DIE at OFFSET from the beginning of the
355 // .debug_info section. Its name is ATTR, its form is FORM, and its value is
356 // DATA.
357 virtual void ProcessAttributeUnsigned(uint64_t offset,
358 enum DwarfAttribute attr,
359 enum DwarfForm form,
360 uint64_t data) { }
361
362 // Called when we have an attribute with signed data to give to our handler.
363 // The attribute is for the DIE at OFFSET from the beginning of the
364 // .debug_info section. Its name is ATTR, its form is FORM, and its value is
365 // DATA.
366 virtual void ProcessAttributeSigned(uint64_t offset,
367 enum DwarfAttribute attr,
368 enum DwarfForm form,
369 int64_t data) { }
370
371 // Called when we have an attribute whose value is a reference to
372 // another DIE. The attribute belongs to the DIE at OFFSET from the
373 // beginning of the .debug_info section. Its name is ATTR, its form
374 // is FORM, and the offset of the DIE being referred to from the
375 // beginning of the .debug_info section is DATA.
376 virtual void ProcessAttributeReference(uint64_t offset,
377 enum DwarfAttribute attr,
378 enum DwarfForm form,
379 uint64_t data) { }
380
381 // Called when we have an attribute with a buffer of data to give to our
382 // handler. The attribute is for the DIE at OFFSET from the beginning of the
383 // .debug_info section. Its name is ATTR, its form is FORM, DATA points to
384 // the buffer's contents, and its length in bytes is LENGTH. The buffer is
385 // owned by the caller, not the callee, and may not persist for very long.
386 // If you want the data to be available later, it needs to be copied.
387 virtual void ProcessAttributeBuffer(uint64_t offset,
388 enum DwarfAttribute attr,
389 enum DwarfForm form,
390 const uint8_t* data,
391 uint64_t len) { }
392
393 // Called when we have an attribute with string data to give to our handler.
394 // The attribute is for the DIE at OFFSET from the beginning of the
395 // .debug_info section. Its name is ATTR, its form is FORM, and its value is
396 // DATA.
397 virtual void ProcessAttributeString(uint64_t offset,
398 enum DwarfAttribute attr,
399 enum DwarfForm form,
400 const string& data) { }
401
402 // Called when we have an attribute whose value is the 64-bit signature
403 // of a type unit in the .debug_types section. OFFSET is the offset of
404 // the DIE whose attribute we're reporting. ATTR and FORM are the
405 // attribute's name and form. SIGNATURE is the type unit's signature.
406 virtual void ProcessAttributeSignature(uint64_t offset,
407 enum DwarfAttribute attr,
408 enum DwarfForm form,
409 uint64_t signature) { }
410
411 // Called when finished processing the DIE at OFFSET.
412 // Because DWARF2/3 specifies a tree of DIEs, you may get starts
413 // before ends of the previous DIE, as we process children before
414 // ending the parent.
415 virtual void EndDIE(uint64_t offset) { }
416
417};
418
// The base of DWARF2/3 debug info is a DIE (Debugging Information
// Entry).
// DWARF groups DIEs into a tree and calls the root of this tree a
// "compilation unit". Most of the time, there is one compilation
// unit in the .debug_info section for each file that had debug info
// generated.
// Each DIE consists of:

// 1. a tag specifying a thing that is being described (e.g.
// DW_TAG_subprogram for functions, DW_TAG_variable for variables, etc.),
// 2. attributes (such as DW_AT_location for location in memory,
// DW_AT_name for name), and data for each attribute,
// 3. a flag saying whether the DIE has children or not.
432
433// In order to gain some amount of compression, the format of
434// each DIE (tag name, attributes and data forms for the attributes)
435// are stored in a separate table called the "abbreviation table".
436// This is done because a large number of DIEs have the exact same tag
437// and list of attributes, but different data for those attributes.
438// As a result, the .debug_info section is just a stream of data, and
439// requires reading of the .debug_abbrev section to say what the data
440// means.
441
442// As a warning to the user, it should be noted that the reason for
443// using absolute offsets from the beginning of .debug_info is that
444// DWARF2/3 supports referencing DIE's from other DIE's by their offset
445// from either the current compilation unit start, *or* the beginning
446// of the .debug_info section. This means it is possible to reference
447// a DIE in one compilation unit from a DIE in another compilation
448// unit. This style of reference is usually used to eliminate
449// duplicated information that occurs across compilation
// units, such as base types, etc. GCC 3.4+ supports this with
// -feliminate-dwarf2-dups. Other toolchains will sometimes do
// duplicate elimination in the linker.
453
454class CompilationUnit {
455 public:
456
457 // Initialize a compilation unit. This requires a map of sections,
458 // the offset of this compilation unit in the .debug_info section, a
459 // ByteReader, and a Dwarf2Handler class to call callbacks in.
460 CompilationUnit(const string& path, const SectionMap& sections,
461 uint64_t offset, ByteReader* reader, Dwarf2Handler* handler);
462 virtual ~CompilationUnit() {
463 if (abbrevs_) delete abbrevs_;
464 }
465
466 // Initialize a compilation unit from a .dwo or .dwp file.
467 // In this case, we need the .debug_addr section from the
468 // executable file that contains the corresponding skeleton
469 // compilation unit. We also inherit the Dwarf2Handler from
470 // the executable file, and call it as if we were still
471 // processing the original compilation unit.
472 void SetSplitDwarf(const uint8_t* addr_buffer, uint64_t addr_buffer_length,
473 uint64_t addr_base, uint64_t ranges_base, uint64_t dwo_id);
474
475 // Begin reading a Dwarf2 compilation unit, and calling the
476 // callbacks in the Dwarf2Handler
477
478 // Return the full length of the compilation unit, including
479 // headers. This plus the starting offset passed to the constructor
480 // is the offset of the end of the compilation unit --- and the
481 // start of the next compilation unit, if there is one.
482 uint64_t Start();
483
484 private:
485
486 // This struct represents a single DWARF2/3 abbreviation
487 // The abbreviation tells how to read a DWARF2/3 DIE, and consist of a
488 // tag and a list of attributes, as well as the data form of each attribute.
489 struct Abbrev {
490 uint64_t number;
491 enum DwarfTag tag;
492 bool has_children;
493 AttributeList attributes;
494 };
495
496 // A DWARF2/3 compilation unit header. This is not the same size as
497 // in the actual file, as the one in the file may have a 32 bit or
498 // 64 bit length.
499 struct CompilationUnitHeader {
500 uint64_t length;
501 uint16_t version;
502 uint64_t abbrev_offset;
503 uint8_t address_size;
504 } header_;
505
506 // Reads the DWARF2/3 header for this compilation unit.
507 void ReadHeader();
508
509 // Reads the DWARF2/3 abbreviations for this compilation unit
510 void ReadAbbrevs();
511
512 // Read the abbreviation offset for this compilation unit
513 size_t ReadAbbrevOffset(const uint8_t* headerptr);
514
515 // Read the address size for this compilation unit
516 size_t ReadAddressSize(const uint8_t* headerptr);
517
518 // Read the DWO id from a split or skeleton compilation unit header
519 size_t ReadDwoId(const uint8_t* headerptr);
520
521 // Read the type signature from a type or split type compilation unit header
522 size_t ReadTypeSignature(const uint8_t* headerptr);
523
524 // Read the DWO id from a split or skeleton compilation unit header
525 size_t ReadTypeOffset(const uint8_t* headerptr);
526
527 // Processes a single DIE for this compilation unit and return a new
528 // pointer just past the end of it
529 const uint8_t* ProcessDIE(uint64_t dieoffset,
530 const uint8_t* start,
531 const Abbrev& abbrev);
532
533 // Processes a single attribute and return a new pointer just past the
534 // end of it
535 const uint8_t* ProcessAttribute(uint64_t dieoffset,
536 const uint8_t* start,
537 enum DwarfAttribute attr,
538 enum DwarfForm form,
539 uint64_t implicit_const);
540
541 // Special version of ProcessAttribute, for finding str_offsets_base and
542 // DW_AT_addr_base in DW_TAG_compile_unit, for DWARF v5.
543 const uint8_t* ProcessOffsetBaseAttribute(uint64_t dieoffset,
544 const uint8_t* start,
545 enum DwarfAttribute attr,
546 enum DwarfForm form,
547 uint64_t implicit_const);
548
549 // Called when we have an attribute with unsigned data to give to
550 // our handler. The attribute is for the DIE at OFFSET from the
551 // beginning of compilation unit, has a name of ATTR, a form of
552 // FORM, and the actual data of the attribute is in DATA.
553 // If we see a DW_AT_GNU_dwo_id attribute, save the value so that
554 // we can find the debug info in a .dwo or .dwp file.
555 void ProcessAttributeUnsigned(uint64_t offset,
556 enum DwarfAttribute attr,
557 enum DwarfForm form,
558 uint64_t data) {
559 if (attr == DW_AT_GNU_dwo_id) {
560 dwo_id_ = data;
561 }
562 else if (attr == DW_AT_GNU_addr_base || attr == DW_AT_addr_base) {
563 addr_base_ = data;
564 }
565 else if (attr == DW_AT_str_offsets_base) {
566 str_offsets_base_ = data;
567 }
568 else if (attr == DW_AT_GNU_ranges_base || attr == DW_AT_rnglists_base) {
569 ranges_base_ = data;
570 }
571 // TODO(yunlian): When we add DW_AT_ranges_base from DWARF-5,
572 // that base will apply to DW_AT_ranges attributes in the
573 // skeleton CU as well as in the .dwo/.dwp files.
574 else if (attr == DW_AT_ranges && is_split_dwarf_) {
575 data += ranges_base_;
576 }
577 handler_->ProcessAttributeUnsigned(offset, attr, form, data);
578 }
579
580 // Called when we have an attribute with signed data to give to
581 // our handler. The attribute is for the DIE at OFFSET from the
582 // beginning of compilation unit, has a name of ATTR, a form of
583 // FORM, and the actual data of the attribute is in DATA.
584 void ProcessAttributeSigned(uint64_t offset,
585 enum DwarfAttribute attr,
586 enum DwarfForm form,
587 int64_t data) {
588 handler_->ProcessAttributeSigned(offset, attr, form, data);
589 }
590
591 // Called when we have an attribute with a buffer of data to give to
592 // our handler. The attribute is for the DIE at OFFSET from the
593 // beginning of compilation unit, has a name of ATTR, a form of
594 // FORM, and the actual data of the attribute is in DATA, and the
595 // length of the buffer is LENGTH.
596 void ProcessAttributeBuffer(uint64_t offset,
597 enum DwarfAttribute attr,
598 enum DwarfForm form,
599 const uint8_t* data,
600 uint64_t len) {
601 handler_->ProcessAttributeBuffer(offset, attr, form, data, len);
602 }
603
604 // Handles the common parts of DW_FORM_GNU_str_index, DW_FORM_strx,
605 // DW_FORM_strx1, DW_FORM_strx2, DW_FORM_strx3, and DW_FORM_strx4.
606 // Retrieves the data and calls through to ProcessAttributeString.
607 void ProcessFormStringIndex(uint64_t offset,
608 enum DwarfAttribute attr,
609 enum DwarfForm form,
610 uint64_t str_index);
611
612 // Called when we have an attribute with string data to give to
613 // our handler. The attribute is for the DIE at OFFSET from the
614 // beginning of compilation unit, has a name of ATTR, a form of
615 // FORM, and the actual data of the attribute is in DATA.
616 // If we see a DW_AT_GNU_dwo_name attribute, save the value so
617 // that we can find the debug info in a .dwo or .dwp file.
618 void ProcessAttributeString(uint64_t offset,
619 enum DwarfAttribute attr,
620 enum DwarfForm form,
621 const char* data) {
622 if (attr == DW_AT_GNU_dwo_name || attr == DW_AT_dwo_name)
623 dwo_name_ = data;
624 handler_->ProcessAttributeString(offset, attr, form, data);
625 }
626
627 // Called to handle common portions of DW_FORM_addrx and variations, as well
628 // as DW_FORM_GNU_addr_index.
629 void ProcessAttributeAddrIndex(uint64_t offset,
630 enum DwarfAttribute attr,
631 enum DwarfForm form,
632 uint64_t addr_index) {
633 const uint8_t* addr_ptr =
634 addr_buffer_ + addr_base_ + addr_index * reader_->AddressSize();
635 ProcessAttributeUnsigned(
636 offset, attr, form, reader_->ReadAddress(addr_ptr));
637 }
638
639 // Processes all DIEs for this compilation unit
640 void ProcessDIEs();
641
642 // Skips the die with attributes specified in ABBREV starting at
643 // START, and return the new place to position the stream to.
644 const uint8_t* SkipDIE(const uint8_t* start, const Abbrev& abbrev);
645
646 // Skips the attribute starting at START, with FORM, and return the
647 // new place to position the stream to.
648 const uint8_t* SkipAttribute(const uint8_t* start, enum DwarfForm form);
649
650 // Process the actual debug information in a split DWARF file.
651 void ProcessSplitDwarf();
652
653 // Read the debug sections from a .dwo file.
654 void ReadDebugSectionsFromDwo(ElfReader* elf_reader,
655 SectionMap* sections);
656
657 // Path of the file containing the debug information.
658 const string path_;
659
660 // Offset from section start is the offset of this compilation unit
661 // from the beginning of the .debug_info section.
662 uint64_t offset_from_section_start_;
663
664 // buffer is the buffer for our CU, starting at .debug_info + offset
665 // passed in from constructor.
666 // after_header points to right after the compilation unit header.
667 const uint8_t* buffer_;
668 uint64_t buffer_length_;
669 const uint8_t* after_header_;
670
671 // The associated ByteReader that handles endianness issues for us
672 ByteReader* reader_;
673
674 // The map of sections in our file to buffers containing their data
675 const SectionMap& sections_;
676
677 // The associated handler to call processing functions in
678 Dwarf2Handler* handler_;
679
680 // Set of DWARF2/3 abbreviations for this compilation unit. Indexed
681 // by abbreviation number, which means that abbrevs_[0] is not
682 // valid.
683 std::vector<Abbrev>* abbrevs_;
684
685 // String section buffer and length, if we have a string section.
686 // This is here to avoid doing a section lookup for strings in
687 // ProcessAttribute, which is in the hot path for DWARF2 reading.
688 const uint8_t* string_buffer_;
689 uint64_t string_buffer_length_;
690
691 // Similarly for .debug_line_string.
692 const uint8_t* line_string_buffer_;
693 uint64_t line_string_buffer_length_;
694
695 // String offsets section buffer and length, if we have a string offsets
696 // section (.debug_str_offsets or .debug_str_offsets.dwo).
697 const uint8_t* str_offsets_buffer_;
698 uint64_t str_offsets_buffer_length_;
699
700 // Address section buffer and length, if we have an address section
701 // (.debug_addr).
702 const uint8_t* addr_buffer_;
703 uint64_t addr_buffer_length_;
704
705 // Flag indicating whether this compilation unit is part of a .dwo
706 // or .dwp file. If true, we are reading this unit because a
707 // skeleton compilation unit in an executable file had a
708 // DW_AT_GNU_dwo_name or DW_AT_GNU_dwo_id attribute.
709 // In a .dwo file, we expect the string offsets section to
710 // have a ".dwo" suffix, and we will use the ".debug_addr" section
711 // associated with the skeleton compilation unit.
712 bool is_split_dwarf_;
713
714 // Flag indicating if it's a Type Unit (only applicable to DWARF v5).
715 bool is_type_unit_;
716
717 // The value of the DW_AT_GNU_dwo_id attribute, if any.
718 uint64_t dwo_id_;
719
720 // The value of the DW_AT_GNU_type_signature attribute, if any.
721 uint64_t type_signature_;
722
723 // The value of the DW_AT_GNU_type_offset attribute, if any.
724 size_t type_offset_;
725
726 // The value of the DW_AT_GNU_dwo_name attribute, if any.
727 const char* dwo_name_;
728
729 // If this is a split DWARF CU, the value of the DW_AT_GNU_dwo_id attribute
730 // from the skeleton CU.
731 uint64_t skeleton_dwo_id_;
732
733 // The value of the DW_AT_GNU_ranges_base or DW_AT_rnglists_base attribute,
734 // if any.
735 uint64_t ranges_base_;
736
737 // The value of the DW_AT_GNU_addr_base attribute, if any.
738 uint64_t addr_base_;
739
740 // The value of DW_AT_str_offsets_base attribute, if any.
741 uint64_t str_offsets_base_;
742
743 // True if we have already looked for a .dwp file.
744 bool have_checked_for_dwp_;
745
746 // Path to the .dwp file.
747 string dwp_path_;
748
749 // ByteReader for the DWP file.
750 std::unique_ptr<ByteReader> dwp_byte_reader_;
751
752 // DWP reader.
753 std::unique_ptr<DwpReader> dwp_reader_;
754};
755
756// A Reader for a .dwp file. Supports the fetching of DWARF debug
757// info for a given dwo_id.
758//
759// There are two versions of .dwp files. In both versions, the
760// .dwp file is an ELF file containing only debug sections.
761// In Version 1, the file contains many copies of each debug
762// section, one for each .dwo file that is packaged in the .dwp
763// file, and the .debug_cu_index section maps from the dwo_id
764// to a set of section indexes. In Version 2, the file contains
765// one of each debug section, and the .debug_cu_index section
766// maps from the dwo_id to a set of offsets and lengths that
767// identify each .dwo file's contribution to the larger sections.
768
// Reader for a .dwp (DWARF package) file. Locates the compilation unit
// with a given dwo_id through the .debug_cu_index section; both version 1
// and version 2 layouts of that index are handled.
class DwpReader {
 public:
  // Create a reader for a .dwp file. BYTE_READER is the ByteReader for the
  // .dwp file and ELF_READER is the ELF reader for the .dwp file.
  // NOTE(review): whether this class takes ownership of ELF_READER is not
  // visible from this declaration --- confirm against the destructor's
  // definition before copying or sharing the pointer.
  DwpReader(const ByteReader& byte_reader, ElfReader* elf_reader);

  ~DwpReader();

  // Read the CU index and initialize data members.
  // NOTE(review): presumably this must be called before
  // ReadDebugSectionsForCU --- confirm against the implementation.
  void Initialize();

  // Read the debug sections for the compilation unit identified by DWO_ID.
  // SECTIONS is presumably where the sections found are recorded (TODO
  // confirm against the implementation).
  void ReadDebugSectionsForCU(uint64_t dwo_id, SectionMap* sections);

 private:
  // Search a v1 hash table for "dwo_id". Returns the slot index
  // where the dwo_id was found, or -1 if it was not found.
  int LookupCU(uint64_t dwo_id);

  // Search a v2 hash table for "dwo_id". Returns the row index
  // in the offsets and sizes tables, or 0 if it was not found.
  // (Note that the "not found" value differs from LookupCU's -1.)
  uint32_t LookupCUv2(uint64_t dwo_id);

  // The ELF reader for the .dwp file.
  ElfReader* elf_reader_;

  // The ByteReader for the .dwp file. Held by reference, so the
  // ByteReader passed to the constructor must outlive this object.
  const ByteReader& byte_reader_;

  // Pointer to the .debug_cu_index section.
  const char* cu_index_;

  // Size of the .debug_cu_index section.
  size_t cu_index_size_;

  // Pointer to the .debug_str.dwo section.
  const char* string_buffer_;

  // Size of the .debug_str.dwo section.
  size_t string_buffer_size_;

  // Version of the .dwp file. We support versions 1 and 2 currently.
  int version_;

  // Number of columns in the section tables (version 2).
  unsigned int ncolumns_;

  // Number of units in the section tables (version 2).
  unsigned int nunits_;

  // Number of slots in the hash table.
  unsigned int nslots_;

  // Pointer to the beginning of the hash table.
  const char* phash_;

  // Pointer to the beginning of the index table.
  const char* pindex_;

  // Pointer to the beginning of the section index pool (version 1).
  const char* shndx_pool_;

  // Pointer to the beginning of the section offset table (version 2).
  const char* offset_table_;

  // Pointer to the beginning of the section size table (version 2).
  const char* size_table_;

  // Contents of the sections of interest (version 2). Each pair is a
  // pointer to a section's data together with that data's size.
  const char* abbrev_data_;
  size_t abbrev_size_;
  const char* info_data_;
  size_t info_size_;
  const char* str_offsets_data_;
  size_t str_offsets_size_;
};
843
844// This class is a reader for DWARF's Call Frame Information. CFI
845// describes how to unwind stack frames --- even for functions that do
846// not follow fixed conventions for saving registers, whose frame size
847// varies as they execute, etc.
848//
849// CFI describes, at each machine instruction, how to compute the
850// stack frame's base address, how to find the return address, and
851// where to find the saved values of the caller's registers (if the
852// callee has stashed them somewhere to free up the registers for its
853// own use).
854//
855// For example, suppose we have a function whose machine code looks
856// like this (imagine an assembly language that looks like C, for a
857// machine with 32-bit registers, and a stack that grows towards lower
858// addresses):
859//
860// func: ; entry point; return address at sp
861// func+0: sp = sp - 16 ; allocate space for stack frame
862// func+1: sp[12] = r0 ; save r0 at sp+12
863// ... ; other code, not frame-related
864// func+10: sp -= 4; *sp = x ; push some x on the stack
865// ... ; other code, not frame-related
866// func+20: r0 = sp[16] ; restore saved r0
867// func+21: sp += 20 ; pop whole stack frame
868// func+22: pc = *sp; sp += 4 ; pop return address and jump to it
869//
870// DWARF CFI is (a very compressed representation of) a table with a
871// row for each machine instruction address and a column for each
872// register showing how to restore it, if possible.
873//
874// A special column named "CFA", for "Canonical Frame Address", tells how
875// to compute the base address of the frame; registers' entries may
876// refer to the CFA in describing where the registers are saved.
877//
878// Another special column, named "RA", represents the return address.
879//
880// For example, here is a complete (uncompressed) table describing the
881// function above:
882//
883// insn cfa r0 r1 ... ra
884// =======================================
885// func+0: sp cfa[0]
886// func+1: sp+16 cfa[0]
887// func+2: sp+16 cfa[-4] cfa[0]
888// func+11: sp+20 cfa[-4] cfa[0]
889// func+21: sp+20 cfa[0]
890// func+22: sp cfa[0]
891//
892// Some things to note here:
893//
894// - Each row describes the state of affairs *before* executing the
895// instruction at the given address. Thus, the row for func+0
896// describes the state before we allocate the stack frame. In the
897// next row, the formula for computing the CFA has changed,
898// reflecting that allocation.
899//
900// - The other entries are written in terms of the CFA; this allows
901// them to remain unchanged as the stack pointer gets bumped around.
902// For example, the rule for recovering the return address (the "ra"
903// column) remains unchanged throughout the function, even as the
904// stack pointer takes on three different offsets from the return
905// address.
906//
907// - Although we haven't shown it, most calling conventions designate
908// "callee-saves" and "caller-saves" registers. The callee must
909// preserve the values of callee-saves registers; if it uses them,
910// it must save their original values somewhere, and restore them
911// before it returns. In contrast, the callee is free to trash
912// caller-saves registers; if the callee uses these, it will
913// probably not bother to save them anywhere, and the CFI will
914// probably mark their values as "unrecoverable".
915//
916// (However, since the caller cannot assume the callee was going to
917// save them, caller-saves registers are probably dead in the caller
//   anyway, so compilers usually don't generate CFI for caller-saves
919// registers.)
920//
921// - Exactly where the CFA points is a matter of convention that
922// depends on the architecture and ABI in use. In the example, the
923// CFA is the value the stack pointer had upon entry to the
924// function, pointing at the saved return address. But on the x86,
925// the call frame information generated by GCC follows the
926// convention that the CFA is the address *after* the saved return
927// address.
928//
929// But by definition, the CFA remains constant throughout the
930// lifetime of the frame. This makes it a useful value for other
//   columns to refer to. It also gives debuggers a useful handle
932// for identifying a frame.
933//
934// If you look at the table above, you'll notice that a given entry is
935// often the same as the one immediately above it: most instructions
936// change only one or two aspects of the stack frame, if they affect
937// it at all. The DWARF format takes advantage of this fact, and
938// reduces the size of the data by mentioning only the addresses and
939// columns at which changes take place. So for the above, DWARF CFI
940// data would only actually mention the following:
941//
942// insn cfa r0 r1 ... ra
943// =======================================
944// func+0: sp cfa[0]
945// func+1: sp+16
946// func+2: cfa[-4]
947// func+11: sp+20
948// func+21: r0
949// func+22: sp
950//
951// In fact, this is the way the parser reports CFI to the consumer: as
952// a series of statements of the form, "At address X, column Y changed
953// to Z," and related conventions for describing the initial state.
954//
955// Naturally, it would be impractical to have to scan the entire
956// program's CFI, noting changes as we go, just to recover the
957// unwinding rules in effect at one particular instruction. To avoid
958// this, CFI data is grouped into "entries", each of which covers a
959// specified range of addresses and begins with a complete statement
960// of the rules for all recoverable registers at that starting
961// address. Each entry typically covers a single function.
962//
963// Thus, to compute the contents of a given row of the table --- that
964// is, rules for recovering the CFA, RA, and registers at a given
965// instruction --- the consumer should find the entry that covers that
966// instruction's address, start with the initial state supplied at the
967// beginning of the entry, and work forward until it has processed all
968// the changes up to and including those for the present instruction.
969//
970// There are seven kinds of rules that can appear in an entry of the
971// table:
972//
973// - "undefined": The given register is not preserved by the callee;
974// its value cannot be recovered.
975//
976// - "same value": This register has the same value it did in the callee.
977//
978// - offset(N): The register is saved at offset N from the CFA.
979//
980// - val_offset(N): The value the register had in the caller is the
981// CFA plus offset N. (This is usually only useful for describing
982// the stack pointer.)
983//
984// - register(R): The register's value was saved in another register R.
985//
986// - expression(E): Evaluating the DWARF expression E using the
987// current frame's registers' values yields the address at which the
988// register was saved.
989//
990// - val_expression(E): Evaluating the DWARF expression E using the
991// current frame's registers' values yields the value the register
992// had in the caller.
993
// Parser for DWARF call frame information, as found in .debug_frame
// sections, and for the Linux C++ exception handling variant found in
// .eh_frame sections. Parsed data is reported to a Handler; problems in
// the data are reported to a Reporter.
class CallFrameInfo {
 public:
  // The different kinds of entries one finds in CFI. Used internally,
  // and for error reporting.
  enum EntryKind { kUnknown, kCIE, kFDE, kTerminator };

  // The handler class to which the parser hands the parsed call frame
  // information. Defined below.
  class Handler;

  // A reporter class, which CallFrameInfo uses to report errors
  // encountered while parsing call frame information. Defined below.
  class Reporter;

  // Create a DWARF CFI parser. BUFFER points to the contents of the
  // .debug_frame section to parse; BUFFER_LENGTH is its length in bytes.
  // REPORTER is an error reporter the parser should use to report
  // problems. READER is a ByteReader instance that has the endianness and
  // address size set properly. Report the data we find to HANDLER.
  //
  // The constructor only stores these pointers; nothing is copied, so
  // they presumably must remain valid for as long as the parser is used.
  //
  // This class can also parse Linux C++ exception handling data, as found
  // in '.eh_frame' sections. This data is a variant of DWARF CFI that is
  // placed in loadable segments so that it is present in the program's
  // address space, and is interpreted by the C++ runtime to search the
  // call stack for a handler interested in the exception being thrown,
  // actually pop the frames, and find cleanup code to run.
  //
  // There are two differences between the call frame information described
  // in the DWARF standard and the exception handling data Linux places in
  // the .eh_frame section:
  //
  // - Exception handling data uses a different format for call frame
  //   information entry headers. The distinguished CIE id, the way FDEs
  //   refer to their CIEs, and the way the end of the series of entries is
  //   determined are all slightly different.
  //
  //   If the constructor's EH_FRAME argument is true, then the
  //   CallFrameInfo parses the entry headers as Linux C++ exception
  //   handling data. If EH_FRAME is false or omitted, the CallFrameInfo
  //   parses standard DWARF call frame information.
  //
  // - Linux C++ exception handling data uses CIE augmentation strings
  //   beginning with 'z' to specify the presence of additional data after
  //   the CIE and FDE headers and special encodings used for addresses in
  //   frame description entries.
  //
  //   CallFrameInfo can handle 'z' augmentations in either DWARF CFI or
  //   exception handling data if you have supplied READER with the base
  //   addresses needed to interpret the pointer encodings that 'z'
  //   augmentations can specify. See the ByteReader interface for details
  //   about the base addresses. See the CallFrameInfo::Handler interface
  //   for details about the additional information one might find in
  //   'z'-augmented data.
  //
  // Thus:
  //
  // - If you are parsing standard DWARF CFI, as found in a .debug_frame
  //   section, you should pass false for the EH_FRAME argument, or omit
  //   it, and you need not worry about providing READER with the
  //   additional base addresses.
  //
  // - If you want to parse Linux C++ exception handling data from a
  //   .eh_frame section, you should pass EH_FRAME as true, and call
  //   READER's Set*Base member functions before calling our Start method.
  //
  // - If you want to parse DWARF CFI that uses the 'z' augmentations
  //   (although I don't think any toolchain ever emits such data), you
  //   could pass false for EH_FRAME, but call READER's Set*Base members.
  //
  // The extensions the Linux C++ ABI makes to DWARF for exception
  // handling are described here, rather poorly:
  // http://refspecs.linux-foundation.org/LSB_4.0.0/LSB-Core-generic/LSB-Core-generic/dwarfext.html
  // http://refspecs.linux-foundation.org/LSB_4.0.0/LSB-Core-generic/LSB-Core-generic/ehframechpt.html
  //
  // The mechanics of C++ exception handling, personality routines,
  // and language-specific data areas are described here, rather nicely:
  // http://www.codesourcery.com/public/cxx-abi/abi-eh.html
  CallFrameInfo(const uint8_t* buffer, size_t buffer_length,
                ByteReader* reader, Handler* handler, Reporter* reporter,
                bool eh_frame = false)
      : buffer_(buffer), buffer_length_(buffer_length),
        reader_(reader), handler_(handler), reporter_(reporter),
        eh_frame_(eh_frame) { }

  // The destructor is empty: this class does not free the buffer, reader,
  // handler, or reporter it was constructed with.
  ~CallFrameInfo() { }

  // Parse the entries in BUFFER, reporting what we find to HANDLER.
  // Return true if we reach the end of the section successfully, or
  // false if we encounter an error.
  bool Start();

  // Return the textual name of KIND. For error reporting.
  static const char* KindName(EntryKind kind);

 private:

  // Forward declaration, so that Entry can hold a pointer to its CIE.
  struct CIE;

  // A CFI entry, either an FDE or a CIE.
  struct Entry {
    // The starting offset of the entry in the section, for error
    // reporting.
    size_t offset;

    // The start of this entry in the buffer.
    const uint8_t* start;

    // Which kind of entry this is.
    //
    // We want to be able to use this for error reporting even while we're
    // in the midst of parsing. Error reporting code may assume that kind,
    // offset, and start fields are valid, although kind may be kUnknown.
    EntryKind kind;

    // The end of this entry's common prologue (initial length and id), and
    // the start of this entry's kind-specific fields.
    const uint8_t* fields;

    // The start of this entry's instructions.
    const uint8_t* instructions;

    // The address past the entry's last byte in the buffer. (Note that
    // since offset points to the entry's initial length field, and the
    // length field is the number of bytes after that field, this is not
    // simply buffer_ + offset + length.)
    const uint8_t* end;

    // For both DWARF CFI and .eh_frame sections, this is the CIE id in a
    // CIE, and the offset of the associated CIE in an FDE.
    uint64_t id;

    // The CIE that applies to this entry, if we've parsed it. If this is a
    // CIE, then this field points to this structure.
    CIE* cie;
  };

  // A common information entry (CIE).
  struct CIE: public Entry {
    uint8_t version;                      // CFI data version number
    string augmentation;                  // vendor format extension markers
    uint64_t code_alignment_factor;       // scale for code address adjustments
    int data_alignment_factor;            // scale for stack pointer adjustments
    unsigned return_address_register;     // which register holds the return addr

    // True if this CIE includes Linux C++ ABI 'z' augmentation data.
    bool has_z_augmentation;

    // Parsed 'z' augmentation data. These are meaningful only if
    // has_z_augmentation is true.
    bool has_z_lsda;                      // The 'z' augmentation included 'L'.
    bool has_z_personality;               // The 'z' augmentation included 'P'.
    bool has_z_signal_frame;              // The 'z' augmentation included 'S'.

    // If has_z_lsda is true, this is the encoding to be used for language-
    // specific data area pointers in FDEs.
    DwarfPointerEncoding lsda_encoding;

    // If has_z_personality is true, this is the encoding used for the
    // personality routine pointer in the augmentation data.
    DwarfPointerEncoding personality_encoding;

    // If has_z_personality is true, this is the address of the personality
    // routine --- or, if personality_encoding & DW_EH_PE_indirect, the
    // address where the personality routine's address is stored.
    uint64_t personality_address;

    // This is the encoding used for addresses in the FDE header and
    // in DW_CFA_set_loc instructions. This is always valid, whether
    // or not we saw a 'z' augmentation string; its default value is
    // DW_EH_PE_absptr, which is what normal DWARF CFI uses.
    DwarfPointerEncoding pointer_encoding;

    // These were only introduced in DWARF4, so will not be set in older
    // versions.
    uint8_t address_size;
    uint8_t segment_size;
  };

  // A frame description entry (FDE).
  struct FDE: public Entry {
    uint64_t address;                     // start address of described code
    uint64_t size;                        // size of described code, in bytes

    // If cie->has_z_lsda is true, then this is the language-specific data
    // area's address --- or its address's address, if cie->lsda_encoding
    // has the DW_EH_PE_indirect bit set.
    uint64_t lsda_address;
  };

  // Internal use. These classes are only declared here; their definitions
  // are not part of this header section.
  class Rule;
  class UndefinedRule;
  class SameValueRule;
  class OffsetRule;
  class ValOffsetRule;
  class RegisterRule;
  class ExpressionRule;
  class ValExpressionRule;
  class RuleMap;
  class State;

  // Parse the initial length and id of a CFI entry, either a CIE, an FDE,
  // or a .eh_frame end-of-data mark. CURSOR points to the beginning of the
  // data to parse. On success, populate ENTRY as appropriate, and return
  // true. On failure, report the problem, and return false. Even if we
  // return false, set ENTRY->end to the first byte after the entry if we
  // were able to figure that out, or NULL if we weren't.
  bool ReadEntryPrologue(const uint8_t* cursor, Entry* entry);

  // Parse the fields of a CIE after the entry prologue, including any 'z'
  // augmentation data. Assume that the 'Entry' fields of CIE are
  // populated; use CIE->fields and CIE->end as the start and limit for
  // parsing. On success, populate the rest of *CIE, and return true; on
  // failure, report the problem and return false.
  bool ReadCIEFields(CIE* cie);

  // Parse the fields of an FDE after the entry prologue, including any 'z'
  // augmentation data. Assume that the 'Entry' fields of *FDE are
  // initialized; use FDE->fields and FDE->end as the start and limit for
  // parsing. Assume that FDE->cie is fully initialized. On success,
  // populate the rest of *FDE, and return true; on failure, report the
  // problem and return false.
  bool ReadFDEFields(FDE* fde);

  // Report that ENTRY is incomplete, and return false. This is just a
  // trivial wrapper for invoking reporter_->Incomplete; it provides a
  // little brevity.
  bool ReportIncomplete(Entry* entry);

  // Return true if ENCODING has the DW_EH_PE_indirect bit set.
  static bool IsIndirectEncoding(DwarfPointerEncoding encoding) {
    // The masked value converts implicitly to bool: non-zero means the
    // bit is set.
    return encoding & DW_EH_PE_indirect;
  }

  // The contents of the CFI section we're parsing (.debug_frame, or
  // .eh_frame when eh_frame_ is true), and its length in bytes.
  const uint8_t* buffer_;
  size_t buffer_length_;

  // For reading multi-byte values with the appropriate endianness.
  ByteReader* reader_;

  // The handler to which we should report the data we find.
  Handler* handler_;

  // For reporting problems in the info we're parsing.
  Reporter* reporter_;

  // True if we are processing .eh_frame-format data.
  bool eh_frame_;
};
1244
// The handler class for CallFrameInfo. The CFI parser calls the
// member functions of a handler object to report the data it finds.
class CallFrameInfo::Handler {
 public:
  // The pseudo-register number for the canonical frame address.
  enum { kCFARegister = -1 };

  Handler() { }
  virtual ~Handler() { }

  // The parser has found CFI for the machine code at ADDRESS,
  // extending for LENGTH bytes. OFFSET is the offset of the frame
  // description entry in the section, for use in error messages.
  // VERSION is the version number of the CFI format. AUGMENTATION is
  // a string describing any producer-specific extensions present in
  // the data. RETURN_ADDRESS is the number of the register that holds
  // the address to which the function should return.
  //
  // Entry should return true to process this CFI, or false to skip to
  // the next entry.
  //
  // The parser invokes Entry for each Frame Description Entry (FDE)
  // it finds. The parser doesn't report Common Information Entries
  // to the handler explicitly; instead, if the handler elects to
  // process a given FDE, the parser reiterates the appropriate CIE's
  // contents at the beginning of the FDE's rules.
  virtual bool Entry(size_t offset, uint64_t address, uint64_t length,
                     uint8_t version, const string& augmentation,
                     unsigned return_address) = 0;

  // When the Entry function returns true, the parser calls these
  // handler functions repeatedly to describe the rules for recovering
  // registers at each instruction in the given range of machine code.
  // Immediately after a call to Entry, the handler should assume that
  // the rule for each callee-saves register is "unchanged" --- that
  // is, that the register still has the value it had in the caller.
  //
  // If a *Rule function returns true, we continue processing this entry's
  // instructions. If a *Rule function returns false, we stop evaluating
  // instructions, and skip to the next entry. Either way, we call End
  // before going on to the next entry.
  //
  // In all of these functions, if the REG parameter is kCFARegister, then
  // the rule describes how to find the canonical frame address.
  // kCFARegister may be passed as a BASE_REGISTER argument, meaning that
  // the canonical frame address should be used as the base address for the
  // computation. All other REG values will be non-negative (DWARF register
  // numbers start at zero).

  // At ADDRESS, register REG's value is not recoverable.
  virtual bool UndefinedRule(uint64_t address, int reg) = 0;

  // At ADDRESS, register REG's value is the same as that it had in
  // the caller.
  virtual bool SameValueRule(uint64_t address, int reg) = 0;

  // At ADDRESS, register REG has been saved at offset OFFSET from
  // BASE_REGISTER.
  virtual bool OffsetRule(uint64_t address, int reg,
                          int base_register, long offset) = 0;

  // At ADDRESS, the caller's value of register REG is the current
  // value of BASE_REGISTER plus OFFSET. (This rule doesn't provide an
  // address at which the register's value is saved.)
  virtual bool ValOffsetRule(uint64_t address, int reg,
                             int base_register, long offset) = 0;

  // At ADDRESS, register REG has been saved in BASE_REGISTER. This differs
  // from ValOffsetRule(ADDRESS, REG, BASE_REGISTER, 0), in that
  // BASE_REGISTER is the "home" for REG's saved value: if you want to
  // assign to a variable whose home is REG in the calling frame, you
  // should put the value in BASE_REGISTER.
  virtual bool RegisterRule(uint64_t address, int reg, int base_register) = 0;

  // At ADDRESS, the DWARF expression EXPRESSION yields the address at
  // which REG was saved.
  virtual bool ExpressionRule(uint64_t address, int reg,
                              const string& expression) = 0;

  // At ADDRESS, the DWARF expression EXPRESSION yields the caller's
  // value for REG. (This rule doesn't provide an address at which the
  // register's value is saved.)
  virtual bool ValExpressionRule(uint64_t address, int reg,
                                 const string& expression) = 0;

  // Indicate that the rules for the address range reported by the
  // last call to Entry are complete. End should return true if
  // everything is okay, or false if an error has occurred and parsing
  // should stop.
  virtual bool End() = 0;

  // Handler functions for Linux C++ exception handling data. These are
  // only called if the data includes 'z' augmentation strings.

  // The Linux C++ ABI uses an extension of the DWARF CFI format to
  // walk the stack to propagate exceptions from the throw to the
  // appropriate catch, and do the appropriate cleanups along the way.
  // CFI entries used for exception handling have two additional pieces
  // of data associated with them:
  //
  // - The "language-specific data area" describes which exception
  //   types the function has 'catch' clauses for, and indicates how
  //   to go about re-entering the function at the appropriate catch
  //   clause. If the exception is not caught, it describes the
  //   destructors that must run before the frame is popped.
  //
  // - The "personality routine" is responsible for interpreting the
  //   language-specific data area's contents, and deciding whether
  //   the exception should continue to propagate down the stack,
  //   perhaps after doing some cleanup for this frame, or whether the
  //   exception will be caught here.
  //
  // In principle, the language-specific data area is opaque to
  // everybody but the personality routine. In practice, these values
  // may be useful or interesting to readers with extra context, and
  // we have to at least skip them anyway, so we might as well report
  // them to the handler.

  // This entry's exception handling personality routine's address is
  // ADDRESS. If INDIRECT is true, then ADDRESS is the address at
  // which the routine's address is stored. The default definition for
  // this handler function simply returns true, allowing parsing of
  // the entry to continue.
  virtual bool PersonalityRoutine(uint64_t address, bool indirect) {
    return true;
  }

  // This entry's language-specific data area (LSDA) is located at
  // ADDRESS. If INDIRECT is true, then ADDRESS is the address at
  // which the area's address is stored. The default definition for
  // this handler function simply returns true, allowing parsing of
  // the entry to continue.
  virtual bool LanguageSpecificDataArea(uint64_t address, bool indirect) {
    return true;
  }

  // This entry describes a signal trampoline --- this frame is the
  // caller of a signal handler. The default definition for this
  // handler function simply returns true, allowing parsing of the
  // entry to continue.
  //
  // The best description of the rationale for and meaning of signal
  // trampoline CFI entries seems to be in the GCC bug database:
  // http://gcc.gnu.org/bugzilla/show_bug.cgi?id=26208
  virtual bool SignalHandler() { return true; }
};
1390
1391// The CallFrameInfo class makes calls on an instance of this class to
1392// report errors or warn about problems in the data it is parsing. The
1393// default definitions of these methods print a message to stderr, but
1394// you can make a derived class that overrides them.
class CallFrameInfo::Reporter {
 public:
  // Create an error reporter which attributes troubles to the section
  // named SECTION in FILENAME. Both strings are copied into the reporter.
  //
  // Normally SECTION would be .debug_frame, but the Mac puts CFI data
  // in a Mach-O section named __debug_frame. If we support
  // Linux-style exception handling data, we could be reading an
  // .eh_frame section.
  Reporter(const string& filename,
           const string& section = ".debug_frame")
      : filename_(filename), section_(section) { }
  virtual ~Reporter() { }

  // The CFI entry at OFFSET ends too early to be well-formed. KIND
  // indicates what kind of entry it is; KIND can be kUnknown if we
  // haven't parsed enough of the entry to tell yet.
  virtual void Incomplete(uint64_t offset, CallFrameInfo::EntryKind kind);

  // The .eh_frame data has a four-byte zero at OFFSET where the next
  // entry's length would be; this is a terminator. However, the buffer
  // length as given to the CallFrameInfo constructor says there should be
  // more data.
  virtual void EarlyEHTerminator(uint64_t offset);

  // The FDE at OFFSET refers to the CIE at CIE_OFFSET, but the
  // section is not that large.
  virtual void CIEPointerOutOfRange(uint64_t offset, uint64_t cie_offset);

  // The FDE at OFFSET refers to the CIE at CIE_OFFSET, but the entry
  // there is not a CIE.
  virtual void BadCIEId(uint64_t offset, uint64_t cie_offset);

  // The FDE at OFFSET refers to a CIE with an address size we don't know how
  // to handle.
  virtual void UnexpectedAddressSize(uint64_t offset, uint8_t address_size);

  // The FDE at OFFSET refers to a CIE with a segment descriptor size we
  // don't know how to handle.
  virtual void UnexpectedSegmentSize(uint64_t offset, uint8_t segment_size);

  // The FDE at OFFSET refers to a CIE with version number VERSION,
  // which we don't recognize. We cannot parse DWARF CFI if it uses
  // a version number we don't recognize.
  virtual void UnrecognizedVersion(uint64_t offset, int version);

  // The FDE at OFFSET refers to a CIE with augmentation AUGMENTATION,
  // which we don't recognize. We cannot parse DWARF CFI if it uses
  // augmentations we don't recognize.
  virtual void UnrecognizedAugmentation(uint64_t offset,
                                        const string& augmentation);

  // The pointer encoding ENCODING, specified by the CIE at OFFSET, is not
  // a valid encoding.
  virtual void InvalidPointerEncoding(uint64_t offset, uint8_t encoding);

  // The pointer encoding ENCODING, specified by the CIE at OFFSET, depends
  // on a base address which has not been supplied.
  virtual void UnusablePointerEncoding(uint64_t offset, uint8_t encoding);

  // The CIE at OFFSET contains a DW_CFA_restore instruction at
  // INSN_OFFSET, which may not appear in a CIE.
  virtual void RestoreInCIE(uint64_t offset, uint64_t insn_offset);

  // The entry at OFFSET, of kind KIND, has an unrecognized
  // instruction at INSN_OFFSET.
  virtual void BadInstruction(uint64_t offset, CallFrameInfo::EntryKind kind,
                              uint64_t insn_offset);

  // The instruction at INSN_OFFSET in the entry at OFFSET, of kind
  // KIND, establishes a rule that cites the CFA, but we have not
  // established a CFA rule yet.
  virtual void NoCFARule(uint64_t offset, CallFrameInfo::EntryKind kind,
                         uint64_t insn_offset);

  // The instruction at INSN_OFFSET in the entry at OFFSET, of kind
  // KIND, is a DW_CFA_restore_state instruction, but the stack of
  // saved states is empty.
  virtual void EmptyStateStack(uint64_t offset, CallFrameInfo::EntryKind kind,
                               uint64_t insn_offset);

  // The DW_CFA_restore_state instruction at INSN_OFFSET in the entry
  // at OFFSET, of kind KIND, would restore a state that has no CFA
  // rule, whereas the current state does have a CFA rule. This is
  // bogus input, which the CallFrameInfo::Handler interface doesn't
  // (and shouldn't) have any way to report.
  virtual void ClearingCFARule(uint64_t offset, CallFrameInfo::EntryKind kind,
                               uint64_t insn_offset);

 protected:
  // The name of the file whose CFI we're reading.
  string filename_;

  // The name of the CFI section in that file.
  string section_;
};
1491
1492} // namespace google_breakpad
1493
1494#endif // UTIL_DEBUGINFO_DWARF2READER_H__
1495