dwarf2reader.h source code [breakpad/common/dwarf/dwarf2reader.h]

// -*- mode: C++ -*-
2
3	// Copyright (c) 2010 Google Inc. All Rights Reserved.
4	//
5	// Redistribution and use in source and binary forms, with or without
6	// modification, are permitted provided that the following conditions are
7	// met:
8	//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
18	//
19	// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20	// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21	// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22	// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23	// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24	// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25	// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26	// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27	// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28	// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29	// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31	// CFI reader author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com>
32
// This file contains definitions related to the DWARF2/3 reader and
// its handler interfaces.
// The DWARF2/3 specification can be found at
// http://dwarf.freestandards.org and should be considered required
// reading if you wish to modify the implementation.
// Only a cursory attempt is made to explain terminology that is
// used here, as it is much better explained in the standard documents.
40	#ifndef COMMON_DWARF_DWARF2READER_H__
41	#define COMMON_DWARF_DWARF2READER_H__
42
43	#include <assert.h>
44	#include <stdint.h>
45
46	#include <list>
47	#include <map>
48	#include <string>
49	#include <utility>
50	#include <vector>
51	#include <memory>
52
53	#include "common/dwarf/bytereader.h"
54	#include "common/dwarf/dwarf2enums.h"
55	#include "common/dwarf/types.h"
56	#include "common/using_std_string.h"
57	#include "common/dwarf/elf_reader.h"
58
59	namespace google_breakpad {
// Forward declarations for types that this header only refers to by
// pointer or reference before (or without) seeing their definitions.
struct LineStateMachine;
class Dwarf2Handler;
class LineInfoHandler;
class DwpReader;
64
65	// This maps from a string naming a section to a pair containing a
66	// the data for the section, and the size of the section.
67	typedef std::map<string, std::pair<const uint8_t*, uint64_t> > SectionMap;
68
69	// Abstract away the difference between elf and mach-o section names.
70	// Elf-names use ".section_name, mach-o uses "__section_name". Pass "name" in
71	// the elf form, ".section_name".
72	const SectionMap::const_iterator GetSectionByName(const SectionMap&
73	sections, const char* name);
74
75	// Most of the time, this struct functions as a simple attribute and form pair.
76	// However, Dwarf5 DW_FORM_implicit_const means that a form may have its value
77	// in line in the abbrev table, and that value must be associated with the
78	// pair until the attr's value is needed.
79	struct AttrForm {
80	AttrForm(enum DwarfAttribute attr, enum DwarfForm form, uint64_t value) :
81	attr_(attr), form_(form), value_(value) { }
82
83	enum DwarfAttribute attr_;
84	enum DwarfForm form_;
85	uint64_t value_;
86	};
87	typedef std::list<AttrForm> AttributeList;
88	typedef AttributeList::iterator AttributeIterator;
89	typedef AttributeList::const_iterator ConstAttributeIterator;
90
// In-memory form of a line number program header.
struct LineInfoHeader {
  uint64_t total_length;
  uint16_t version;
  uint64_t prologue_length;
  // "insn" stands for instruction.
  uint8_t min_insn_length;
  // "stmt" stands for statement.
  bool default_is_stmt;
  int8_t line_base;
  uint8_t line_range;
  uint8_t opcode_base;
  // Held through a pointer so that signalsafe_addr2line is able to use
  // this structure without heap allocation problems.
  std::vector<unsigned char>* std_opcode_lengths;
};
104
105	class LineInfo {
106	public:
107
108	// Initializes a .debug_line reader. Buffer and buffer length point
109	// to the beginning and length of the line information to read.
110	// Reader is a ByteReader class that has the endianness set
111	// properly.
112	LineInfo(const uint8_t* buffer, uint64_t buffer_length,
113	ByteReader* reader, const uint8_t* string_buffer,
114	size_t string_buffer_length, const uint8_t* line_string_buffer,
115	size_t line_string_buffer_length, LineInfoHandler* handler);
116
117	virtual ~LineInfo() {
118	if (header_.std_opcode_lengths) {
119	delete header_.std_opcode_lengths;
120	}
121	}
122
123	// Start processing line info, and calling callbacks in the handler.
124	// Consumes the line number information for a single compilation unit.
125	// Returns the number of bytes processed.
126	uint64_t Start();
127
128	// Process a single line info opcode at START using the state
129	// machine at LSM. Return true if we should define a line using the
130	// current state of the line state machine. Place the length of the
131	// opcode in LEN.
132	// If LSM_PASSES_PC is non-NULL, this function also checks if the lsm
133	// passes the address of PC. In other words, LSM_PASSES_PC will be
134	// set to true, if the following condition is met.
135	//
136	// lsm's old address < PC <= lsm's new address
137	static bool ProcessOneOpcode(ByteReader* reader,
138	LineInfoHandler* handler,
139	const struct LineInfoHeader& header,
140	const uint8_t* start,
141	struct LineStateMachine* lsm,
142	size_t* len,
143	uintptr pc,
144	bool* lsm_passes_pc);
145
146	private:
147	// Reads the DWARF2/3 header for this line info.
148	void ReadHeader();
149
150	// Reads the DWARF2/3 line information
151	void ReadLines();
152
153	// Read the DWARF5 types and forms for the file and directory tables.
154	void ReadTypesAndForms(const uint8_t** lineptr, uint32_t* content_types,
155	uint32_t* content_forms, uint32_t max_types,
156	uint32_t* format_count);
157
158	// Read a row from the dwarf5 LineInfo file table.
159	void ReadFileRow(const uint8_t** lineptr, const uint32_t* content_types,
160	const uint32_t* content_forms, uint32_t row,
161	uint32_t format_count);
162
163	// Read and return the data at lineptr according to form. Advance*
164	// lineptr appropriately.*
165	uint64_t ReadUnsignedData(uint32_t form, const uint8_t** lineptr);
166
167	// Read and return the data at lineptr according to form. Advance*
168	// lineptr appropriately.*
169	const char* ReadStringForm(uint32_t form, const uint8_t** lineptr);
170
171	// The associated handler to call processing functions in
172	LineInfoHandler* handler_;
173
174	// The associated ByteReader that handles endianness issues for us
175	ByteReader* reader_;
176
177	// A DWARF line info header. This is not the same size as in the actual file,
178	// as the one in the file may have a 32 bit or 64 bit lengths
179
180	struct LineInfoHeader header_;
181
182	// buffer is the buffer for our line info, starting at exactly where
183	// the line info to read is. after_header is the place right after
184	// the end of the line information header.
185	const uint8_t* buffer_;
186	#ifndef NDEBUG
187	uint64_t buffer_length_;
188	#endif
189	// Convenience pointers into .debug_str and .debug_line_str. These exactly
190	// correspond to those in the compilation unit.
191	const uint8_t* string_buffer_;
192	#ifndef NDEBUG
193	uint64_t string_buffer_length_;
194	#endif
195	const uint8_t* line_string_buffer_;
196	#ifndef NDEBUG
197	uint64_t line_string_buffer_length_;
198	#endif
199
200	const uint8_t* after_header_;
201	};
202
203	// This class is the main interface between the line info reader and
204	// the client. The virtual functions inside this get called for
205	// interesting events that happen during line info reading. The
206	// default implementation does nothing
207
208	class LineInfoHandler {
209	public:
210	LineInfoHandler() { }
211
212	virtual ~LineInfoHandler() { }
213
214	// Called when we define a directory. NAME is the directory name,
215	// DIR_NUM is the directory number
216	virtual void DefineDir(const string& name, uint32_t dir_num) { }
217
218	// Called when we define a filename. NAME is the filename, FILE_NUM
219	// is the file number which is -1 if the file index is the next
220	// index after the last numbered index (this happens when files are
221	// dynamically defined by the line program), DIR_NUM is the
222	// directory index for the directory name of this file, MOD_TIME is
223	// the modification time of the file, and LENGTH is the length of
224	// the file
225	virtual void DefineFile(const string& name, int32_t file_num,
226	uint32_t dir_num, uint64_t mod_time,
227	uint64_t length) { }
228
229	// Called when the line info reader has a new line, address pair
230	// ready for us. ADDRESS is the address of the code, LENGTH is the
231	// length of its machine code in bytes, FILE_NUM is the file number
232	// containing the code, LINE_NUM is the line number in that file for
233	// the code, and COLUMN_NUM is the column number the code starts at,
234	// if we know it (0 otherwise).
235	virtual void AddLine(uint64_t address, uint64_t length,
236	uint32_t file_num, uint32_t line_num, uint32_t column_num) { }
237	};
238
// Receives the address ranges produced while a range list is read.
// Both callbacks do nothing by default.
class RangeListHandler {
 public:
  RangeListHandler() { }

  virtual ~RangeListHandler() { }

  // Add a range running from BEGIN to END.
  virtual void AddRange(uint64_t begin, uint64_t end) { }

  // Finish processing the range list.
  virtual void Finish() { }
};
251
252	class RangeListReader {
253	public:
254	// Reading a range list requires quite a bit of information
255	// from the compilation unit. Package it conveniently.
256	struct CURangesInfo {
257	CURangesInfo() :
258	version_(`0`), base_address_(`0`), ranges_base_(`0`),
259	buffer_(nullptr), size_(`0`), addr_buffer_(nullptr),
260	addr_buffer_size_(`0`), addr_base_(`0`) { }
261
262	uint16_t version_;
263	// Ranges base address. Ordinarily the CU's low_pc.
264	uint64_t base_address_;
265	// Offset into .debug_rnglists for this CU's rangelists.
266	uint64_t ranges_base_;
267	// Contents of either .debug_ranges or .debug_rnglists.
268	const uint8_t* buffer_;
269	uint64_t size_;
270	// Contents of .debug_addr. This cu's contribution starts at
271	// addr_base_
272	const uint8_t* addr_buffer_;
273	uint64_t addr_buffer_size_;
274	uint64_t addr_base_;
275	};
276
277	RangeListReader(ByteReader* reader, CURangesInfo* cu_info,
278	RangeListHandler* handler) :
279	reader_(reader), cu_info_(cu_info), handler_(handler),
280	offset_array_(`0`) { }
281
282	// Read ranges from cu_info as specified by form and data.
283	bool ReadRanges(enum DwarfForm form, uint64_t data);
284
285	private:
286	// Read dwarf4 .debug_ranges at offset.
287	bool ReadDebugRanges(uint64_t offset);
288	// Read dwarf5 .debug_rngslist at offset.
289	bool ReadDebugRngList(uint64_t offset);
290
291	// Convenience functions to handle the mechanics of reading entries in the
292	// ranges section.
293	uint64_t ReadULEB(uint64_t offset, uint64_t* value) {
294	size_t len;
295	*value = reader_->ReadUnsignedLEB128(cu_info_->buffer_ + offset, &len);
296	return len;
297	}
298
299	uint64_t ReadAddress(uint64_t offset, uint64_t* value) {
300	*value = reader_->ReadAddress(cu_info_->buffer_ + offset);
301	return reader_->AddressSize();
302	}
303
304	// Read the address at this CU's addr_index in the .debug_addr section.
305	uint64_t GetAddressAtIndex(uint64_t addr_index) {
306	assert(cu_info_->addr_buffer_ != nullptr);
307	uint64_t offset =
308	cu_info_->addr_base_ + addr_index * reader_->AddressSize();
309	assert(offset < cu_info_->addr_buffer_size_);
310	return reader_->ReadAddress(cu_info_->addr_buffer_ + offset);
311	}
312
313	ByteReader* reader_;
314	CURangesInfo* cu_info_;
315	RangeListHandler* handler_;
316	uint64_t offset_array_;
317	};
318
319	// This class is the main interface between the reader and the
320	// client. The virtual functions inside this get called for
321	// interesting events that happen during DWARF2 reading.
322	// The default implementation skips everything.
323	class Dwarf2Handler {
324	public:
325	Dwarf2Handler() { }
326
327	virtual ~Dwarf2Handler() { }
328
329	// Start to process a compilation unit at OFFSET from the beginning of the
330	// .debug_info section. Return false if you would like to skip this
331	// compilation unit.
332	virtual bool StartCompilationUnit(uint64_t offset, uint8_t address_size,
333	uint8_t offset_size, uint64_t cu_length,
334	uint8_t dwarf_version) { return false; }
335
336	// When processing a skeleton compilation unit, resulting from a split
337	// DWARF compilation, once the skeleton debug info has been read,
338	// the reader will call this function to ask the client if it needs
339	// the full debug info from the .dwo or .dwp file. Return true if
340	// you need it, or false to skip processing the split debug info.
341	virtual bool NeedSplitDebugInfo() { return true; }
342
343	// Start to process a split compilation unit at OFFSET from the beginning of
344	// the debug_info section in the .dwp/.dwo file. Return false if you would
345	// like to skip this compilation unit.
346	virtual bool StartSplitCompilationUnit(uint64_t offset,
347	uint64_t cu_length) { return false; }
348
349	// Start to process a DIE at OFFSET from the beginning of the .debug_info
350	// section. Return false if you would like to skip this DIE.
351	virtual bool StartDIE(uint64_t offset, enum DwarfTag tag) { return false; }
352
353	// Called when we have an attribute with unsigned data to give to our
354	// handler. The attribute is for the DIE at OFFSET from the beginning of the
355	// .debug_info section. Its name is ATTR, its form is FORM, and its value is
356	// DATA.
357	virtual void ProcessAttributeUnsigned(uint64_t offset,
358	enum DwarfAttribute attr,
359	enum DwarfForm form,
360	uint64_t data) { }
361
362	// Called when we have an attribute with signed data to give to our handler.
363	// The attribute is for the DIE at OFFSET from the beginning of the
364	// .debug_info section. Its name is ATTR, its form is FORM, and its value is
365	// DATA.
366	virtual void ProcessAttributeSigned(uint64_t offset,
367	enum DwarfAttribute attr,
368	enum DwarfForm form,
369	int64_t data) { }
370
371	// Called when we have an attribute whose value is a reference to
372	// another DIE. The attribute belongs to the DIE at OFFSET from the
373	// beginning of the .debug_info section. Its name is ATTR, its form
374	// is FORM, and the offset of the DIE being referred to from the
375	// beginning of the .debug_info section is DATA.
376	virtual void ProcessAttributeReference(uint64_t offset,
377	enum DwarfAttribute attr,
378	enum DwarfForm form,
379	uint64_t data) { }
380
381	// Called when we have an attribute with a buffer of data to give to our
382	// handler. The attribute is for the DIE at OFFSET from the beginning of the
383	// .debug_info section. Its name is ATTR, its form is FORM, DATA points to
384	// the buffer's contents, and its length in bytes is LENGTH. The buffer is
385	// owned by the caller, not the callee, and may not persist for very long.
386	// If you want the data to be available later, it needs to be copied.
387	virtual void ProcessAttributeBuffer(uint64_t offset,
388	enum DwarfAttribute attr,
389	enum DwarfForm form,
390	const uint8_t* data,
391	uint64_t len) { }
392
393	// Called when we have an attribute with string data to give to our handler.
394	// The attribute is for the DIE at OFFSET from the beginning of the
395	// .debug_info section. Its name is ATTR, its form is FORM, and its value is
396	// DATA.
397	virtual void ProcessAttributeString(uint64_t offset,
398	enum DwarfAttribute attr,
399	enum DwarfForm form,
400	const string& data) { }
401
402	// Called when we have an attribute whose value is the 64-bit signature
403	// of a type unit in the .debug_types section. OFFSET is the offset of
404	// the DIE whose attribute we're reporting. ATTR and FORM are the
405	// attribute's name and form. SIGNATURE is the type unit's signature.
406	virtual void ProcessAttributeSignature(uint64_t offset,
407	enum DwarfAttribute attr,
408	enum DwarfForm form,
409	uint64_t signature) { }
410
411	// Called when finished processing the DIE at OFFSET.
412	// Because DWARF2/3 specifies a tree of DIEs, you may get starts
413	// before ends of the previous DIE, as we process children before
414	// ending the parent.
415	virtual void EndDIE(uint64_t offset) { }
416
417	};
418
// The base of DWARF2/3 debug info is a DIE (Debugging Information
// Entry).
// DWARF groups DIEs into a tree and calls the root of this tree a
// "compilation unit". Most of the time, there is one compilation
// unit in the .debug_info section for each file that had debug info
// generated.
// Each DIE consists of
//
// 1. a tag specifying a thing that is being described (i.e.
// DW_TAG_subprogram for functions, DW_TAG_variable for variables, etc.),
// 2. attributes (such as DW_AT_location for location in memory,
// DW_AT_name for name), and data for each attribute,
// 3. a flag saying whether the DIE has children or not.
//
// In order to gain some amount of compression, the format of
// each DIE (tag name, attributes and data forms for the attributes)
// is stored in a separate table called the "abbreviation table".
// This is done because a large number of DIEs have the exact same tag
// and list of attributes, but different data for those attributes.
// As a result, the .debug_info section is just a stream of data, and
// requires reading of the .debug_abbrev section to say what the data
// means.
//
// As a warning to the user, it should be noted that the reason for
// using absolute offsets from the beginning of .debug_info is that
// DWARF2/3 supports referencing DIEs from other DIEs by their offset
// from either the current compilation unit start, *or* the beginning
// of the .debug_info section. This means it is possible to reference
// a DIE in one compilation unit from a DIE in another compilation
// unit. This style of reference is usually used to eliminate
// duplicated information that occurs across compilation
// units, such as base types, etc. GCC 3.4+ supports this with
// -feliminate-dwarf2-dups. Other toolchains will sometimes do
// duplicate elimination in the linker.
453
454	class CompilationUnit {
455	public:
456
457	// Initialize a compilation unit. This requires a map of sections,
458	// the offset of this compilation unit in the .debug_info section, a
459	// ByteReader, and a Dwarf2Handler class to call callbacks in.
460	CompilationUnit(const string& path, const SectionMap& sections,
461	uint64_t offset, ByteReader* reader, Dwarf2Handler* handler);
462	virtual ~CompilationUnit() {
463	if (abbrevs_) delete abbrevs_;
464	}
465
466	// Initialize a compilation unit from a .dwo or .dwp file.
467	// In this case, we need the .debug_addr section from the
468	// executable file that contains the corresponding skeleton
469	// compilation unit. We also inherit the Dwarf2Handler from
470	// the executable file, and call it as if we were still
471	// processing the original compilation unit.
472	void SetSplitDwarf(const uint8_t* addr_buffer, uint64_t addr_buffer_length,
473	uint64_t addr_base, uint64_t ranges_base, uint64_t dwo_id);
474
475	// Begin reading a Dwarf2 compilation unit, and calling the
476	// callbacks in the Dwarf2Handler
477
478	// Return the full length of the compilation unit, including
479	// headers. This plus the starting offset passed to the constructor
480	// is the offset of the end of the compilation unit --- and the
481	// start of the next compilation unit, if there is one.
482	uint64_t Start();
483
484	private:
485
486	// This struct represents a single DWARF2/3 abbreviation
487	// The abbreviation tells how to read a DWARF2/3 DIE, and consist of a
488	// tag and a list of attributes, as well as the data form of each attribute.
489	struct Abbrev {
490	uint64_t number;
491	enum DwarfTag tag;
492	bool has_children;
493	AttributeList attributes;
494	};
495
496	// A DWARF2/3 compilation unit header. This is not the same size as
497	// in the actual file, as the one in the file may have a 32 bit or
498	// 64 bit length.
499	struct CompilationUnitHeader {
500	uint64_t length;
501	uint16_t version;
502	uint64_t abbrev_offset;
503	uint8_t address_size;
504	} header_;
505
506	// Reads the DWARF2/3 header for this compilation unit.
507	void ReadHeader();
508
509	// Reads the DWARF2/3 abbreviations for this compilation unit
510	void ReadAbbrevs();
511
512	// Read the abbreviation offset for this compilation unit
513	size_t ReadAbbrevOffset(const uint8_t* headerptr);
514
515	// Read the address size for this compilation unit
516	size_t ReadAddressSize(const uint8_t* headerptr);
517
518	// Read the DWO id from a split or skeleton compilation unit header
519	size_t ReadDwoId(const uint8_t* headerptr);
520
521	// Read the type signature from a type or split type compilation unit header
522	size_t ReadTypeSignature(const uint8_t* headerptr);
523
524	// Read the DWO id from a split or skeleton compilation unit header
525	size_t ReadTypeOffset(const uint8_t* headerptr);
526
527	// Processes a single DIE for this compilation unit and return a new
528	// pointer just past the end of it
529	const uint8_t* ProcessDIE(uint64_t dieoffset,
530	const uint8_t* start,
531	const Abbrev& abbrev);
532
533	// Processes a single attribute and return a new pointer just past the
534	// end of it
535	const uint8_t* ProcessAttribute(uint64_t dieoffset,
536	const uint8_t* start,
537	enum DwarfAttribute attr,
538	enum DwarfForm form,
539	uint64_t implicit_const);
540
541	// Special version of ProcessAttribute, for finding str_offsets_base and
542	// DW_AT_addr_base in DW_TAG_compile_unit, for DWARF v5.
543	const uint8_t* ProcessOffsetBaseAttribute(uint64_t dieoffset,
544	const uint8_t* start,
545	enum DwarfAttribute attr,
546	enum DwarfForm form,
547	uint64_t implicit_const);
548
549	// Called when we have an attribute with unsigned data to give to
550	// our handler. The attribute is for the DIE at OFFSET from the
551	// beginning of compilation unit, has a name of ATTR, a form of
552	// FORM, and the actual data of the attribute is in DATA.
553	// If we see a DW_AT_GNU_dwo_id attribute, save the value so that
554	// we can find the debug info in a .dwo or .dwp file.
555	void ProcessAttributeUnsigned(uint64_t offset,
556	enum DwarfAttribute attr,
557	enum DwarfForm form,
558	uint64_t data) {
559	if (attr == DW_AT_GNU_dwo_id) {
560	dwo_id_ = data;
561	}
562	else if (attr == DW_AT_GNU_addr_base \|\| attr == DW_AT_addr_base) {
563	addr_base_ = data;
564	}
565	else if (attr == DW_AT_str_offsets_base) {
566	str_offsets_base_ = data;
567	}
568	else if (attr == DW_AT_GNU_ranges_base \|\| attr == DW_AT_rnglists_base) {
569	ranges_base_ = data;
570	}
571	// TODO(yunlian): When we add DW_AT_ranges_base from DWARF-5,
572	// that base will apply to DW_AT_ranges attributes in the
573	// skeleton CU as well as in the .dwo/.dwp files.
574	else if (attr == DW_AT_ranges && is_split_dwarf_) {
575	data += ranges_base_;
576	}
577	handler_->ProcessAttributeUnsigned(offset, attr, form, data);
578	}
579
580	// Called when we have an attribute with signed data to give to
581	// our handler. The attribute is for the DIE at OFFSET from the
582	// beginning of compilation unit, has a name of ATTR, a form of
583	// FORM, and the actual data of the attribute is in DATA.
584	void ProcessAttributeSigned(uint64_t offset,
585	enum DwarfAttribute attr,
586	enum DwarfForm form,
587	int64_t data) {
588	handler_->ProcessAttributeSigned(offset, attr, form, data);
589	}
590
591	// Called when we have an attribute with a buffer of data to give to
592	// our handler. The attribute is for the DIE at OFFSET from the
593	// beginning of compilation unit, has a name of ATTR, a form of
594	// FORM, and the actual data of the attribute is in DATA, and the
595	// length of the buffer is LENGTH.
596	void ProcessAttributeBuffer(uint64_t offset,
597	enum DwarfAttribute attr,
598	enum DwarfForm form,
599	const uint8_t* data,
600	uint64_t len) {
601	handler_->ProcessAttributeBuffer(offset, attr, form, data, len);
602	}
603
604	// Handles the common parts of DW_FORM_GNU_str_index, DW_FORM_strx,
605	// DW_FORM_strx1, DW_FORM_strx2, DW_FORM_strx3, and DW_FORM_strx4.
606	// Retrieves the data and calls through to ProcessAttributeString.
607	void ProcessFormStringIndex(uint64_t offset,
608	enum DwarfAttribute attr,
609	enum DwarfForm form,
610	uint64_t str_index);
611
612	// Called when we have an attribute with string data to give to
613	// our handler. The attribute is for the DIE at OFFSET from the
614	// beginning of compilation unit, has a name of ATTR, a form of
615	// FORM, and the actual data of the attribute is in DATA.
616	// If we see a DW_AT_GNU_dwo_name attribute, save the value so
617	// that we can find the debug info in a .dwo or .dwp file.
618	void ProcessAttributeString(uint64_t offset,
619	enum DwarfAttribute attr,
620	enum DwarfForm form,
621	const char* data) {
622	if (attr == DW_AT_GNU_dwo_name \|\| attr == DW_AT_dwo_name)
623	dwo_name_ = data;
624	handler_->ProcessAttributeString(offset, attr, form, data);
625	}
626
627	// Called to handle common portions of DW_FORM_addrx and variations, as well
628	// as DW_FORM_GNU_addr_index.
629	void ProcessAttributeAddrIndex(uint64_t offset,
630	enum DwarfAttribute attr,
631	enum DwarfForm form,
632	uint64_t addr_index) {
633	const uint8_t* addr_ptr =
634	addr_buffer_ + addr_base_ + addr_index * reader_->AddressSize();
635	ProcessAttributeUnsigned(
636	offset, attr, form, reader_->ReadAddress(addr_ptr));
637	}
638
639	// Processes all DIEs for this compilation unit
640	void ProcessDIEs();
641
642	// Skips the die with attributes specified in ABBREV starting at
643	// START, and return the new place to position the stream to.
644	const uint8_t* SkipDIE(const uint8_t* start, const Abbrev& abbrev);
645
646	// Skips the attribute starting at START, with FORM, and return the
647	// new place to position the stream to.
648	const uint8_t* SkipAttribute(const uint8_t* start, enum DwarfForm form);
649
650	// Process the actual debug information in a split DWARF file.
651	void ProcessSplitDwarf();
652
653	// Read the debug sections from a .dwo file.
654	void ReadDebugSectionsFromDwo(ElfReader* elf_reader,
655	SectionMap* sections);
656
657	// Path of the file containing the debug information.
658	const string path_;
659
660	// Offset from section start is the offset of this compilation unit
661	// from the beginning of the .debug_info section.
662	uint64_t offset_from_section_start_;
663
664	// buffer is the buffer for our CU, starting at .debug_info + offset
665	// passed in from constructor.
666	// after_header points to right after the compilation unit header.
667	const uint8_t* buffer_;
668	uint64_t buffer_length_;
669	const uint8_t* after_header_;
670
671	// The associated ByteReader that handles endianness issues for us
672	ByteReader* reader_;
673
674	// The map of sections in our file to buffers containing their data
675	const SectionMap& sections_;
676
677	// The associated handler to call processing functions in
678	Dwarf2Handler* handler_;
679
680	// Set of DWARF2/3 abbreviations for this compilation unit. Indexed
681	// by abbreviation number, which means that abbrevs_[0] is not
682	// valid.
683	std::vector<Abbrev>* abbrevs_;
684
685	// String section buffer and length, if we have a string section.
686	// This is here to avoid doing a section lookup for strings in
687	// ProcessAttribute, which is in the hot path for DWARF2 reading.
688	const uint8_t* string_buffer_;
689	uint64_t string_buffer_length_;
690
691	// Similarly for .debug_line_string.
692	const uint8_t* line_string_buffer_;
693	uint64_t line_string_buffer_length_;
694
695	// String offsets section buffer and length, if we have a string offsets
696	// section (.debug_str_offsets or .debug_str_offsets.dwo).
697	const uint8_t* str_offsets_buffer_;
698	uint64_t str_offsets_buffer_length_;
699
700	// Address section buffer and length, if we have an address section
701	// (.debug_addr).
702	const uint8_t* addr_buffer_;
703	uint64_t addr_buffer_length_;
704
705	// Flag indicating whether this compilation unit is part of a .dwo
706	// or .dwp file. If true, we are reading this unit because a
707	// skeleton compilation unit in an executable file had a
708	// DW_AT_GNU_dwo_name or DW_AT_GNU_dwo_id attribute.
709	// In a .dwo file, we expect the string offsets section to
710	// have a ".dwo" suffix, and we will use the ".debug_addr" section
711	// associated with the skeleton compilation unit.
712	bool is_split_dwarf_;
713
714	// Flag indicating if it's a Type Unit (only applicable to DWARF v5).
715	bool is_type_unit_;
716
717	// The value of the DW_AT_GNU_dwo_id attribute, if any.
718	uint64_t dwo_id_;
719
720	// The value of the DW_AT_GNU_type_signature attribute, if any.
721	uint64_t type_signature_;
722
723	// The value of the DW_AT_GNU_type_offset attribute, if any.
724	size_t type_offset_;
725
726	// The value of the DW_AT_GNU_dwo_name attribute, if any.
727	const char* dwo_name_;
728
729	// If this is a split DWARF CU, the value of the DW_AT_GNU_dwo_id attribute
730	// from the skeleton CU.
731	uint64_t skeleton_dwo_id_;
732
733	// The value of the DW_AT_GNU_ranges_base or DW_AT_rnglists_base attribute,
734	// if any.
735	uint64_t ranges_base_;
736
737	// The value of the DW_AT_GNU_addr_base attribute, if any.
738	uint64_t addr_base_;
739
740	// The value of DW_AT_str_offsets_base attribute, if any.
741	uint64_t str_offsets_base_;
742
743	// True if we have already looked for a .dwp file.
744	bool have_checked_for_dwp_;
745
746	// Path to the .dwp file.
747	string dwp_path_;
748
749	// ByteReader for the DWP file.
750	std::unique_ptr<ByteReader> dwp_byte_reader_;
751
752	// DWP reader.
753	std::unique_ptr<DwpReader> dwp_reader_;
754	};
755
756	// A Reader for a .dwp file. Supports the fetching of DWARF debug
757	// info for a given dwo_id.
758	//
759	// There are two versions of .dwp files. In both versions, the
760	// .dwp file is an ELF file containing only debug sections.
761	// In Version 1, the file contains many copies of each debug
762	// section, one for each .dwo file that is packaged in the .dwp
763	// file, and the .debug_cu_index section maps from the dwo_id
764	// to a set of section indexes. In Version 2, the file contains
765	// one of each debug section, and the .debug_cu_index section
766	// maps from the dwo_id to a set of offsets and lengths that
767	// identify each .dwo file's contribution to the larger sections.
768
769	class DwpReader {
770	public:
771	DwpReader(const ByteReader& byte_reader, ElfReader* elf_reader);
772
773	~DwpReader();
774
775	// Read the CU index and initialize data members.
776	void Initialize();
777
778	// Read the debug sections for the given dwo_id.
779	void ReadDebugSectionsForCU(uint64_t dwo_id, SectionMap* sections);
780
781	private:
782	// Search a v1 hash table for "dwo_id". Returns the slot index
783	// where the dwo_id was found, or -1 if it was not found.
784	int LookupCU(uint64_t dwo_id);
785
786	// Search a v2 hash table for "dwo_id". Returns the row index
787	// in the offsets and sizes tables, or 0 if it was not found.
788	uint32_t LookupCUv2(uint64_t dwo_id);
789
790	// The ELF reader for the .dwp file.
791	ElfReader* elf_reader_;
792
793	// The ByteReader for the .dwp file.
794	const ByteReader& byte_reader_;
795
796	// Pointer to the .debug_cu_index section.
797	const char* cu_index_;
798
799	// Size of the .debug_cu_index section.
800	size_t cu_index_size_;
801
802	// Pointer to the .debug_str.dwo section.
803	const char* string_buffer_;
804
805	// Size of the .debug_str.dwo section.
806	size_t string_buffer_size_;
807
808	// Version of the .dwp file. We support versions 1 and 2 currently.
809	int version_;
810
811	// Number of columns in the section tables (version 2).
812	unsigned int ncolumns_;
813
814	// Number of units in the section tables (version 2).
815	unsigned int nunits_;
816
817	// Number of slots in the hash table.
818	unsigned int nslots_;
819
820	// Pointer to the beginning of the hash table.
821	const char* phash_;
822
823	// Pointer to the beginning of the index table.
824	const char* pindex_;
825
826	// Pointer to the beginning of the section index pool (version 1).
827	const char* shndx_pool_;
828
829	// Pointer to the beginning of the section offset table (version 2).
830	const char* offset_table_;
831
832	// Pointer to the beginning of the section size table (version 2).
833	const char* size_table_;
834
835	// Contents of the sections of interest (version 2).
836	const char* abbrev_data_;
837	size_t abbrev_size_;
838	const char* info_data_;
839	size_t info_size_;
840	const char* str_offsets_data_;
841	size_t str_offsets_size_;
842	};
843
844	// This class is a reader for DWARF's Call Frame Information. CFI
845	// describes how to unwind stack frames --- even for functions that do
846	// not follow fixed conventions for saving registers, whose frame size
847	// varies as they execute, etc.
848	//
849	// CFI describes, at each machine instruction, how to compute the
850	// stack frame's base address, how to find the return address, and
851	// where to find the saved values of the caller's registers (if the
852	// callee has stashed them somewhere to free up the registers for its
853	// own use).
854	//
855	// For example, suppose we have a function whose machine code looks
856	// like this (imagine an assembly language that looks like C, for a
857	// machine with 32-bit registers, and a stack that grows towards lower
858	// addresses):
859	//
860	//     func:                                ; entry point; return address at sp
861	//     func+0:      sp = sp - 16            ; allocate space for stack frame
862	//     func+1:      sp[12] = r0             ; save r0 at sp+12
863	//     ...                                  ; other code, not frame-related
864	//     func+10:     sp -= 4; *sp = x        ; push some x on the stack
865	//     ...                                  ; other code, not frame-related
866	//     func+20:     r0 = sp[16]             ; restore saved r0
867	//     func+21:     sp += 20                ; pop whole stack frame
868	//     func+22:     pc = *sp; sp += 4       ; pop return address and jump to it
869	//
870	// DWARF CFI is (a very compressed representation of) a table with a
871	// row for each machine instruction address and a column for each
872	// register showing how to restore it, if possible.
873	//
874	// A special column named "CFA", for "Canonical Frame Address", tells how
875	// to compute the base address of the frame; registers' entries may
876	// refer to the CFA in describing where the registers are saved.
877	//
878	// Another special column, named "RA", represents the return address.
879	//
880	// For example, here is a complete (uncompressed) table describing the
881	// function above:
882	//
883	//     insn      cfa    r0      r1 ...  ra
884	//     =======================================
885	//     func+0:   sp                     cfa[0]
886	//     func+1:   sp+16                  cfa[0]
887	//     func+2:   sp+16  cfa[-4]         cfa[0]
888	//     func+11:  sp+20  cfa[-4]         cfa[0]
889	//     func+21:  sp+20                  cfa[0]
890	//     func+22:  sp                     cfa[0]
891	//
892	// Some things to note here:
893	//
894	// - Each row describes the state of affairs *before* executing the
895	// instruction at the given address. Thus, the row for func+0
896	// describes the state before we allocate the stack frame. In the
897	// next row, the formula for computing the CFA has changed,
898	// reflecting that allocation.
899	//
900	// - The other entries are written in terms of the CFA; this allows
901	// them to remain unchanged as the stack pointer gets bumped around.
902	// For example, the rule for recovering the return address (the "ra"
903	// column) remains unchanged throughout the function, even as the
904	// stack pointer takes on three different offsets from the return
905	// address.
906	//
907	// - Although we haven't shown it, most calling conventions designate
908	// "callee-saves" and "caller-saves" registers. The callee must
909	// preserve the values of callee-saves registers; if it uses them,
910	// it must save their original values somewhere, and restore them
911	// before it returns. In contrast, the callee is free to trash
912	// caller-saves registers; if the callee uses these, it will
913	// probably not bother to save them anywhere, and the CFI will
914	// probably mark their values as "unrecoverable".
915	//
916	// (However, since the caller cannot assume the callee was going to
917	// save them, caller-saves registers are probably dead in the caller
918	// anyway, so compilers usually don't generate CFA for caller-saves
919	// registers.)
920	//
921	// - Exactly where the CFA points is a matter of convention that
922	// depends on the architecture and ABI in use. In the example, the
923	// CFA is the value the stack pointer had upon entry to the
924	// function, pointing at the saved return address. But on the x86,
925	// the call frame information generated by GCC follows the
926	// convention that the CFA is the address *after* the saved return
927	// address.
928	//
929	// But by definition, the CFA remains constant throughout the
930	// lifetime of the frame. This makes it a useful value for other
931	// columns to refer to. It also gives debuggers a useful handle
932	// for identifying a frame.
933	//
934	// If you look at the table above, you'll notice that a given entry is
935	// often the same as the one immediately above it: most instructions
936	// change only one or two aspects of the stack frame, if they affect
937	// it at all. The DWARF format takes advantage of this fact, and
938	// reduces the size of the data by mentioning only the addresses and
939	// columns at which changes take place. So for the above, DWARF CFI
940	// data would only actually mention the following:
941	//
942	//     insn      cfa    r0      r1 ...  ra
943	//     =======================================
944	//     func+0:   sp                     cfa[0]
945	//     func+1:   sp+16
946	//     func+2:          cfa[-4]
947	//     func+11:  sp+20
948	//     func+21:         r0
949	//     func+22:  sp
950	//
951	// In fact, this is the way the parser reports CFI to the consumer: as
952	// a series of statements of the form, "At address X, column Y changed
953	// to Z," and related conventions for describing the initial state.
954	//
955	// Naturally, it would be impractical to have to scan the entire
956	// program's CFI, noting changes as we go, just to recover the
957	// unwinding rules in effect at one particular instruction. To avoid
958	// this, CFI data is grouped into "entries", each of which covers a
959	// specified range of addresses and begins with a complete statement
960	// of the rules for all recoverable registers at that starting
961	// address. Each entry typically covers a single function.
962	//
963	// Thus, to compute the contents of a given row of the table --- that
964	// is, rules for recovering the CFA, RA, and registers at a given
965	// instruction --- the consumer should find the entry that covers that
966	// instruction's address, start with the initial state supplied at the
967	// beginning of the entry, and work forward until it has processed all
968	// the changes up to and including those for the present instruction.
969	//
970	// There are seven kinds of rules that can appear in an entry of the
971	// table:
972	//
973	// - "undefined": The given register is not preserved by the callee;
974	// its value cannot be recovered.
975	//
976	// - "same value": This register has the same value it did in the callee.
977	//
978	// - offset(N): The register is saved at offset N from the CFA.
979	//
980	// - val_offset(N): The value the register had in the caller is the
981	// CFA plus offset N. (This is usually only useful for describing
982	// the stack pointer.)
983	//
984	// - register(R): The register's value was saved in another register R.
985	//
986	// - expression(E): Evaluating the DWARF expression E using the
987	// current frame's registers' values yields the address at which the
988	// register was saved.
989	//
990	// - val_expression(E): Evaluating the DWARF expression E using the
991	// current frame's registers' values yields the value the register
992	// had in the caller.
993
994	class CallFrameInfo {
995	public:
996	// The different kinds of entries one finds in CFI. Used internally,
997	// and for error reporting.
998	enum EntryKind { kUnknown, kCIE, kFDE, kTerminator };
999
1000	// The handler class to which the parser hands the parsed call frame
1001	// information. Defined below.
1002	class Handler;
1003
1004	// A reporter class, which CallFrameInfo uses to report errors
1005	// encountered while parsing call frame information. Defined below.
1006	class Reporter;
1007
1008	// Create a DWARF CFI parser. BUFFER points to the contents of the
1009	// .debug_frame section to parse; BUFFER_LENGTH is its length in bytes.
1010	// REPORTER is an error reporter the parser should use to report
1011	// problems. READER is a ByteReader instance that has the endianness and
1012	// address size set properly. Report the data we find to HANDLER.
1013	//
1014	// This class can also parse Linux C++ exception handling data, as found
1015	// in '.eh_frame' sections. This data is a variant of DWARF CFI that is
1016	// placed in loadable segments so that it is present in the program's
1017	// address space, and is interpreted by the C++ runtime to search the
1018	// call stack for a handler interested in the exception being thrown,
1019	// actually pop the frames, and find cleanup code to run.
1020	//
1021	// There are two differences between the call frame information described
1022	// in the DWARF standard and the exception handling data Linux places in
1023	// the .eh_frame section:
1024	//
1025	// - Exception handling data uses uses a different format for call frame
1026	// information entry headers. The distinguished CIE id, the way FDEs
1027	// refer to their CIEs, and the way the end of the series of entries is
1028	// determined are all slightly different.
1029	//
1030	// If the constructor's EH_FRAME argument is true, then the
1031	// CallFrameInfo parses the entry headers as Linux C++ exception
1032	// handling data. If EH_FRAME is false or omitted, the CallFrameInfo
1033	// parses standard DWARF call frame information.
1034	//
1035	// - Linux C++ exception handling data uses CIE augmentation strings
1036	// beginning with 'z' to specify the presence of additional data after
1037	// the CIE and FDE headers and special encodings used for addresses in
1038	// frame description entries.
1039	//
1040	// CallFrameInfo can handle 'z' augmentations in either DWARF CFI or
1041	// exception handling data if you have supplied READER with the base
1042	// addresses needed to interpret the pointer encodings that 'z'
1043	// augmentations can specify. See the ByteReader interface for details
1044	// about the base addresses. See the CallFrameInfo::Handler interface
1045	// for details about the additional information one might find in
1046	// 'z'-augmented data.
1047	//
1048	// Thus:
1049	//
1050	// - If you are parsing standard DWARF CFI, as found in a .debug_frame
1051	// section, you should pass false for the EH_FRAME argument, or omit
1052	// it, and you need not worry about providing READER with the
1053	// additional base addresses.
1054	//
1055	// - If you want to parse Linux C++ exception handling data from a
1056	// .eh_frame section, you should pass EH_FRAME as true, and call
1057	// READER's SetBase member functions before calling our Start method.*
1058	//
1059	// - If you want to parse DWARF CFI that uses the 'z' augmentations
1060	// (although I don't think any toolchain ever emits such data), you
1061	// could pass false for EH_FRAME, but call READER's SetBase members.*
1062	//
1063	// The extensions the Linux C++ ABI makes to DWARF for exception
1064	// handling are described here, rather poorly:
1065	// http://refspecs.linux-foundation.org/LSB_4.0.0/LSB-Core-generic/LSB-Core-generic/dwarfext.html
1066	// http://refspecs.linux-foundation.org/LSB_4.0.0/LSB-Core-generic/LSB-Core-generic/ehframechpt.html
1067	//
1068	// The mechanics of C++ exception handling, personality routines,
1069	// and language-specific data areas are described here, rather nicely:
1070	// http://www.codesourcery.com/public/cxx-abi/abi-eh.html
1071	CallFrameInfo(const uint8_t* buffer, size_t buffer_length,
1072	ByteReader* reader, Handler* handler, Reporter* reporter,
1073	bool eh_frame = false)
1074	: buffer_(buffer), buffer_length_(buffer_length),
1075	reader_(reader), handler_(handler), reporter_(reporter),
1076	eh_frame_(eh_frame) { }
1077
1078	~CallFrameInfo() { }
1079
1080	// Parse the entries in BUFFER, reporting what we find to HANDLER.
1081	// Return true if we reach the end of the section successfully, or
1082	// false if we encounter an error.
1083	bool Start();
1084
1085	// Return the textual name of KIND. For error reporting.
1086	static const char* KindName(EntryKind kind);
1087
1088	private:
1089
1090	struct CIE;
1091
1092	// A CFI entry, either an FDE or a CIE.
1093	struct Entry {
1094	// The starting offset of the entry in the section, for error
1095	// reporting.
1096	size_t offset;
1097
1098	// The start of this entry in the buffer.
1099	const uint8_t* start;
1100
1101	// Which kind of entry this is.
1102	//
1103	// We want to be able to use this for error reporting even while we're
1104	// in the midst of parsing. Error reporting code may assume that kind,
1105	// offset, and start fields are valid, although kind may be kUnknown.
1106	EntryKind kind;
1107
1108	// The end of this entry's common prologue (initial length and id), and
1109	// the start of this entry's kind-specific fields.
1110	const uint8_t* fields;
1111
1112	// The start of this entry's instructions.
1113	const uint8_t* instructions;
1114
1115	// The address past the entry's last byte in the buffer. (Note that
1116	// since offset points to the entry's initial length field, and the
1117	// length field is the number of bytes after that field, this is not
1118	// simply buffer_ + offset + length.)
1119	const uint8_t* end;
1120
1121	// For both DWARF CFI and .eh_frame sections, this is the CIE id in a
1122	// CIE, and the offset of the associated CIE in an FDE.
1123	uint64_t id;
1124
1125	// The CIE that applies to this entry, if we've parsed it. If this is a
1126	// CIE, then this field points to this structure.
1127	CIE* cie;
1128	};
1129
1130	// A common information entry (CIE).
1131	struct CIE: public Entry {
1132	uint8_t version; // CFI data version number
1133	string augmentation; // vendor format extension markers
1134	uint64_t code_alignment_factor; // scale for code address adjustments
1135	int data_alignment_factor; // scale for stack pointer adjustments
1136	unsigned return_address_register; // which register holds the return addr
1137
1138	// True if this CIE includes Linux C++ ABI 'z' augmentation data.
1139	bool has_z_augmentation;
1140
1141	// Parsed 'z' augmentation data. These are meaningful only if
1142	// has_z_augmentation is true.
1143	bool has_z_lsda; // The 'z' augmentation included 'L'.
1144	bool has_z_personality; // The 'z' augmentation included 'P'.
1145	bool has_z_signal_frame; // The 'z' augmentation included 'S'.
1146
1147	// If has_z_lsda is true, this is the encoding to be used for language-
1148	// specific data area pointers in FDEs.
1149	DwarfPointerEncoding lsda_encoding;
1150
1151	// If has_z_personality is true, this is the encoding used for the
1152	// personality routine pointer in the augmentation data.
1153	DwarfPointerEncoding personality_encoding;
1154
1155	// If has_z_personality is true, this is the address of the personality
1156	// routine --- or, if personality_encoding & DW_EH_PE_indirect, the
1157	// address where the personality routine's address is stored.
1158	uint64_t personality_address;
1159
1160	// This is the encoding used for addresses in the FDE header and
1161	// in DW_CFA_set_loc instructions. This is always valid, whether
1162	// or not we saw a 'z' augmentation string; its default value is
1163	// DW_EH_PE_absptr, which is what normal DWARF CFI uses.
1164	DwarfPointerEncoding pointer_encoding;
1165
1166	// These were only introduced in DWARF4, so will not be set in older
1167	// versions.
1168	uint8_t address_size;
1169	uint8_t segment_size;
1170	};
1171
1172	// A frame description entry (FDE).
1173	struct FDE: public Entry {
1174	uint64_t address; // start address of described code
1175	uint64_t size; // size of described code, in bytes
1176
1177	// If cie->has_z_lsda is true, then this is the language-specific data
1178	// area's address --- or its address's address, if cie->lsda_encoding
1179	// has the DW_EH_PE_indirect bit set.
1180	uint64_t lsda_address;
1181	};
1182
1183	// Internal use.
1184	class Rule;
1185	class UndefinedRule;
1186	class SameValueRule;
1187	class OffsetRule;
1188	class ValOffsetRule;
1189	class RegisterRule;
1190	class ExpressionRule;
1191	class ValExpressionRule;
1192	class RuleMap;
1193	class State;
1194
1195	// Parse the initial length and id of a CFI entry, either a CIE, an FDE,
1196	// or a .eh_frame end-of-data mark. CURSOR points to the beginning of the
1197	// data to parse. On success, populate ENTRY as appropriate, and return
1198	// true. On failure, report the problem, and return false. Even if we
1199	// return false, set ENTRY->end to the first byte after the entry if we
1200	// were able to figure that out, or NULL if we weren't.
1201	bool ReadEntryPrologue(const uint8_t* cursor, Entry* entry);
1202
1203	// Parse the fields of a CIE after the entry prologue, including any 'z'
1204	// augmentation data. Assume that the 'Entry' fields of CIE are
1205	// populated; use CIE->fields and CIE->end as the start and limit for
1206	// parsing. On success, populate the rest of CIE, and return true; on*
1207	// failure, report the problem and return false.
1208	bool ReadCIEFields(CIE* cie);
1209
1210	// Parse the fields of an FDE after the entry prologue, including any 'z'
1211	// augmentation data. Assume that the 'Entry' fields of FDE are*
1212	// initialized; use FDE->fields and FDE->end as the start and limit for
1213	// parsing. Assume that FDE->cie is fully initialized. On success,
1214	// populate the rest of FDE, and return true; on failure, report the*
1215	// problem and return false.
1216	bool ReadFDEFields(FDE* fde);
1217
1218	// Report that ENTRY is incomplete, and return false. This is just a
1219	// trivial wrapper for invoking reporter_->Incomplete; it provides a
1220	// little brevity.
1221	bool ReportIncomplete(Entry* entry);
1222
1223	// Return true if ENCODING has the DW_EH_PE_indirect bit set.
1224	static bool IsIndirectEncoding(DwarfPointerEncoding encoding) {
1225	return encoding & DW_EH_PE_indirect;
1226	}
1227
1228	// The contents of the DWARF .debug_info section we're parsing.
1229	const uint8_t* buffer_;
1230	size_t buffer_length_;
1231
1232	// For reading multi-byte values with the appropriate endianness.
1233	ByteReader* reader_;
1234
1235	// The handler to which we should report the data we find.
1236	Handler* handler_;
1237
1238	// For reporting problems in the info we're parsing.
1239	Reporter* reporter_;
1240
1241	// True if we are processing .eh_frame-format data.
1242	bool eh_frame_;
1243	};
1244
1245	// The handler class for CallFrameInfo. The a CFI parser calls the
1246	// member functions of a handler object to report the data it finds.
1247	class CallFrameInfo::Handler {
1248	public:
1249	// The pseudo-register number for the canonical frame address.
1250	enum { kCFARegister = -`1` };
1251
1252	Handler() { }
1253	virtual ~Handler() { }
1254
1255	// The parser has found CFI for the machine code at ADDRESS,
1256	// extending for LENGTH bytes. OFFSET is the offset of the frame
1257	// description entry in the section, for use in error messages.
1258	// VERSION is the version number of the CFI format. AUGMENTATION is
1259	// a string describing any producer-specific extensions present in
1260	// the data. RETURN_ADDRESS is the number of the register that holds
1261	// the address to which the function should return.
1262	//
1263	// Entry should return true to process this CFI, or false to skip to
1264	// the next entry.
1265	//
1266	// The parser invokes Entry for each Frame Description Entry (FDE)
1267	// it finds. The parser doesn't report Common Information Entries
1268	// to the handler explicitly; instead, if the handler elects to
1269	// process a given FDE, the parser reiterates the appropriate CIE's
1270	// contents at the beginning of the FDE's rules.
1271	virtual bool Entry(size_t offset, uint64_t address, uint64_t length,
1272	uint8_t version, const string& augmentation,
1273	unsigned return_address) = `0`;
1274
1275	// When the Entry function returns true, the parser calls these
1276	// handler functions repeatedly to describe the rules for recovering
1277	// registers at each instruction in the given range of machine code.
1278	// Immediately after a call to Entry, the handler should assume that
1279	// the rule for each callee-saves register is "unchanged" --- that
1280	// is, that the register still has the value it had in the caller.
1281	//
1282	// If a Rule function returns true, we continue processing this entry's*
1283	// instructions. If a Rule function returns false, we stop evaluating*
1284	// instructions, and skip to the next entry. Either way, we call End
1285	// before going on to the next entry.
1286	//
1287	// In all of these functions, if the REG parameter is kCFARegister, then
1288	// the rule describes how to find the canonical frame address.
1289	// kCFARegister may be passed as a BASE_REGISTER argument, meaning that
1290	// the canonical frame address should be used as the base address for the
1291	// computation. All other REG values will be positive.
1292
1293	// At ADDRESS, register REG's value is not recoverable.
1294	virtual bool UndefinedRule(uint64_t address, int reg) = `0`;
1295
1296	// At ADDRESS, register REG's value is the same as that it had in
1297	// the caller.
1298	virtual bool SameValueRule(uint64_t address, int reg) = `0`;
1299
1300	// At ADDRESS, register REG has been saved at offset OFFSET from
1301	// BASE_REGISTER.
1302	virtual bool OffsetRule(uint64_t address, int reg,
1303	int base_register, long offset) = `0`;
1304
1305	// At ADDRESS, the caller's value of register REG is the current
1306	// value of BASE_REGISTER plus OFFSET. (This rule doesn't provide an
1307	// address at which the register's value is saved.)
1308	virtual bool ValOffsetRule(uint64_t address, int reg,
1309	int base_register, long offset) = `0`;
1310
1311	// At ADDRESS, register REG has been saved in BASE_REGISTER. This differs
1312	// from ValOffsetRule(ADDRESS, REG, BASE_REGISTER, 0), in that
1313	// BASE_REGISTER is the "home" for REG's saved value: if you want to
1314	// assign to a variable whose home is REG in the calling frame, you
1315	// should put the value in BASE_REGISTER.
1316	virtual bool RegisterRule(uint64_t address, int reg, int base_register) = `0`;
1317
1318	// At ADDRESS, the DWARF expression EXPRESSION yields the address at
1319	// which REG was saved.
1320	virtual bool ExpressionRule(uint64_t address, int reg,
1321	const string& expression) = `0`;
1322
1323	// At ADDRESS, the DWARF expression EXPRESSION yields the caller's
1324	// value for REG. (This rule doesn't provide an address at which the
1325	// register's value is saved.)
1326	virtual bool ValExpressionRule(uint64_t address, int reg,
1327	const string& expression) = `0`;
1328
1329	// Indicate that the rules for the address range reported by the
1330	// last call to Entry are complete. End should return true if
1331	// everything is okay, or false if an error has occurred and parsing
1332	// should stop.
1333	virtual bool End() = `0`;
1334
1335	// Handler functions for Linux C++ exception handling data. These are
1336	// only called if the data includes 'z' augmentation strings.
1337
1338	// The Linux C++ ABI uses an extension of the DWARF CFI format to
1339	// walk the stack to propagate exceptions from the throw to the
1340	// appropriate catch, and do the appropriate cleanups along the way.
1341	// CFI entries used for exception handling have two additional data
1342	// associated with them:
1343	//
1344	// - The "language-specific data area" describes which exception
1345	// types the function has 'catch' clauses for, and indicates how
1346	// to go about re-entering the function at the appropriate catch
1347	// clause. If the exception is not caught, it describes the
1348	// destructors that must run before the frame is popped.
1349	//
1350	// - The "personality routine" is responsible for interpreting the
1351	// language-specific data area's contents, and deciding whether
1352	// the exception should continue to propagate down the stack,
1353	// perhaps after doing some cleanup for this frame, or whether the
1354	// exception will be caught here.
1355	//
1356	// In principle, the language-specific data area is opaque to
1357	// everybody but the personality routine. In practice, these values
1358	// may be useful or interesting to readers with extra context, and
1359	// we have to at least skip them anyway, so we might as well report
1360	// them to the handler.
1361
1362	// This entry's exception handling personality routine's address is
1363	// ADDRESS. If INDIRECT is true, then ADDRESS is the address at
1364	// which the routine's address is stored. The default definition for
1365	// this handler function simply returns true, allowing parsing of
1366	// the entry to continue.
1367	virtual bool PersonalityRoutine(uint64_t address, bool indirect) {
1368	return true;
1369	}
1370
1371	// This entry's language-specific data area (LSDA) is located at
1372	// ADDRESS. If INDIRECT is true, then ADDRESS is the address at
1373	// which the area's address is stored. The default definition for
1374	// this handler function simply returns true, allowing parsing of
1375	// the entry to continue.
1376	virtual bool LanguageSpecificDataArea(uint64_t address, bool indirect) {
1377	return true;
1378	}
1379
1380	// This entry describes a signal trampoline --- this frame is the
1381	// caller of a signal handler. The default definition for this
1382	// handler function simply returns true, allowing parsing of the
1383	// entry to continue.
1384	//
1385	// The best description of the rationale for and meaning of signal
1386	// trampoline CFI entries seems to be in the GCC bug database:
1387	// http://gcc.gnu.org/bugzilla/show_bug.cgi?id=26208
1388	virtual bool SignalHandler() { return true; }
1389	};
1390
1391	// The CallFrameInfo class makes calls on an instance of this class to
1392	// report errors or warn about problems in the data it is parsing. The
1393	// default definitions of these methods print a message to stderr, but
1394	// you can make a derived class that overrides them.
1395	class CallFrameInfo::Reporter {
1396	public:
1397	// Create an error reporter which attributes troubles to the section
1398	// named SECTION in FILENAME.
1399	//
1400	// Normally SECTION would be .debug_frame, but the Mac puts CFI data
1401	// in a Mach-O section named __debug_frame. If we support
1402	// Linux-style exception handling data, we could be reading an
1403	// .eh_frame section.
1404	Reporter(const string& filename,
1405	const string& section = ".debug_frame")
1406	: filename_(filename), section_(section) { }
1407	virtual ~Reporter() { }
1408
1409	// The CFI entry at OFFSET ends too early to be well-formed. KIND
1410	// indicates what kind of entry it is; KIND can be kUnknown if we
1411	// haven't parsed enough of the entry to tell yet.
1412	virtual void Incomplete(uint64_t offset, CallFrameInfo::EntryKind kind);
1413
1414	// The .eh_frame data has a four-byte zero at OFFSET where the next
1415	// entry's length would be; this is a terminator. However, the buffer
1416	// length as given to the CallFrameInfo constructor says there should be
1417	// more data.
1418	virtual void EarlyEHTerminator(uint64_t offset);
1419
1420	// The FDE at OFFSET refers to the CIE at CIE_OFFSET, but the
1421	// section is not that large.
1422	virtual void CIEPointerOutOfRange(uint64_t offset, uint64_t cie_offset);
1423
1424	// The FDE at OFFSET refers to the CIE at CIE_OFFSET, but the entry
1425	// there is not a CIE.
1426	virtual void BadCIEId(uint64_t offset, uint64_t cie_offset);
1427
1428	// The FDE at OFFSET refers to a CIE with an address size we don't know how
1429	// to handle.
1430	virtual void UnexpectedAddressSize(uint64_t offset, uint8_t address_size);
1431
1432	// The FDE at OFFSET refers to a CIE with an segment descriptor size we
1433	// don't know how to handle.
1434	virtual void UnexpectedSegmentSize(uint64_t offset, uint8_t segment_size);
1435
1436	// The FDE at OFFSET refers to a CIE with version number VERSION,
1437	// which we don't recognize. We cannot parse DWARF CFI if it uses
1438	// a version number we don't recognize.
1439	virtual void UnrecognizedVersion(uint64_t offset, int version);
1440
1441	// The FDE at OFFSET refers to a CIE with augmentation AUGMENTATION,
1442	// which we don't recognize. We cannot parse DWARF CFI if it uses
1443	// augmentations we don't recognize.
1444	virtual void UnrecognizedAugmentation(uint64_t offset,
1445	const string& augmentation);
1446
1447	// The pointer encoding ENCODING, specified by the CIE at OFFSET, is not
1448	// a valid encoding.
1449	virtual void InvalidPointerEncoding(uint64_t offset, uint8_t encoding);
1450
1451	// The pointer encoding ENCODING, specified by the CIE at OFFSET, depends
1452	// on a base address which has not been supplied.
1453	virtual void UnusablePointerEncoding(uint64_t offset, uint8_t encoding);
1454
1455	// The CIE at OFFSET contains a DW_CFA_restore instruction at
1456	// INSN_OFFSET, which may not appear in a CIE.
1457	virtual void RestoreInCIE(uint64_t offset, uint64_t insn_offset);
1458
1459	// The entry at OFFSET, of kind KIND, has an unrecognized
1460	// instruction at INSN_OFFSET.
1461	virtual void BadInstruction(uint64_t offset, CallFrameInfo::EntryKind kind,
1462	uint64_t insn_offset);
1463
1464	// The instruction at INSN_OFFSET in the entry at OFFSET, of kind
1465	// KIND, establishes a rule that cites the CFA, but we have not
1466	// established a CFA rule yet.
1467	virtual void NoCFARule(uint64_t offset, CallFrameInfo::EntryKind kind,
1468	uint64_t insn_offset);
1469
1470	// The instruction at INSN_OFFSET in the entry at OFFSET, of kind
1471	// KIND, is a DW_CFA_restore_state instruction, but the stack of
1472	// saved states is empty.
1473	virtual void EmptyStateStack(uint64_t offset, CallFrameInfo::EntryKind kind,
1474	uint64_t insn_offset);
1475
1476	// The DW_CFA_remember_state instruction at INSN_OFFSET in the entry
1477	// at OFFSET, of kind KIND, would restore a state that has no CFA
1478	// rule, whereas the current state does have a CFA rule. This is
1479	// bogus input, which the CallFrameInfo::Handler interface doesn't
1480	// (and shouldn't) have any way to report.
1481	virtual void ClearingCFARule(uint64_t offset, CallFrameInfo::EntryKind kind,
1482	uint64_t insn_offset);
1483
1484	protected:
1485	// The name of the file whose CFI we're reading.
1486	string filename_;
1487
1488	// The name of the CFI section in that file.
1489	string section_;
1490	};
1491
1492	} // namespace google_breakpad
1493
1494	#endif // UTIL_DEBUGINFO_DWARF2READER_H__
1495

Browse the source code of breakpad/common/dwarf/dwarf2reader.h