dwarf2reader.cc source code [breakpad/common/dwarf/dwarf2reader.cc]

1	// Copyright (c) 2010 Google Inc. All Rights Reserved.
2	//
3	// Redistribution and use in source and binary forms, with or without
4	// modification, are permitted provided that the following conditions are
5	// met:
6	//
7	//     * Redistributions of source code must retain the above copyright
8	// notice, this list of conditions and the following disclaimer.
9	//     * Redistributions in binary form must reproduce the above
10	// copyright notice, this list of conditions and the following disclaimer
11	// in the documentation and/or other materials provided with the
12	// distribution.
13	//     * Neither the name of Google Inc. nor the names of its
14	// contributors may be used to endorse or promote products derived from
15	// this software without specific prior written permission.
16	//
17	// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18	// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19	// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20	// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21	// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22	// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23	// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24	// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25	// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26	// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27	// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
29	// CFI reader author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com>
30
31	// Implementation of LineInfo, CompilationUnit,
32	// and CallFrameInfo. See dwarf2reader.h for details.
33
34	#include "common/dwarf/dwarf2reader.h"
35
36	#include <stdint.h>
37	#include <stdio.h>
38	#include <string.h>
39
40	#include <map>
41	#include <memory>
42	#include <stack>
43	#include <string>
44	#include <utility>
45
46	#include <sys/stat.h>
47
48	#include "common/dwarf/bytereader-inl.h"
49	#include "common/dwarf/bytereader.h"
50	#include "common/dwarf/line_state_machine.h"
51	#include "common/using_std_string.h"
52	#include "google_breakpad/common/breakpad_types.h"
53
54	namespace google_breakpad {
55
56	const SectionMap::const_iterator GetSectionByName(const SectionMap&
57	sections, const char *name) {
58	assert(name[`0`] == `'.'`);
59	auto iter = sections.find(name);
60	if (iter != sections.end())
61	return iter;
62	std::string macho_name("__");
63	macho_name += name + `1`;
64	iter = sections.find(macho_name);
65	return iter;
66	}
67
68	CompilationUnit::CompilationUnit(const string& path,
69	const SectionMap& sections, uint64_t offset,
70	ByteReader* reader, Dwarf2Handler* handler)
71	: path_(path), offset_from_section_start_(offset), reader_(reader),
72	sections_(sections), handler_(handler), abbrevs_(),
73	string_buffer_(NULL), string_buffer_length_(`0`),
74	line_string_buffer_(NULL), line_string_buffer_length_(`0`),
75	str_offsets_buffer_(NULL), str_offsets_buffer_length_(`0`),
76	addr_buffer_(NULL), addr_buffer_length_(`0`),
77	is_split_dwarf_(false), is_type_unit_(false), dwo_id_(`0`), dwo_name_(),
78	skeleton_dwo_id_(`0`), ranges_base_(`0`), addr_base_(`0`),
79	str_offsets_base_(`0`), have_checked_for_dwp_(false), dwp_path_(),
80	dwp_byte_reader_(), dwp_reader_() {}
81
82	// Initialize a compilation unit from a .dwo or .dwp file.
83	// In this case, we need the .debug_addr section from the
84	// executable file that contains the corresponding skeleton
85	// compilation unit. We also inherit the Dwarf2Handler from
86	// the executable file, and call it as if we were still
87	// processing the original compilation unit.
88
89	void CompilationUnit::SetSplitDwarf(const uint8_t* addr_buffer,
90	uint64_t addr_buffer_length,
91	uint64_t addr_base,
92	uint64_t ranges_base,
93	uint64_t dwo_id) {
94	is_split_dwarf_ = true;
95	addr_buffer_ = addr_buffer;
96	addr_buffer_length_ = addr_buffer_length;
97	addr_base_ = addr_base;
98	ranges_base_ = ranges_base;
99	skeleton_dwo_id_ = dwo_id;
100	}
101
102	// Read a DWARF2/3 abbreviation section.
103	// Each abbrev consists of a abbreviation number, a tag, a byte
104	// specifying whether the tag has children, and a list of
105	// attribute/form pairs.
106	// The list of forms is terminated by a 0 for the attribute, and a
107	// zero for the form. The entire abbreviation section is terminated
108	// by a zero for the code.
109
110	void CompilationUnit::ReadAbbrevs() {
111	if (abbrevs_)
112	return;
113
114	// First get the debug_abbrev section.
115	SectionMap::const_iterator iter =
116	GetSectionByName(sections_, ".debug_abbrev");
117	assert(iter != sections_.end());
118
119	abbrevs_ = new std::vector<Abbrev>;
120	abbrevs_->resize(`1`);
121
122	// The only way to check whether we are reading over the end of the
123	// buffer would be to first compute the size of the leb128 data by
124	// reading it, then go back and read it again.
125	const uint8_t* abbrev_start = iter ->second.first +
126	header_.abbrev_offset;
127	const uint8_t* abbrevptr = abbrev_start;
128	#ifndef NDEBUG
129	const uint64_t abbrev_length = iter ->second.second - header_.abbrev_offset;
130	#endif
131
132	while (`1`) {
133	CompilationUnit::Abbrev abbrev;
134	size_t len;
135	const uint64_t number = reader_->ReadUnsignedLEB128(abbrevptr, &len);
136
137	if (number == `0`)
138	break;
139	abbrev.number = number;
140	abbrevptr += len;
141
142	assert(abbrevptr < abbrev_start + abbrev_length);
143	const uint64_t tag = reader_->ReadUnsignedLEB128(abbrevptr, &len);
144	abbrevptr += len;
145	abbrev.tag = static_cast<enum DwarfTag>(tag);
146
147	assert(abbrevptr < abbrev_start + abbrev_length);
148	abbrev.has_children = reader_->ReadOneByte(abbrevptr);
149	abbrevptr += `1`;
150
151	assert(abbrevptr < abbrev_start + abbrev_length);
152
153	while (`1`) {
154	const uint64_t nametemp = reader_->ReadUnsignedLEB128(abbrevptr, &len);
155	abbrevptr += len;
156
157	assert(abbrevptr < abbrev_start + abbrev_length);
158	const uint64_t formtemp = reader_->ReadUnsignedLEB128(abbrevptr, &len);
159	abbrevptr += len;
160	if (nametemp == `0` && formtemp == `0`)
161	break;
162
163	uint64_t value = `0`;
164	if (formtemp == DW_FORM_implicit_const) {
165	value = reader_->ReadUnsignedLEB128(abbrevptr, &len);
166	abbrevptr += len;
167	}
168	AttrForm abbrev_attr(static_cast<enum DwarfAttribute>(nametemp),
169	static_cast<enum DwarfForm>(formtemp),
170	value);
171	abbrev.attributes.push_back(abbrev_attr);
172	}
173	assert(abbrev.number == abbrevs_->size());
174	abbrevs_->push_back(abbrev);
175	}
176	}
177
178	// Skips a single DIE's attributes.
179	const uint8_t* CompilationUnit::SkipDIE(const uint8_t* start,
180	const Abbrev& abbrev) {
181	for (AttributeList::const_iterator i = abbrev.attributes.begin();
182	i != abbrev.attributes.end();
183	i ++) {
184	start = SkipAttribute(start, i ->form_);
185	}
186	return start;
187	}
188
189	// Skips a single attribute form's data.
190	const uint8_t* CompilationUnit::SkipAttribute(const uint8_t* start,
191	enum DwarfForm form) {
192	size_t len;
193
194	switch (form) {
195	case DW_FORM_indirect:
196	form = static_cast<enum DwarfForm>(reader_->ReadUnsignedLEB128(start,
197	&len));
198	start += len;
199	return SkipAttribute(start, form);
200
201	case DW_FORM_flag_present:
202	case DW_FORM_implicit_const:
203	return start;
204	case DW_FORM_addrx1:
205	case DW_FORM_data1:
206	case DW_FORM_flag:
207	case DW_FORM_ref1:
208	case DW_FORM_strx1:
209	return start + `1`;
210	case DW_FORM_addrx2:
211	case DW_FORM_ref2:
212	case DW_FORM_data2:
213	case DW_FORM_strx2:
214	return start + `2`;
215	case DW_FORM_addrx3:
216	case DW_FORM_strx3:
217	return start + `3`;
218	case DW_FORM_addrx4:
219	case DW_FORM_ref4:
220	case DW_FORM_data4:
221	case DW_FORM_strx4:
222	case DW_FORM_ref_sup4:
223	return start + `4`;
224	case DW_FORM_ref8:
225	case DW_FORM_data8:
226	case DW_FORM_ref_sig8:
227	case DW_FORM_ref_sup8:
228	return start + `8`;
229	case DW_FORM_data16:
230	return start + `16`;
231	case DW_FORM_string:
232	return start + strlen(reinterpret_cast<const char*>(start)) + `1`;
233	case DW_FORM_udata:
234	case DW_FORM_ref_udata:
235	case DW_FORM_strx:
236	case DW_FORM_GNU_str_index:
237	case DW_FORM_GNU_addr_index:
238	case DW_FORM_addrx:
239	case DW_FORM_rnglistx:
240	case DW_FORM_loclistx:
241	reader_->ReadUnsignedLEB128(start, &len);
242	return start + len;
243
244	case DW_FORM_sdata:
245	reader_->ReadSignedLEB128(start, &len);
246	return start + len;
247	case DW_FORM_addr:
248	return start + reader_->AddressSize();
249	case DW_FORM_ref_addr:
250	// DWARF2 and 3/4 differ on whether ref_addr is address size or
251	// offset size.
252	assert(header_.version >= `2`);
253	if (header_.version == `2`) {
254	return start + reader_->AddressSize();
255	} else if (header_.version >= `3`) {
256	return start + reader_->OffsetSize();
257	}
258	break;
259
260	case DW_FORM_block1:
261	return start + `1` + reader_->ReadOneByte(start);
262	case DW_FORM_block2:
263	return start + `2` + reader_->ReadTwoBytes(start);
264	case DW_FORM_block4:
265	return start + `4` + reader_->ReadFourBytes(start);
266	case DW_FORM_block:
267	case DW_FORM_exprloc: {
268	uint64_t size = reader_->ReadUnsignedLEB128(start, &len);
269	return start + size + len;
270	}
271	case DW_FORM_strp:
272	case DW_FORM_line_strp:
273	case DW_FORM_strp_sup:
274	case DW_FORM_sec_offset:
275	return start + reader_->OffsetSize();
276	}
277	fprintf(stderr,"Unhandled form type");
278	return NULL;
279	}
280
281	// Read the abbreviation offset from a compilation unit header.
282	size_t CompilationUnit::ReadAbbrevOffset(const uint8_t* headerptr) {
283	assert(headerptr + reader_->OffsetSize() < buffer_ + buffer_length_);
284	header_.abbrev_offset = reader_->ReadOffset(headerptr);
285	return reader_->OffsetSize();
286	}
287
288	// Read the address size from a compilation unit header.
289	size_t CompilationUnit::ReadAddressSize(const uint8_t* headerptr) {
290	// Compare against less than or equal because this may be the last
291	// section in the file.
292	assert(headerptr + `1` <= buffer_ + buffer_length_);
293	header_.address_size = reader_->ReadOneByte(headerptr);
294	reader_->SetAddressSize(header_.address_size);
295	return `1`;
296	}
297
298	// Read the DWO id from a split or skeleton compilation unit header.
299	size_t CompilationUnit::ReadDwoId(const uint8_t* headerptr) {
300	assert(headerptr + `8` <= buffer_ + buffer_length_);
301	dwo_id_ = reader_->ReadEightBytes(headerptr);
302	return `8`;
303	}
304
305	// Read the type signature from a type or split type compilation unit header.
306	size_t CompilationUnit::ReadTypeSignature(const uint8_t* headerptr) {
307	assert(headerptr + `8` <= buffer_ + buffer_length_);
308	type_signature_ = reader_->ReadEightBytes(headerptr);
309	return `8`;
310	}
311
312	// Read the DWO id from a split or skeleton compilation unit header.
313	size_t CompilationUnit::ReadTypeOffset(const uint8_t* headerptr) {
314	assert(headerptr + reader_->OffsetSize() < buffer_ + buffer_length_);
315	type_offset_ = reader_->ReadOffset(headerptr);
316	return reader_->OffsetSize();
317	}
318
319
320	// Read a DWARF header. The header is variable length in DWARF3 and DWARF4
321	// (and DWARF2 as extended by most compilers), and consists of an length
322	// field, a version number, the offset in the .debug_abbrev section for our
323	// abbrevs, and an address size. DWARF5 adds a unit_type to distinguish
324	// between partial-, full-, skeleton-, split-, and type- compilation units.
325	void CompilationUnit::ReadHeader() {
326	const uint8_t* headerptr = buffer_;
327	size_t initial_length_size;
328
329	assert(headerptr + `4` < buffer_ + buffer_length_);
330	const uint64_t initial_length
331	= reader_->ReadInitialLength(headerptr, &initial_length_size);
332	headerptr += initial_length_size;
333	header_.length = initial_length;
334
335	assert(headerptr + `2` < buffer_ + buffer_length_);
336	header_.version = reader_->ReadTwoBytes(headerptr);
337	headerptr += `2`;
338
339	if (header_.version <= `4`) {
340	// Older versions of dwarf have a relatively simple structure.
341	headerptr += ReadAbbrevOffset(headerptr);
342	headerptr += ReadAddressSize(headerptr);
343	} else {
344	// DWARF5 adds a unit_type field, and various fields based on unit_type.
345	assert(headerptr + `1` < buffer_ + buffer_length_);
346	uint8_t unit_type = reader_->ReadOneByte(headerptr);
347	headerptr += `1`;
348	headerptr += ReadAddressSize(headerptr);
349	headerptr += ReadAbbrevOffset(headerptr);
350	switch (unit_type) {
351	case DW_UT_compile:
352	case DW_UT_partial:
353	// nothing else to read
354	break;
355	case DW_UT_skeleton:
356	case DW_UT_split_compile:
357	headerptr += ReadDwoId(headerptr);
358	break;
359	case DW_UT_type:
360	case DW_UT_split_type:
361	is_type_unit_ = true;
362	headerptr += ReadTypeSignature(headerptr);
363	headerptr += ReadTypeOffset(headerptr);
364	break;
365	default:
366	fprintf(stderr, "Unhandled compilation unit type 0x%x", unit_type);
367	break;
368	}
369	}
370	after_header_ = headerptr;
371
372	// This check ensures that we don't have to do checking during the
373	// reading of DIEs. header_.length does not include the size of the
374	// initial length.
375	assert(buffer_ + initial_length_size + header_.length <=
376	buffer_ + buffer_length_);
377	}
378
379	uint64_t CompilationUnit::Start() {
380	// First get the debug_info section.
381	SectionMap::const_iterator iter =
382	GetSectionByName(sections_, ".debug_info");
383	assert(iter != sections_.end());
384
385	// Set up our buffer
386	buffer_ = iter ->second.first + offset_from_section_start_;
387	buffer_length_ = iter ->second.second - offset_from_section_start_;
388
389	// Read the header
390	ReadHeader();
391
392	// Figure out the real length from the end of the initial length to
393	// the end of the compilation unit, since that is the value we
394	// return.
395	uint64_t ourlength = header_.length;
396	if (reader_->OffsetSize() == `8`)
397	ourlength += `12`;
398	else
399	ourlength += `4`;
400
401	// See if the user wants this compilation unit, and if not, just return.
402	if (!handler_->StartCompilationUnit(offset_from_section_start_,
403	reader_->AddressSize(),
404	reader_->OffsetSize(),
405	header_.length,
406	header_.version))
407	return ourlength;
408	else if (header_.version == `5` && is_type_unit_)
409	return ourlength;
410
411	// Otherwise, continue by reading our abbreviation entries.
412	ReadAbbrevs();
413
414	// Set the string section if we have one.
415	iter = GetSectionByName(sections_, ".debug_str");
416	if (iter != sections_.end()) {
417	string_buffer_ = iter ->second.first;
418	string_buffer_length_ = iter ->second.second;
419	}
420
421	// Set the line string section if we have one.
422	iter = GetSectionByName(sections_, ".debug_line_str");
423	if (iter != sections_.end()) {
424	line_string_buffer_ = iter ->second.first;
425	line_string_buffer_length_ = iter ->second.second;
426	}
427
428	// Set the string offsets section if we have one.
429	iter = GetSectionByName(sections_, ".debug_str_offsets");
430	if (iter != sections_.end()) {
431	str_offsets_buffer_ = iter ->second.first;
432	str_offsets_buffer_length_ = iter ->second.second;
433	}
434
435	// Set the address section if we have one.
436	iter = GetSectionByName(sections_, ".debug_addr");
437	if (iter != sections_.end()) {
438	addr_buffer_ = iter ->second.first;
439	addr_buffer_length_ = iter ->second.second;
440	}
441
442	// Now that we have our abbreviations, start processing DIE's.
443	ProcessDIEs();
444
445	// If this is a skeleton compilation unit generated with split DWARF,
446	// and the client needs the full debug info, we need to find the full
447	// compilation unit in a .dwo or .dwp file.
448	if (!is_split_dwarf_
449	&& dwo_name_ != NULL
450	&& handler_->NeedSplitDebugInfo())
451	ProcessSplitDwarf();
452
453	return ourlength;
454	}
455
456	void CompilationUnit::ProcessFormStringIndex(
457	uint64_t dieoffset, enum DwarfAttribute attr, enum DwarfForm form,
458	uint64_t str_index) {
459	const size_t kStringOffsetsTableHeaderSize =
460	header_.version >= `5` ? (reader_->OffsetSize() == `8` ? `16` : `8`) : `0`;
461	const uint8_t* str_offsets_table_after_header = str_offsets_base_ ?
462	str_offsets_buffer_ + str_offsets_base_ :
463	str_offsets_buffer_ + kStringOffsetsTableHeaderSize;
464	const uint8_t* offset_ptr =
465	str_offsets_table_after_header + str_index * reader_->OffsetSize();
466
467	const uint64_t offset = reader_->ReadOffset(offset_ptr);
468	if (offset >= string_buffer_length_) {
469	return;
470	}
471
472	const char* str = reinterpret_cast<const char*>(string_buffer_) + offset;
473	ProcessAttributeString(dieoffset, attr, form, str);
474	}
475
476	// Special function for pre-processing the
477	// DW_AT_str_offsets_base and DW_AT_addr_base in a DW_TAG_compile_unit die (for
478	// DWARF v5). We must make sure to find and process the
479	// DW_AT_str_offsets_base and DW_AT_addr_base attributes before attempting to
480	// read any string and address attribute in the compile unit.
481	const uint8_t* CompilationUnit::ProcessOffsetBaseAttribute(
482	uint64_t dieoffset, const uint8_t* start, enum DwarfAttribute attr,
483	enum DwarfForm form, uint64_t implicit_const) {
484	size_t len;
485
486	switch (form) {
487	// DW_FORM_indirect is never used because it is such a space
488	// waster.
489	case DW_FORM_indirect:
490	form = static_cast<enum DwarfForm>(reader_->ReadUnsignedLEB128(start,
491	&len));
492	start += len;
493	return ProcessOffsetBaseAttribute(dieoffset, start, attr, form,
494	implicit_const);
495
496	case DW_FORM_flag_present:
497	return start;
498	case DW_FORM_data1:
499	case DW_FORM_flag:
500	return start + `1`;
501	case DW_FORM_data2:
502	return start + `2`;
503	case DW_FORM_data4:
504	return start + `4`;
505	case DW_FORM_data8:
506	return start + `8`;
507	case DW_FORM_data16:
508	// This form is designed for an md5 checksum inside line tables.
509	return start + `16`;
510	case DW_FORM_string: {
511	const char* str = reinterpret_cast<const char*>(start);
512	return start + strlen(str) + `1`;
513	}
514	case DW_FORM_udata:
515	reader_->ReadUnsignedLEB128(start, &len);
516	return start + len;
517	case DW_FORM_sdata:
518	reader_->ReadSignedLEB128(start, &len);
519	return start + len;
520	case DW_FORM_addr:
521	reader_->ReadAddress(start);
522	return start + reader_->AddressSize();
523
524	// This is the important one here!
525	case DW_FORM_sec_offset:
526	if (attr == DW_AT_str_offsets_base \|\|
527	attr == DW_AT_addr_base)
528	ProcessAttributeUnsigned(dieoffset, attr, form,
529	reader_->ReadOffset(start));
530	else
531	reader_->ReadOffset(start);
532	return start + reader_->OffsetSize();
533
534	case DW_FORM_ref1:
535	return start + `1`;
536	case DW_FORM_ref2:
537	return start + `2`;
538	case DW_FORM_ref4:
539	return start + `4`;
540	case DW_FORM_ref8:
541	return start + `8`;
542	case DW_FORM_ref_udata:
543	reader_->ReadUnsignedLEB128(start, &len);
544	return start + len;
545	case DW_FORM_ref_addr:
546	// DWARF2 and 3/4 differ on whether ref_addr is address size or
547	// offset size.
548	assert(header_.version >= `2`);
549	if (header_.version == `2`) {
550	reader_->ReadAddress(start);
551	return start + reader_->AddressSize();
552	} else if (header_.version >= `3`) {
553	reader_->ReadOffset(start);
554	return start + reader_->OffsetSize();
555	}
556	break;
557	case DW_FORM_ref_sig8:
558	return start + `8`;
559	case DW_FORM_implicit_const:
560	return start;
561	case DW_FORM_block1: {
562	uint64_t datalen = reader_->ReadOneByte(start);
563	return start + `1` + datalen;
564	}
565	case DW_FORM_block2: {
566	uint64_t datalen = reader_->ReadTwoBytes(start);
567	return start + `2` + datalen;
568	}
569	case DW_FORM_block4: {
570	uint64_t datalen = reader_->ReadFourBytes(start);
571	return start + `4` + datalen;
572	}
573	case DW_FORM_block:
574	case DW_FORM_exprloc: {
575	uint64_t datalen = reader_->ReadUnsignedLEB128(start, &len);
576	return start + datalen + len;
577	}
578	case DW_FORM_strp: {
579	reader_->ReadOffset(start);
580	return start + reader_->OffsetSize();
581	}
582	case DW_FORM_line_strp: {
583	reader_->ReadOffset(start);
584	return start + reader_->OffsetSize();
585	}
586	case DW_FORM_strp_sup:
587	return start + `4`;
588	case DW_FORM_ref_sup4:
589	return start + `4`;
590	case DW_FORM_ref_sup8:
591	return start + `8`;
592	case DW_FORM_loclistx:
593	reader_->ReadUnsignedLEB128(start, &len);
594	return start + len;
595	case DW_FORM_strx:
596	case DW_FORM_GNU_str_index: {
597	reader_->ReadUnsignedLEB128(start, &len);
598	return start + len;
599	}
600	case DW_FORM_strx1: {
601	return start + `1`;
602	}
603	case DW_FORM_strx2: {
604	return start + `2`;
605	}
606	case DW_FORM_strx3: {
607	return start + `3`;
608	}
609	case DW_FORM_strx4: {
610	return start + `4`;
611	}
612
613	case DW_FORM_addrx:
614	case DW_FORM_GNU_addr_index:
615	reader_->ReadUnsignedLEB128(start, &len);
616	return start + len;
617	case DW_FORM_addrx1:
618	return start + `1`;
619	case DW_FORM_addrx2:
620	return start + `2`;
621	case DW_FORM_addrx3:
622	return start + `3`;
623	case DW_FORM_addrx4:
624	return start + `4`;
625	case DW_FORM_rnglistx:
626	reader_->ReadUnsignedLEB128(start, &len);
627	return start + len;
628	}
629	fprintf(stderr, "Unhandled form type\n");
630	return NULL;
631	}
632
633	// If one really wanted, you could merge SkipAttribute and
634	// ProcessAttribute
635	// This is all boring data manipulation and calling of the handler.
636	const uint8_t* CompilationUnit::ProcessAttribute(
637	uint64_t dieoffset, const uint8_t* start, enum DwarfAttribute attr,
638	enum DwarfForm form, uint64_t implicit_const) {
639	size_t len;
640
641	switch (form) {
642	// DW_FORM_indirect is never used because it is such a space
643	// waster.
644	case DW_FORM_indirect:
645	form = static_cast<enum DwarfForm>(reader_->ReadUnsignedLEB128(start,
646	&len));
647	start += len;
648	return ProcessAttribute(dieoffset, start, attr, form, implicit_const);
649
650	case DW_FORM_flag_present:
651	ProcessAttributeUnsigned(dieoffset, attr, form, `1`);
652	return start;
653	case DW_FORM_data1:
654	case DW_FORM_flag:
655	ProcessAttributeUnsigned(dieoffset, attr, form,
656	reader_->ReadOneByte(start));
657	return start + `1`;
658	case DW_FORM_data2:
659	ProcessAttributeUnsigned(dieoffset, attr, form,
660	reader_->ReadTwoBytes(start));
661	return start + `2`;
662	case DW_FORM_data4:
663	ProcessAttributeUnsigned(dieoffset, attr, form,
664	reader_->ReadFourBytes(start));
665	return start + `4`;
666	case DW_FORM_data8:
667	ProcessAttributeUnsigned(dieoffset, attr, form,
668	reader_->ReadEightBytes(start));
669	return start + `8`;
670	case DW_FORM_data16:
671	// This form is designed for an md5 checksum inside line tables.
672	fprintf(stderr, "Unhandled form type: DW_FORM_data16\n");
673	return start + `16`;
674	case DW_FORM_string: {
675	const char* str = reinterpret_cast<const char*>(start);
676	ProcessAttributeString(dieoffset, attr, form, str);
677	return start + strlen(str) + `1`;
678	}
679	case DW_FORM_udata:
680	ProcessAttributeUnsigned(dieoffset, attr, form,
681	reader_->ReadUnsignedLEB128(start, &len));
682	return start + len;
683
684	case DW_FORM_sdata:
685	ProcessAttributeSigned(dieoffset, attr, form,
686	reader_->ReadSignedLEB128(start, &len));
687	return start + len;
688	case DW_FORM_addr:
689	ProcessAttributeUnsigned(dieoffset, attr, form,
690	reader_->ReadAddress(start));
691	return start + reader_->AddressSize();
692	case DW_FORM_sec_offset:
693	ProcessAttributeUnsigned(dieoffset, attr, form,
694	reader_->ReadOffset(start));
695	return start + reader_->OffsetSize();
696
697	case DW_FORM_ref1:
698	handler_->ProcessAttributeReference(dieoffset, attr, form,
699	reader_->ReadOneByte(start)
700	+ offset_from_section_start_);
701	return start + `1`;
702	case DW_FORM_ref2:
703	handler_->ProcessAttributeReference(dieoffset, attr, form,
704	reader_->ReadTwoBytes(start)
705	+ offset_from_section_start_);
706	return start + `2`;
707	case DW_FORM_ref4:
708	handler_->ProcessAttributeReference(dieoffset, attr, form,
709	reader_->ReadFourBytes(start)
710	+ offset_from_section_start_);
711	return start + `4`;
712	case DW_FORM_ref8:
713	handler_->ProcessAttributeReference(dieoffset, attr, form,
714	reader_->ReadEightBytes(start)
715	+ offset_from_section_start_);
716	return start + `8`;
717	case DW_FORM_ref_udata:
718	handler_->ProcessAttributeReference(dieoffset, attr, form,
719	reader_->ReadUnsignedLEB128(start,
720	&len)
721	+ offset_from_section_start_);
722	return start + len;
723	case DW_FORM_ref_addr:
724	// DWARF2 and 3/4 differ on whether ref_addr is address size or
725	// offset size.
726	assert(header_.version >= `2`);
727	if (header_.version == `2`) {
728	handler_->ProcessAttributeReference(dieoffset, attr, form,
729	reader_->ReadAddress(start));
730	return start + reader_->AddressSize();
731	} else if (header_.version >= `3`) {
732	handler_->ProcessAttributeReference(dieoffset, attr, form,
733	reader_->ReadOffset(start));
734	return start + reader_->OffsetSize();
735	}
736	break;
737	case DW_FORM_ref_sig8:
738	handler_->ProcessAttributeSignature(dieoffset, attr, form,
739	reader_->ReadEightBytes(start));
740	return start + `8`;
741	case DW_FORM_implicit_const:
742	handler_->ProcessAttributeUnsigned(dieoffset, attr, form,
743	implicit_const);
744	return start;
745	case DW_FORM_block1: {
746	uint64_t datalen = reader_->ReadOneByte(start);
747	handler_->ProcessAttributeBuffer(dieoffset, attr, form, start + `1`,
748	datalen);
749	return start + `1` + datalen;
750	}
751	case DW_FORM_block2: {
752	uint64_t datalen = reader_->ReadTwoBytes(start);
753	handler_->ProcessAttributeBuffer(dieoffset, attr, form, start + `2`,
754	datalen);
755	return start + `2` + datalen;
756	}
757	case DW_FORM_block4: {
758	uint64_t datalen = reader_->ReadFourBytes(start);
759	handler_->ProcessAttributeBuffer(dieoffset, attr, form, start + `4`,
760	datalen);
761	return start + `4` + datalen;
762	}
763	case DW_FORM_block:
764	case DW_FORM_exprloc: {
765	uint64_t datalen = reader_->ReadUnsignedLEB128(start, &len);
766	handler_->ProcessAttributeBuffer(dieoffset, attr, form, start + len,
767	datalen);
768	return start + datalen + len;
769	}
770	case DW_FORM_strp: {
771	assert(string_buffer_ != NULL);
772
773	const uint64_t offset = reader_->ReadOffset(start);
774	assert(string_buffer_ + offset < string_buffer_ + string_buffer_length_);
775
776	const char* str = reinterpret_cast<const char*>(string_buffer_ + offset);
777	ProcessAttributeString(dieoffset, attr, form, str);
778	return start + reader_->OffsetSize();
779	}
780	case DW_FORM_line_strp: {
781	assert(line_string_buffer_ != NULL);
782
783	const uint64_t offset = reader_->ReadOffset(start);
784	assert(line_string_buffer_ + offset <
785	line_string_buffer_ + line_string_buffer_length_);
786
787	const char* str =
788	reinterpret_cast<const char*>(line_string_buffer_ + offset);
789	ProcessAttributeString(dieoffset, attr, form, str);
790	return start + reader_->OffsetSize();
791	}
792	case DW_FORM_strp_sup:
793	// No support currently for suplementary object files.
794	fprintf(stderr, "Unhandled form type: DW_FORM_strp_sup\n");
795	return start + `4`;
796	case DW_FORM_ref_sup4:
797	// No support currently for suplementary object files.
798	fprintf(stderr, "Unhandled form type: DW_FORM_ref_sup4\n");
799	return start + `4`;
800	case DW_FORM_ref_sup8:
801	// No support currently for suplementary object files.
802	fprintf(stderr, "Unhandled form type: DW_FORM_ref_sup8\n");
803	return start + `8`;
804	case DW_FORM_loclistx:
805	ProcessAttributeUnsigned(dieoffset, attr, form,
806	reader_->ReadUnsignedLEB128(start, &len));
807	return start + len;
808	case DW_FORM_strx:
809	case DW_FORM_GNU_str_index: {
810	uint64_t str_index = reader_->ReadUnsignedLEB128(start, &len);
811	ProcessFormStringIndex(dieoffset, attr, form, str_index);
812	return start + len;
813	}
814	case DW_FORM_strx1: {
815	uint64_t str_index = reader_->ReadOneByte(start);
816	ProcessFormStringIndex(dieoffset, attr, form, str_index);
817	return start + `1`;
818	}
819	case DW_FORM_strx2: {
820	uint64_t str_index = reader_->ReadTwoBytes(start);
821	ProcessFormStringIndex(dieoffset, attr, form, str_index);
822	return start + `2`;
823	}
824	case DW_FORM_strx3: {
825	uint64_t str_index = reader_->ReadThreeBytes(start);
826	ProcessFormStringIndex(dieoffset, attr, form, str_index);
827	return start + `3`;
828	}
829	case DW_FORM_strx4: {
830	uint64_t str_index = reader_->ReadFourBytes(start);
831	ProcessFormStringIndex(dieoffset, attr, form, str_index);
832	return start + `4`;
833	}
834
835	case DW_FORM_addrx:
836	case DW_FORM_GNU_addr_index:
837	ProcessAttributeAddrIndex(
838	dieoffset, attr, form, reader_->ReadUnsignedLEB128(start, &len));
839	return start + len;
840	case DW_FORM_addrx1:
841	ProcessAttributeAddrIndex(
842	dieoffset, attr, form, reader_->ReadOneByte(start));
843	return start + `1`;
844	case DW_FORM_addrx2:
845	ProcessAttributeAddrIndex(
846	dieoffset, attr, form, reader_->ReadTwoBytes(start));
847	return start + `2`;
848	case DW_FORM_addrx3:
849	ProcessAttributeAddrIndex(
850	dieoffset, attr, form, reader_->ReadThreeBytes(start));
851	return start + `3`;
852	case DW_FORM_addrx4:
853	ProcessAttributeAddrIndex(
854	dieoffset, attr, form, reader_->ReadFourBytes(start));
855	return start + `4`;
856	case DW_FORM_rnglistx:
857	ProcessAttributeUnsigned(
858	dieoffset, attr, form, reader_->ReadUnsignedLEB128(start, &len));
859	return start + len;
860	}
861	fprintf(stderr, "Unhandled form type\n");
862	return NULL;
863	}
864
865	const uint8_t* CompilationUnit::ProcessDIE(uint64_t dieoffset,
866	const uint8_t* start,
867	const Abbrev& abbrev) {
868	// With DWARF v5, the compile_unit die may contain a
869	// DW_AT_str_offsets_base or DW_AT_addr_base. If it does, that attribute must
870	// be found and processed before trying to process the other attributes;
871	// otherwise the string or address values will all come out incorrect.
872	if (abbrev.tag == DW_TAG_compile_unit && header_.version == `5`) {
873	uint64_t dieoffset_copy = dieoffset;
874	const uint8_t* start_copy = start;
875	for (AttributeList::const_iterator i = abbrev.attributes.begin();
876	i != abbrev.attributes.end();
877	i ++) {
878	start_copy = ProcessOffsetBaseAttribute(dieoffset_copy, start_copy,
879	i ->attr_, i ->form_,
880	i ->value_);
881	}
882	}
883
884	for (AttributeList::const_iterator i = abbrev.attributes.begin();
885	i != abbrev.attributes.end();
886	i ++) {
887	start = ProcessAttribute(dieoffset, start, i ->attr_, i ->form_, i ->value_);
888	}
889
890	// If this is a compilation unit in a split DWARF object, verify that
891	// the dwo_id matches. If it does not match, we will ignore this
892	// compilation unit.
893	if (abbrev.tag == DW_TAG_compile_unit
894	&& is_split_dwarf_
895	&& dwo_id_ != skeleton_dwo_id_) {
896	return NULL;
897	}
898
899	return start;
900	}
901
902	void CompilationUnit::ProcessDIEs() {
903	const uint8_t* dieptr = after_header_;
904	size_t len;
905
906	// lengthstart is the place the length field is based on.
907	// It is the point in the header after the initial length field
908	const uint8_t* lengthstart = buffer_;
909
910	// In 64 bit dwarf, the initial length is 12 bytes, because of the
911	// 0xffffffff at the start.
912	if (reader_->OffsetSize() == `8`)
913	lengthstart += `12`;
914	else
915	lengthstart += `4`;
916
917	std::stack<uint64_t> die_stack;
918
919	while (dieptr < (lengthstart + header_.length)) {
920	// We give the user the absolute offset from the beginning of
921	// debug_info, since they need it to deal with ref_addr forms.
922	uint64_t absolute_offset = (dieptr - buffer_) + offset_from_section_start_;
923
924	uint64_t abbrev_num = reader_->ReadUnsignedLEB128(dieptr, &len);
925
926	dieptr += len;
927
928	// Abbrev == 0 represents the end of a list of children, or padding
929	// at the end of the compilation unit.
930	if (abbrev_num == `0`) {
931	if (die_stack.size() == `0`)
932	// If it is padding, then we are done with the compilation unit's DIEs.
933	return;
934	const uint64_t offset = die_stack.top();
935	die_stack.pop();
936	handler_->EndDIE(offset);
937	continue;
938	}
939
940	const Abbrev& abbrev = abbrevs_->at(static_cast<size_t>(abbrev_num));
941	const enum DwarfTag tag = abbrev.tag;
942	if (!handler_->StartDIE(absolute_offset, tag)) {
943	dieptr = SkipDIE(dieptr, abbrev);
944	} else {
945	dieptr = ProcessDIE(absolute_offset, dieptr, abbrev);
946	}
947
948	if (abbrev.has_children) {
949	die_stack.push(absolute_offset);
950	} else {
951	handler_->EndDIE(absolute_offset);
952	}
953	}
954	}
955
956	// Check for a valid ELF file and return the Address size.
957	// Returns 0 if not a valid ELF file.
958	inline int GetElfWidth(const ElfReader& elf) {
959	if (elf.IsElf32File())
960	return `4`;
961	if (elf.IsElf64File())
962	return `8`;
963	return `0`;
964	}
965
966	void CompilationUnit::ProcessSplitDwarf() {
967	struct stat statbuf;
968	if (!have_checked_for_dwp_) {
969	// Look for a .dwp file in the same directory as the executable.
970	have_checked_for_dwp_ = true;
971	string dwp_suffix(".dwp");
972	dwp_path_ = path_ + dwp_suffix;
973	if (stat(dwp_path_.c_str(), &statbuf) != `0`) {
974	// Fall back to a split .debug file in the same directory.
975	string debug_suffix(".debug");
976	dwp_path_ = path_;
977	size_t found = path_.rfind(debug_suffix);
978	if (found + debug_suffix.length() == path_.length())
979	dwp_path_ = dwp_path_.replace(found, debug_suffix.length(), dwp_suffix);
980	}
981	if (stat(dwp_path_.c_str(), &statbuf) == `0`) {
982	ElfReader* elf = new ElfReader (dwp_path_);
983	int width = GetElfWidth(*elf);
984	if (width != `0`) {
985	dwp_byte_reader_.reset(new ByteReader (reader_->GetEndianness()));
986	dwp_byte_reader_->SetAddressSize(width);
987	dwp_reader_.reset(new DwpReader (*dwp_byte_reader_, elf));
988	dwp_reader_->Initialize();
989	} else {
990	delete elf;
991	}
992	}
993	}
994	bool found_in_dwp = false;
995	if (dwp_reader_) {
996	// If we have a .dwp file, read the debug sections for the requested CU.
997	SectionMap sections;
998	dwp_reader_->ReadDebugSectionsForCU(dwo_id_, &sections);
999	if (!sections.empty()) {
1000	found_in_dwp = true;
1001	CompilationUnit dwp_comp_unit(dwp_path_, sections, `0`,
1002	dwp_byte_reader_.get(), handler_);
1003	dwp_comp_unit.SetSplitDwarf(addr_buffer_, addr_buffer_length_, addr_base_,
1004	ranges_base_, dwo_id_);
1005	dwp_comp_unit.Start();
1006	}
1007	}
1008	if (!found_in_dwp) {
1009	// If no .dwp file, try to open the .dwo file.
1010	if (stat(dwo_name_, &statbuf) == `0`) {
1011	ElfReader elf(dwo_name_);
1012	int width = GetElfWidth(elf);
1013	if (width != `0`) {
1014	ByteReader reader(ENDIANNESS_LITTLE);
1015	reader.SetAddressSize(width);
1016	SectionMap sections;
1017	ReadDebugSectionsFromDwo(&elf, &sections);
1018	CompilationUnit dwo_comp_unit(dwo_name_, sections, `0`, &reader,
1019	handler_);
1020	dwo_comp_unit.SetSplitDwarf(addr_buffer_, addr_buffer_length_,
1021	addr_base_, ranges_base_, dwo_id_);
1022	dwo_comp_unit.Start();
1023	}
1024	}
1025	}
1026	}
1027
1028	void CompilationUnit::ReadDebugSectionsFromDwo(ElfReader* elf_reader,
1029	SectionMap* sections) {
1030	static const char* const section_names[] = {
1031	".debug_abbrev",
1032	".debug_info",
1033	".debug_str_offsets",
1034	".debug_str"
1035	};
1036	for (unsigned int i = `0u`;
1037	i < sizeof(section_names)/sizeof(*(section_names)); ++i) {
1038	string base_name = section_names[i];
1039	string dwo_name = base_name + ".dwo";
1040	size_t section_size;
1041	const char* section_data = elf_reader->GetSectionByName(dwo_name,
1042	&section_size);
1043	if (section_data != NULL)
1044	sections->insert(std::make_pair(
1045	base_name, std::make_pair(
1046	reinterpret_cast<const uint8_t*>(section_data),
1047	section_size)));
1048	}
1049	}
1050
1051	DwpReader::DwpReader(const ByteReader& byte_reader, ElfReader* elf_reader)
1052	: elf_reader_(elf_reader), byte_reader_(byte_reader),
1053	cu_index_(NULL), cu_index_size_(`0`), string_buffer_(NULL),
1054	string_buffer_size_(`0`), version_(`0`), ncolumns_(`0`), nunits_(`0`),
1055	nslots_(`0`), phash_(NULL), pindex_(NULL), shndx_pool_(NULL),
1056	offset_table_(NULL), size_table_(NULL), abbrev_data_(NULL),
1057	abbrev_size_(`0`), info_data_(NULL), info_size_(`0`),
1058	str_offsets_data_(NULL), str_offsets_size_(`0`) {}
1059
1060	DwpReader::~DwpReader() {
1061	if (elf_reader_) delete elf_reader_;
1062	}
1063
1064	void DwpReader::Initialize() {
1065	cu_index_ = elf_reader_->GetSectionByName(".debug_cu_index",
1066	&cu_index_size_);
1067	if (cu_index_ == NULL) {
1068	return;
1069	}
1070	// The .debug_str.dwo section is shared by all CUs in the file.
1071	string_buffer_ = elf_reader_->GetSectionByName(".debug_str.dwo",
1072	&string_buffer_size_);
1073
1074	version_ = byte_reader_.ReadFourBytes(
1075	reinterpret_cast<const uint8_t*>(cu_index_));
1076
1077	if (version_ == `1`) {
1078	nslots_ = byte_reader_.ReadFourBytes(
1079	reinterpret_cast<const uint8_t*>(cu_index_)
1080	+ `3` * sizeof(uint32_t));
1081	phash_ = cu_index_ + `4` * sizeof(uint32_t);
1082	pindex_ = phash_ + nslots_ * sizeof(uint64_t);
1083	shndx_pool_ = pindex_ + nslots_ * sizeof(uint32_t);
1084	if (shndx_pool_ >= cu_index_ + cu_index_size_) {
1085	version_ = `0`;
1086	}
1087	} else if (version_ == `2` \|\| version_ == `5`) {
1088	ncolumns_ = byte_reader_.ReadFourBytes(
1089	reinterpret_cast<const uint8_t>(cu_index_) + sizeof*(uint32_t));
1090	nunits_ = byte_reader_.ReadFourBytes(
1091	reinterpret_cast<const uint8_t>(cu_index_) + `2` sizeof(uint32_t));
1092	nslots_ = byte_reader_.ReadFourBytes(
1093	reinterpret_cast<const uint8_t>(cu_index_) + `3` sizeof(uint32_t));
1094	phash_ = cu_index_ + `4` * sizeof(uint32_t);
1095	pindex_ = phash_ + nslots_ * sizeof(uint64_t);
1096	offset_table_ = pindex_ + nslots_ * sizeof(uint32_t);
1097	size_table_ = offset_table_ + ncolumns_ * (nunits_ + `1`) * sizeof(uint32_t);
1098	abbrev_data_ = elf_reader_->GetSectionByName(".debug_abbrev.dwo",
1099	&abbrev_size_);
1100	info_data_ = elf_reader_->GetSectionByName(".debug_info.dwo", &info_size_);
1101	str_offsets_data_ = elf_reader_->GetSectionByName(".debug_str_offsets.dwo",
1102	&str_offsets_size_);
1103	if (size_table_ >= cu_index_ + cu_index_size_) {
1104	version_ = `0`;
1105	}
1106	}
1107	}
1108
1109	void DwpReader::ReadDebugSectionsForCU(uint64_t dwo_id,
1110	SectionMap* sections) {
1111	if (version_ == `1`) {
1112	int slot = LookupCU(dwo_id);
1113	if (slot == -`1`) {
1114	return;
1115	}
1116
1117	// The index table points to the section index pool, where we
1118	// can read a list of section indexes for the debug sections
1119	// for the CU whose dwo_id we are looking for.
1120	int index = byte_reader_.ReadFourBytes(
1121	reinterpret_cast<const uint8_t*>(pindex_)
1122	+ slot * sizeof(uint32_t));
1123	const char* shndx_list = shndx_pool_ + index * sizeof(uint32_t);
1124	for (;;) {
1125	if (shndx_list >= cu_index_ + cu_index_size_) {
1126	version_ = `0`;
1127	return;
1128	}
1129	unsigned int shndx = byte_reader_.ReadFourBytes(
1130	reinterpret_cast<const uint8_t*>(shndx_list));
1131	shndx_list += sizeof(uint32_t);
1132	if (shndx == `0`)
1133	break;
1134	const char* section_name = elf_reader_->GetSectionName(shndx);
1135	size_t section_size;
1136	const char* section_data;
1137	// We're only interested in these four debug sections.
1138	// The section names in the .dwo file end with ".dwo", but we
1139	// add them to the sections table with their normal names.
1140	if (!strncmp(section_name, ".debug_abbrev", strlen(".debug_abbrev"))) {
1141	section_data = elf_reader_->GetSectionByIndex(shndx, &section_size);
1142	sections->insert(std::make_pair(
1143	".debug_abbrev",
1144	std::make_pair(reinterpret_cast<const uint8_t*> (section_data),
1145	section_size)));
1146	} else if (!strncmp(section_name, ".debug_info", strlen(".debug_info"))) {
1147	section_data = elf_reader_->GetSectionByIndex(shndx, &section_size);
1148	sections->insert(std::make_pair(
1149	".debug_info",
1150	std::make_pair(reinterpret_cast<const uint8_t*> (section_data),
1151	section_size)));
1152	} else if (!strncmp(section_name, ".debug_str_offsets",
1153	strlen(".debug_str_offsets"))) {
1154	section_data = elf_reader_->GetSectionByIndex(shndx, &section_size);
1155	sections->insert(std::make_pair(
1156	".debug_str_offsets",
1157	std::make_pair(reinterpret_cast<const uint8_t*> (section_data),
1158	section_size)));
1159	}
1160	}
1161	sections->insert(std::make_pair(
1162	".debug_str",
1163	std::make_pair(reinterpret_cast<const uint8_t*> (string_buffer_),
1164	string_buffer_size_)));
1165	} else if (version_ == `2` \|\| version_ == `5`) {
1166	uint32_t index = LookupCUv2(dwo_id);
1167	if (index == `0`) {
1168	return;
1169	}
1170
1171	// The index points to a row in each of the section offsets table
1172	// and the section size table, where we can read the offsets and sizes
1173	// of the contributions to each debug section from the CU whose dwo_id
1174	// we are looking for. Row 0 of the section offsets table has the
1175	// section ids for each column of the table. The size table begins
1176	// with row 1.
1177	const char* id_row = offset_table_;
1178	const char* offset_row = offset_table_
1179	+ index * ncolumns_ * sizeof(uint32_t);
1180	const char* size_row =
1181	size_table_ + (index - `1`) * ncolumns_ * sizeof(uint32_t);
1182	if (size_row + ncolumns_ * sizeof(uint32_t) > cu_index_ + cu_index_size_) {
1183	version_ = `0`;
1184	return;
1185	}
1186	for (unsigned int col = `0u`; col < ncolumns_; ++col) {
1187	uint32_t section_id =
1188	byte_reader_.ReadFourBytes(reinterpret_cast<const uint8_t*>(id_row)
1189	+ col * sizeof(uint32_t));
1190	uint32_t offset = byte_reader_.ReadFourBytes(
1191	reinterpret_cast<const uint8_t*>(offset_row)
1192	+ col * sizeof(uint32_t));
1193	uint32_t size = byte_reader_.ReadFourBytes(
1194	reinterpret_cast<const uint8_t>(size_row) + col sizeof(uint32_t));
1195	if (section_id == DW_SECT_ABBREV) {
1196	sections->insert(std::make_pair(
1197	".debug_abbrev",
1198	std::make_pair(reinterpret_cast<const uint8_t*> (abbrev_data_)
1199	+ offset, size)));
1200	} else if (section_id == DW_SECT_INFO) {
1201	sections->insert(std::make_pair(
1202	".debug_info",
1203	std::make_pair(reinterpret_cast<const uint8_t*> (info_data_)
1204	+ offset, size)));
1205	} else if (section_id == DW_SECT_STR_OFFSETS) {
1206	sections->insert(std::make_pair(
1207	".debug_str_offsets",
1208	std::make_pair(reinterpret_cast<const uint8_t*> (str_offsets_data_)
1209	+ offset, size)));
1210	}
1211	}
1212	sections->insert(std::make_pair(
1213	".debug_str",
1214	std::make_pair(reinterpret_cast<const uint8_t*> (string_buffer_),
1215	string_buffer_size_)));
1216	}
1217	}
1218
1219	int DwpReader::LookupCU(uint64_t dwo_id) {
1220	uint32_t slot = static_cast<uint32_t>(dwo_id) & (nslots_ - `1`);
1221	uint64_t probe = byte_reader_.ReadEightBytes(
1222	reinterpret_cast<const uint8_t>(phash_) + slot sizeof(uint64_t));
1223	if (probe != `0` && probe != dwo_id) {
1224	uint32_t secondary_hash =
1225	(static_cast<uint32_t>(dwo_id >> `32`) & (nslots_ - `1`)) \| `1`;
1226	do {
1227	slot = (slot + secondary_hash) & (nslots_ - `1`);
1228	probe = byte_reader_.ReadEightBytes(
1229	reinterpret_cast<const uint8_t>(phash_) + slot sizeof(uint64_t));
1230	} while (probe != `0` && probe != dwo_id);
1231	}
1232	if (probe == `0`)
1233	return -`1`;
1234	return slot;
1235	}
1236
1237	uint32_t DwpReader::LookupCUv2(uint64_t dwo_id) {
1238	uint32_t slot = static_cast<uint32_t>(dwo_id) & (nslots_ - `1`);
1239	uint64_t probe = byte_reader_.ReadEightBytes(
1240	reinterpret_cast<const uint8_t>(phash_) + slot sizeof(uint64_t));
1241	uint32_t index = byte_reader_.ReadFourBytes(
1242	reinterpret_cast<const uint8_t>(pindex_) + slot sizeof(uint32_t));
1243	if (index != `0` && probe != dwo_id) {
1244	uint32_t secondary_hash =
1245	(static_cast<uint32_t>(dwo_id >> `32`) & (nslots_ - `1`)) \| `1`;
1246	do {
1247	slot = (slot + secondary_hash) & (nslots_ - `1`);
1248	probe = byte_reader_.ReadEightBytes(
1249	reinterpret_cast<const uint8_t>(phash_) + slot sizeof(uint64_t));
1250	index = byte_reader_.ReadFourBytes(
1251	reinterpret_cast<const uint8_t>(pindex_) + slot sizeof(uint32_t));
1252	} while (index != `0` && probe != dwo_id);
1253	}
1254	return index;
1255	}
1256
1257	LineInfo::LineInfo(const uint8_t* buffer, uint64_t buffer_length,
1258	ByteReader* reader, const uint8_t* string_buffer,
1259	size_t string_buffer_length,
1260	const uint8_t* line_string_buffer,
1261	size_t line_string_buffer_length, LineInfoHandler* handler):
1262	handler_(handler), reader_(reader), buffer_(buffer),
1263	string_buffer_(string_buffer),
1264	line_string_buffer_(line_string_buffer) {
1265	#ifndef NDEBUG
1266	buffer_length_ = buffer_length;
1267	string_buffer_length_ = string_buffer_length;
1268	line_string_buffer_length_ = line_string_buffer_length;
1269	#endif
1270	header_.std_opcode_lengths = NULL;
1271	}
1272
1273	uint64_t LineInfo::Start() {
1274	ReadHeader();
1275	ReadLines();
1276	return after_header_ - buffer_;
1277	}
1278
1279	void LineInfo::ReadTypesAndForms(const uint8_t** lineptr,
1280	uint32_t* content_types,
1281	uint32_t* content_forms,
1282	uint32_t max_types,
1283	uint32_t* format_count) {
1284	size_t len;
1285
1286	uint32_t count = reader_->ReadUnsignedLEB128(*lineptr, &len);
1287	*lineptr += len;
1288	if (count < `1` \|\| count > max_types) {
1289	return;
1290	}
1291	for (uint32_t col = `0`; col < count; ++col) {
1292	content_types[col] = reader_->ReadUnsignedLEB128(*lineptr, &len);
1293	*lineptr += len;
1294	content_forms[col] = reader_->ReadUnsignedLEB128(*lineptr, &len);
1295	*lineptr += len;
1296	}
1297	*format_count = count;
1298	}
1299
1300	const char* LineInfo::ReadStringForm(uint32_t form, const uint8_t** lineptr) {
1301	const char* name = nullptr;
1302	if (form == DW_FORM_string) {
1303	name = reinterpret_cast<const char>(lineptr);
1304	*lineptr += strlen(name) + `1`;
1305	return name;
1306	} else if (form == DW_FORM_strp) {
1307	uint64_t offset = reader_->ReadOffset(*lineptr);
1308	assert(offset < string_buffer_length_);
1309	*lineptr += reader_->OffsetSize();
1310	if (string_buffer_ != nullptr) {
1311	name = reinterpret_cast<const char*>(string_buffer_) + offset;
1312	return name;
1313	}
1314	} else if (form == DW_FORM_line_strp) {
1315	uint64_t offset = reader_->ReadOffset(*lineptr);
1316	assert(offset < line_string_buffer_length_);
1317	*lineptr += reader_->OffsetSize();
1318	if (line_string_buffer_ != nullptr) {
1319	name = reinterpret_cast<const char*>(line_string_buffer_) + offset;
1320	return name;
1321	}
1322	}
1323	// Shouldn't be called with a non-string-form, and
1324	// if there is a string form but no string buffer,
1325	// that is a problem too.
1326	assert(`0`);
1327	return nullptr;
1328	}
1329
1330	uint64_t LineInfo::ReadUnsignedData(uint32_t form, const uint8_t** lineptr) {
1331	size_t len;
1332	uint64_t value;
1333
1334	switch (form) {
1335	case DW_FORM_data1:
1336	value = reader_->ReadOneByte(*lineptr);
1337	*lineptr += `1`;
1338	return value;
1339	case DW_FORM_data2:
1340	value = reader_->ReadTwoBytes(*lineptr);
1341	*lineptr += `2`;
1342	return value;
1343	case DW_FORM_data4:
1344	value = reader_->ReadFourBytes(*lineptr);
1345	*lineptr += `4`;
1346	return value;
1347	case DW_FORM_data8:
1348	value = reader_->ReadEightBytes(*lineptr);
1349	*lineptr += `8`;
1350	return value;
1351	case DW_FORM_udata:
1352	value = reader_->ReadUnsignedLEB128(*lineptr, &len);
1353	*lineptr += len;
1354	return value;
1355	default:
1356	fprintf(stderr, "Unrecognized data form.");
1357	return `0`;
1358	}
1359	}
1360
1361	void LineInfo::ReadFileRow(const uint8_t** lineptr,
1362	const uint32_t* content_types,
1363	const uint32_t* content_forms, uint32_t row,
1364	uint32_t format_count) {
1365	const char* filename = nullptr;
1366	uint64_t dirindex = `0`;
1367	uint64_t mod_time = `0`;
1368	uint64_t filelength = `0`;
1369
1370	for (uint32_t col = `0`; col < format_count; ++col) {
1371	switch (content_types[col]) {
1372	case DW_LNCT_path:
1373	filename = ReadStringForm(content_forms[col], lineptr);
1374	break;
1375	case DW_LNCT_directory_index:
1376	dirindex = ReadUnsignedData(content_forms[col], lineptr);
1377	break;
1378	case DW_LNCT_timestamp:
1379	mod_time = ReadUnsignedData(content_forms[col], lineptr);
1380	break;
1381	case DW_LNCT_size:
1382	filelength = ReadUnsignedData(content_forms[col], lineptr);
1383	break;
1384	case DW_LNCT_MD5:
1385	// MD5 entries help a debugger sort different versions of files with
1386	// the same name. It is always paired with a DW_FORM_data16 and is
1387	// unused in this case.
1388	*lineptr += `16`;
1389	break;
1390	default:
1391	fprintf(stderr, "Unrecognized form in line table header. %d\n",
1392	content_types[col]);
1393	assert(false);
1394	break;
1395	}
1396	}
1397	assert(filename != nullptr);
1398	handler_->DefineFile(filename, row, dirindex, mod_time, filelength);
1399	}
1400
1401	// The header for a debug_line section is mildly complicated, because
1402	// the line info is very tightly encoded.
1403	void LineInfo::ReadHeader() {
1404	const uint8_t* lineptr = buffer_;
1405	size_t initial_length_size;
1406
1407	const uint64_t initial_length
1408	= reader_->ReadInitialLength(lineptr, &initial_length_size);
1409
1410	lineptr += initial_length_size;
1411	header_.total_length = initial_length;
1412	assert(buffer_ + initial_length_size + header_.total_length <=
1413	buffer_ + buffer_length_);
1414
1415
1416	header_.version = reader_->ReadTwoBytes(lineptr);
1417	lineptr += `2`;
1418
1419	if (header_.version >= `5`) {
1420	uint8_t address_size = reader_->ReadOneByte(lineptr);
1421	reader_->SetAddressSize(address_size);
1422	lineptr += `1`;
1423	uint8_t segment_selector_size = reader_->ReadOneByte(lineptr);
1424	if (segment_selector_size != `0`) {
1425	fprintf(stderr,"No support for segmented memory.");
1426	}
1427	lineptr += `1`;
1428	} else {
1429	// Address size must* be set by CU ahead of time.*
1430	assert(reader_->AddressSize() != `0`);
1431	}
1432
1433	header_.prologue_length = reader_->ReadOffset(lineptr);
1434	lineptr += reader_->OffsetSize();
1435
1436	header_.min_insn_length = reader_->ReadOneByte(lineptr);
1437	lineptr += `1`;
1438
1439	if (header_.version >= `4`) {
1440	__attribute__((unused)) uint8_t max_ops_per_insn =
1441	reader_->ReadOneByte(lineptr);
1442	++lineptr;
1443	assert(max_ops_per_insn == `1`);
1444	}
1445
1446	header_.default_is_stmt = reader_->ReadOneByte(lineptr);
1447	lineptr += `1`;
1448
1449	header_.line_base = *reinterpret_cast<const int8_t*>(lineptr);
1450	lineptr += `1`;
1451
1452	header_.line_range = reader_->ReadOneByte(lineptr);
1453	lineptr += `1`;
1454
1455	header_.opcode_base = reader_->ReadOneByte(lineptr);
1456	lineptr += `1`;
1457
1458	header_.std_opcode_lengths = new std::vector<unsigned char>;
1459	header_.std_opcode_lengths->resize(header_.opcode_base + `1`);
1460	(*header_.std_opcode_lengths)[`0`] = `0`;
1461	for (int i = `1`; i < header_.opcode_base; i++) {
1462	(*header_.std_opcode_lengths)[i] = reader_->ReadOneByte(lineptr);
1463	lineptr += `1`;
1464	}
1465
1466	if (header_.version <= `4`) {
1467	// Directory zero is assumed to be the compilation directory and special
1468	// cased where used. It is not actually stored in the dwarf data. But an
1469	// empty entry here avoids off-by-one errors elsewhere in the code.
1470	handler_->DefineDir("", `0`);
1471	// It is legal for the directory entry table to be empty.
1472	if (*lineptr) {
1473	uint32_t dirindex = `1`;
1474	while (*lineptr) {
1475	const char* dirname = reinterpret_cast<const char*>(lineptr);
1476	handler_->DefineDir(dirname, dirindex);
1477	lineptr += strlen(dirname) + `1`;
1478	dirindex++;
1479	}
1480	}
1481	lineptr++;
1482	// It is also legal for the file entry table to be empty.
1483
1484	// Similarly for file zero.
1485	handler_->DefineFile("", `0`, `0`, `0`, `0`);
1486	if (*lineptr) {
1487	uint32_t fileindex = `1`;
1488	size_t len;
1489	while (*lineptr) {
1490	const char* filename = ReadStringForm(DW_FORM_string, &lineptr);
1491
1492	uint64_t dirindex = reader_->ReadUnsignedLEB128(lineptr, &len);
1493	lineptr += len;
1494
1495	uint64_t mod_time = reader_->ReadUnsignedLEB128(lineptr, &len);
1496	lineptr += len;
1497
1498	uint64_t filelength = reader_->ReadUnsignedLEB128(lineptr, &len);
1499	lineptr += len;
1500	handler_->DefineFile(filename, fileindex,
1501	static_cast<uint32_t>(dirindex), mod_time,
1502	filelength);
1503	fileindex++;
1504	}
1505	}
1506	lineptr++;
1507	} else {
1508	// Read the DWARF-5 directory table.
1509
1510	// Dwarf5 supports five different types and forms per directory- and
1511	// file-table entry. Theoretically, there could be duplicate entries
1512	// in this table, but that would be quite unusual.
1513	static const uint32_t kMaxTypesAndForms = `5`;
1514	uint32_t content_types[kMaxTypesAndForms];
1515	uint32_t content_forms[kMaxTypesAndForms];
1516	uint32_t format_count;
1517	size_t len;
1518
1519	ReadTypesAndForms(&lineptr, content_types, content_forms, kMaxTypesAndForms,
1520	&format_count);
1521	uint32_t entry_count = reader_->ReadUnsignedLEB128(lineptr, &len);
1522	lineptr += len;
1523	for (uint32_t row = `0`; row < entry_count; ++row) {
1524	const char* dirname = nullptr;
1525	for (uint32_t col = `0`; col < format_count; ++col) {
1526	// The path is the only relevant content type for this implementation.
1527	if (content_types[col] == DW_LNCT_path) {
1528	dirname = ReadStringForm(content_forms[col], &lineptr);
1529	}
1530	}
1531	handler_->DefineDir(dirname, row);
1532	}
1533
1534	// Read the DWARF-5 filename table.
1535	ReadTypesAndForms(&lineptr, content_types, content_forms, kMaxTypesAndForms,
1536	&format_count);
1537	entry_count = reader_->ReadUnsignedLEB128(lineptr, &len);
1538	lineptr += len;
1539
1540	for (uint32_t row = `0`; row < entry_count; ++row) {
1541	ReadFileRow(&lineptr, content_types, content_forms, row, format_count);
1542	}
1543	}
1544	after_header_ = lineptr;
1545	}
1546
1547	/ static /
1548	bool LineInfo::ProcessOneOpcode(ByteReader* reader,
1549	LineInfoHandler* handler,
1550	const struct LineInfoHeader& header,
1551	const uint8_t* start,
1552	struct LineStateMachine* lsm,
1553	size_t* len,
1554	uintptr pc,
1555	bool* lsm_passes_pc) {
1556	size_t oplen = `0`;
1557	size_t templen;
1558	uint8_t opcode = reader->ReadOneByte(start);
1559	oplen++;
1560	start++;
1561
1562	// If the opcode is great than the opcode_base, it is a special
1563	// opcode. Most line programs consist mainly of special opcodes.
1564	if (opcode >= header.opcode_base) {
1565	opcode -= header.opcode_base;
1566	const int64_t advance_address = (opcode / header.line_range)
1567	* header.min_insn_length;
1568	const int32_t advance_line = (opcode % header.line_range)
1569	+ header.line_base;
1570
1571	// Check if the lsm passes "pc". If so, mark it as passed.
1572	if (lsm_passes_pc &&
1573	lsm->address <= pc && pc < lsm->address + advance_address) {
1574	lsm_passes_pc = true*;
1575	}
1576
1577	lsm->address += advance_address;
1578	lsm->line_num += advance_line;
1579	lsm->basic_block = true;
1580	*len = oplen;
1581	return true;
1582	}
1583
1584	// Otherwise, we have the regular opcodes
1585	switch (opcode) {
1586	case DW_LNS_copy: {
1587	lsm->basic_block = false;
1588	*len = oplen;
1589	return true;
1590	}
1591
1592	case DW_LNS_advance_pc: {
1593	uint64_t advance_address = reader->ReadUnsignedLEB128(start, &templen);
1594	oplen += templen;
1595
1596	// Check if the lsm passes "pc". If so, mark it as passed.
1597	if (lsm_passes_pc && lsm->address <= pc &&
1598	pc < lsm->address + header.min_insn_length * advance_address) {
1599	lsm_passes_pc = true*;
1600	}
1601
1602	lsm->address += header.min_insn_length * advance_address;
1603	}
1604	break;
1605	case DW_LNS_advance_line: {
1606	const int64_t advance_line = reader->ReadSignedLEB128(start, &templen);
1607	oplen += templen;
1608	lsm->line_num += static_cast<int32_t>(advance_line);
1609
1610	// With gcc 4.2.1, we can get the line_no here for the first time
1611	// since DW_LNS_advance_line is called after DW_LNE_set_address is
1612	// called. So we check if the lsm passes "pc" here, not in
1613	// DW_LNE_set_address.
1614	if (lsm_passes_pc && lsm->address == pc) {
1615	lsm_passes_pc = true*;
1616	}
1617	}
1618	break;
1619	case DW_LNS_set_file: {
1620	const uint64_t fileno = reader->ReadUnsignedLEB128(start, &templen);
1621	oplen += templen;
1622	lsm->file_num = static_cast<uint32_t>(fileno);
1623	}
1624	break;
1625	case DW_LNS_set_column: {
1626	const uint64_t colno = reader->ReadUnsignedLEB128(start, &templen);
1627	oplen += templen;
1628	lsm->column_num = static_cast<uint32_t>(colno);
1629	}
1630	break;
1631	case DW_LNS_negate_stmt: {
1632	lsm->is_stmt = !lsm->is_stmt;
1633	}
1634	break;
1635	case DW_LNS_set_basic_block: {
1636	lsm->basic_block = true;
1637	}
1638	break;
1639	case DW_LNS_fixed_advance_pc: {
1640	const uint16_t advance_address = reader->ReadTwoBytes(start);
1641	oplen += `2`;
1642
1643	// Check if the lsm passes "pc". If so, mark it as passed.
1644	if (lsm_passes_pc &&
1645	lsm->address <= pc && pc < lsm->address + advance_address) {
1646	lsm_passes_pc = true*;
1647	}
1648
1649	lsm->address += advance_address;
1650	}
1651	break;
1652	case DW_LNS_const_add_pc: {
1653	const int64_t advance_address = header.min_insn_length
1654	* ((`255` - header.opcode_base)
1655	/ header.line_range);
1656
1657	// Check if the lsm passes "pc". If so, mark it as passed.
1658	if (lsm_passes_pc &&
1659	lsm->address <= pc && pc < lsm->address + advance_address) {
1660	lsm_passes_pc = true*;
1661	}
1662
1663	lsm->address += advance_address;
1664	}
1665	break;
1666	case DW_LNS_extended_op: {
1667	const uint64_t extended_op_len = reader->ReadUnsignedLEB128(start,
1668	&templen);
1669	start += templen;
1670	oplen += templen + extended_op_len;
1671
1672	const uint64_t extended_op = reader->ReadOneByte(start);
1673	start++;
1674
1675	switch (extended_op) {
1676	case DW_LNE_end_sequence: {
1677	lsm->end_sequence = true;
1678	*len = oplen;
1679	return true;
1680	}
1681	break;
1682	case DW_LNE_set_address: {
1683	// With gcc 4.2.1, we cannot tell the line_no here since
1684	// DW_LNE_set_address is called before DW_LNS_advance_line is
1685	// called. So we do not check if the lsm passes "pc" here. See
1686	// also the comment in DW_LNS_advance_line.
1687	uint64_t address = reader->ReadAddress(start);
1688	lsm->address = address;
1689	}
1690	break;
1691	case DW_LNE_define_file: {
1692	const char* filename = reinterpret_cast<const char*>(start);
1693
1694	templen = strlen(filename) + `1`;
1695	start += templen;
1696
1697	uint64_t dirindex = reader->ReadUnsignedLEB128(start, &templen);
1698	oplen += templen;
1699
1700	const uint64_t mod_time = reader->ReadUnsignedLEB128(start,
1701	&templen);
1702	oplen += templen;
1703
1704	const uint64_t filelength = reader->ReadUnsignedLEB128(start,
1705	&templen);
1706	oplen += templen;
1707
1708	if (handler) {
1709	handler->DefineFile(filename, -`1`, static_cast<uint32_t>(dirindex),
1710	mod_time, filelength);
1711	}
1712	}
1713	break;
1714	}
1715	}
1716	break;
1717
1718	default: {
1719	// Ignore unknown opcode silently
1720	if (header.std_opcode_lengths) {
1721	for (int i = `0`; i < (*header.std_opcode_lengths)[opcode]; i++) {
1722	reader->ReadUnsignedLEB128(start, &templen);
1723	start += templen;
1724	oplen += templen;
1725	}
1726	}
1727	}
1728	break;
1729	}
1730	*len = oplen;
1731	return false;
1732	}
1733
1734	void LineInfo::ReadLines() {
1735	struct LineStateMachine lsm;
1736
1737	// lengthstart is the place the length field is based on.
1738	// It is the point in the header after the initial length field
1739	const uint8_t* lengthstart = buffer_;
1740
1741	// In 64 bit dwarf, the initial length is 12 bytes, because of the
1742	// 0xffffffff at the start.
1743	if (reader_->OffsetSize() == `8`)
1744	lengthstart += `12`;
1745	else
1746	lengthstart += `4`;
1747
1748	const uint8_t* lineptr = after_header_;
1749	lsm.Reset(header_.default_is_stmt);
1750
1751	// The LineInfoHandler interface expects each line's length along
1752	// with its address, but DWARF only provides addresses (sans
1753	// length), and an end-of-sequence address; one infers the length
1754	// from the next address. So we report a line only when we get the
1755	// next line's address, or the end-of-sequence address.
1756	bool have_pending_line = false;
1757	uint64_t pending_address = `0`;
1758	uint32_t pending_file_num = `0`, pending_line_num = `0`, pending_column_num = `0`;
1759
1760	while (lineptr < lengthstart + header_.total_length) {
1761	size_t oplength;
1762	bool add_row = ProcessOneOpcode(reader_, handler_, header_,
1763	lineptr, &lsm, &oplength, (uintptr)-`1`,
1764	NULL);
1765	if (add_row) {
1766	if (have_pending_line)
1767	handler_->AddLine(pending_address, lsm.address - pending_address,
1768	pending_file_num, pending_line_num,
1769	pending_column_num);
1770	if (lsm.end_sequence) {
1771	lsm.Reset(header_.default_is_stmt);
1772	have_pending_line = false;
1773	} else {
1774	pending_address = lsm.address;
1775	pending_file_num = lsm.file_num;
1776	pending_line_num = lsm.line_num;
1777	pending_column_num = lsm.column_num;
1778	have_pending_line = true;
1779	}
1780	}
1781	lineptr += oplength;
1782	}
1783
1784	after_header_ = lengthstart + header_.total_length;
1785	}
1786
1787	bool RangeListReader::ReadRanges(enum DwarfForm form, uint64_t data) {
1788	if (form == DW_FORM_sec_offset) {
1789	if (cu_info_->version_ <= `4`) {
1790	return ReadDebugRanges(data);
1791	} else {
1792	return ReadDebugRngList(data);
1793	}
1794	} else if (form == DW_FORM_rnglistx) {
1795	offset_array_ = cu_info_->ranges_base_;
1796	uint64_t index_offset = reader_->OffsetSize() * data;
1797	uint64_t range_list_offset =
1798	reader_->ReadOffset(cu_info_->buffer_ + offset_array_ + index_offset);
1799
1800	return ReadDebugRngList(offset_array_ + range_list_offset);
1801	}
1802	return false;
1803	}
1804
1805	bool RangeListReader::ReadDebugRanges(uint64_t offset) {
1806	const uint64_t max_address =
1807	(reader_->AddressSize() == `4`) ? `0xffffffffUL`
1808	: `0xffffffffffffffffULL`;
1809	const uint64_t entry_size = reader_->AddressSize() * `2`;
1810	bool list_end = false;
1811
1812	do {
1813	if (offset > cu_info_->size_ - entry_size) {
1814	return false; // Invalid range detected
1815	}
1816
1817	uint64_t start_address = reader_->ReadAddress(cu_info_->buffer_ + offset);
1818	uint64_t end_address = reader_->ReadAddress(
1819	cu_info_->buffer_ + offset + reader_->AddressSize());
1820
1821	if (start_address == max_address) { // Base address selection
1822	cu_info_->base_address_ = end_address;
1823	} else if (start_address == `0` && end_address == `0`) { // End-of-list
1824	handler_->Finish();
1825	list_end = true;
1826	} else { // Add a range entry
1827	handler_->AddRange(start_address + cu_info_->base_address_,
1828	end_address + cu_info_->base_address_);
1829	}
1830
1831	offset += entry_size;
1832	} while (!list_end);
1833
1834	return true;
1835	}
1836
1837	bool RangeListReader::ReadDebugRngList(uint64_t offset) {
1838	uint64_t start = `0`;
1839	uint64_t end = `0`;
1840	uint64_t range_len = `0`;
1841	uint64_t index = `0`;
1842	// A uleb128's length isn't known until after it has been read, so overruns
1843	// are only caught after an entire entry.
1844	while (offset < cu_info_->size_) {
1845	uint8_t entry_type = reader_->ReadOneByte(cu_info_->buffer_ + offset);
1846	offset += `1`;
1847	// Handle each entry type per Dwarf 5 Standard, section 2.17.3.
1848	switch (entry_type) {
1849	case DW_RLE_end_of_list:
1850	handler_->Finish();
1851	return true;
1852	case DW_RLE_base_addressx:
1853	offset += ReadULEB(offset, &index);
1854	cu_info_->base_address_ = GetAddressAtIndex(index);
1855	break;
1856	case DW_RLE_startx_endx:
1857	offset += ReadULEB(offset, &index);
1858	start = GetAddressAtIndex(index);
1859	offset += ReadULEB(offset, &index);
1860	end = GetAddressAtIndex(index);
1861	handler_->AddRange(start, end);
1862	break;
1863	case DW_RLE_startx_length:
1864	offset += ReadULEB(offset, &index);
1865	start = GetAddressAtIndex(index);
1866	offset += ReadULEB(offset, &range_len);
1867	handler_->AddRange(start, start + range_len);
1868	break;
1869	case DW_RLE_offset_pair:
1870	offset += ReadULEB(offset, &start);
1871	offset += ReadULEB(offset, &end);
1872	handler_->AddRange(start + cu_info_->base_address_,
1873	end + cu_info_->base_address_);
1874	break;
1875	case DW_RLE_base_address:
1876	offset += ReadAddress(offset, &cu_info_->base_address_);
1877	break;
1878	case DW_RLE_start_end:
1879	offset += ReadAddress(offset, &start);
1880	offset += ReadAddress(offset, &end);
1881	handler_->AddRange(start, end);
1882	break;
1883	case DW_RLE_start_length:
1884	offset += ReadAddress(offset, &start);
1885	offset += ReadULEB(offset, &end);
1886	handler_->AddRange(start, start + end);
1887	break;
1888	}
1889	}
1890	return false;
1891	}
1892
1893	// A DWARF rule for recovering the address or value of a register, or
1894	// computing the canonical frame address. There is one subclass of this for
1895	// each 'Rule' member function in CallFrameInfo::Handler.*
1896	//
1897	// It's annoying that we have to handle Rules using pointers (because
1898	// the concrete instances can have an arbitrary size). They're small,
1899	// so it would be much nicer if we could just handle them by value
1900	// instead of fretting about ownership and destruction.
1901	//
1902	// It seems like all these could simply be instances of std::tr1::bind,
1903	// except that we need instances to be EqualityComparable, too.
1904	//
1905	// This could logically be nested within State, but then the qualified names
1906	// get horrendous.
1907	class CallFrameInfo::Rule {
1908	public:
1909	virtual ~Rule() { }
1910
1911	// Tell HANDLER that, at ADDRESS in the program, REG can be recovered using
1912	// this rule. If REG is kCFARegister, then this rule describes how to compute
1913	// the canonical frame address. Return what the HANDLER member function
1914	// returned.
1915	virtual bool Handle(Handler* handler,
1916	uint64_t address, int reg) const = `0`;
1917
1918	// Equality on rules. We use these to decide which rules we need
1919	// to report after a DW_CFA_restore_state instruction.
1920	virtual bool operator==(const Rule& rhs) const = `0`;
1921
1922	bool operator!=(const Rule& rhs) const { return ! (*this == rhs); }
1923
1924	// Return a pointer to a copy of this rule.
1925	virtual Rule* Copy() const = `0`;
1926
1927	// If this is a base+offset rule, change its base register to REG.
1928	// Otherwise, do nothing. (Ugly, but required for DW_CFA_def_cfa_register.)
1929	virtual void SetBaseRegister(unsigned reg) { }
1930
1931	// If this is a base+offset rule, change its offset to OFFSET. Otherwise,
1932	// do nothing. (Ugly, but required for DW_CFA_def_cfa_offset.)
1933	virtual void SetOffset(long long offset) { }
1934	};
1935
1936	// Rule: the value the register had in the caller cannot be recovered.
1937	class CallFrameInfo::UndefinedRule: public CallFrameInfo::Rule {
1938	public:
1939	UndefinedRule() { }
1940	~UndefinedRule() { }
1941	bool Handle(Handler* handler, uint64_t address, int reg) const {
1942	return handler->UndefinedRule(address, reg);
1943	}
1944	bool operator==(const Rule& rhs) const {
1945	// dynamic_cast is allowed by the Google C++ Style Guide, if the use has
1946	// been carefully considered; cheap RTTI-like workarounds are forbidden.
1947	const UndefinedRule* our_rhs = dynamic_cast<const UndefinedRule*>(&rhs);
1948	return (our_rhs != NULL);
1949	}
1950	Rule* Copy() const { return new UndefinedRule (*this); }
1951	};
1952
1953	// Rule: the register's value is the same as that it had in the caller.
1954	class CallFrameInfo::SameValueRule: public CallFrameInfo::Rule {
1955	public:
1956	SameValueRule() { }
1957	~SameValueRule() { }
1958	bool Handle(Handler* handler, uint64_t address, int reg) const {
1959	return handler->SameValueRule(address, reg);
1960	}
1961	bool operator==(const Rule& rhs) const {
1962	// dynamic_cast is allowed by the Google C++ Style Guide, if the use has
1963	// been carefully considered; cheap RTTI-like workarounds are forbidden.
1964	const SameValueRule* our_rhs = dynamic_cast<const SameValueRule*>(&rhs);
1965	return (our_rhs != NULL);
1966	}
1967	Rule* Copy() const { return new SameValueRule (*this); }
1968	};
1969
1970	// Rule: the register is saved at OFFSET from BASE_REGISTER. BASE_REGISTER
1971	// may be CallFrameInfo::Handler::kCFARegister.
1972	class CallFrameInfo::OffsetRule: public CallFrameInfo::Rule {
1973	public:
1974	OffsetRule(int base_register, long offset)
1975	: base_register_(base_register), offset_(offset) { }
1976	~OffsetRule() { }
1977	bool Handle(Handler* handler, uint64_t address, int reg) const {
1978	return handler->OffsetRule(address, reg, base_register_, offset_);
1979	}
1980	bool operator==(const Rule& rhs) const {
1981	// dynamic_cast is allowed by the Google C++ Style Guide, if the use has
1982	// been carefully considered; cheap RTTI-like workarounds are forbidden.
1983	const OffsetRule* our_rhs = dynamic_cast<const OffsetRule*>(&rhs);
1984	return (our_rhs &&
1985	base_register_ == our_rhs->base_register_ &&
1986	offset_ == our_rhs->offset_);
1987	}
1988	Rule* Copy() const { return new OffsetRule (*this); }
1989	// We don't actually need SetBaseRegister or SetOffset here, since they
1990	// are only ever applied to CFA rules, for DW_CFA_def_cfa_offset, and it
1991	// doesn't make sense to use OffsetRule for computing the CFA: it
1992	// computes the address at which a register is saved, not a value.
1993	private:
1994	int base_register_;
1995	long offset_;
1996	};
1997
1998	// Rule: the value the register had in the caller is the value of
1999	// BASE_REGISTER plus offset. BASE_REGISTER may be
2000	// CallFrameInfo::Handler::kCFARegister.
2001	class CallFrameInfo::ValOffsetRule: public CallFrameInfo::Rule {
2002	public:
2003	ValOffsetRule(int base_register, long offset)
2004	: base_register_(base_register), offset_(offset) { }
2005	~ValOffsetRule() { }
2006	bool Handle(Handler* handler, uint64_t address, int reg) const {
2007	return handler->ValOffsetRule(address, reg, base_register_, offset_);
2008	}
2009	bool operator==(const Rule& rhs) const {
2010	// dynamic_cast is allowed by the Google C++ Style Guide, if the use has
2011	// been carefully considered; cheap RTTI-like workarounds are forbidden.
2012	const ValOffsetRule* our_rhs = dynamic_cast<const ValOffsetRule*>(&rhs);
2013	return (our_rhs &&
2014	base_register_ == our_rhs->base_register_ &&
2015	offset_ == our_rhs->offset_);
2016	}
2017	Rule* Copy() const { return new ValOffsetRule (*this); }
2018	void SetBaseRegister(unsigned reg) { base_register_ = reg; }
2019	void SetOffset(long long offset) { offset_ = offset; }
2020	private:
2021	int base_register_;
2022	long offset_;
2023	};
2024
2025	// Rule: the register has been saved in another register REGISTER_NUMBER_.
2026	class CallFrameInfo::RegisterRule: public CallFrameInfo::Rule {
2027	public:
2028	explicit RegisterRule(int register_number)
2029	: register_number_(register_number) { }
2030	~RegisterRule() { }
2031	bool Handle(Handler* handler, uint64_t address, int reg) const {
2032	return handler->RegisterRule(address, reg, register_number_);
2033	}
2034	bool operator==(const Rule& rhs) const {
2035	// dynamic_cast is allowed by the Google C++ Style Guide, if the use has
2036	// been carefully considered; cheap RTTI-like workarounds are forbidden.
2037	const RegisterRule* our_rhs = dynamic_cast<const RegisterRule*>(&rhs);
2038	return (our_rhs && register_number_ == our_rhs->register_number_);
2039	}
2040	Rule* Copy() const { return new RegisterRule (*this); }
2041	private:
2042	int register_number_;
2043	};
2044
2045	// Rule: EXPRESSION evaluates to the address at which the register is saved.
2046	class CallFrameInfo::ExpressionRule: public CallFrameInfo::Rule {
2047	public:
2048	explicit ExpressionRule(const string& expression)
2049	: expression_(expression) { }
2050	~ExpressionRule() { }
2051	bool Handle(Handler* handler, uint64_t address, int reg) const {
2052	return handler->ExpressionRule(address, reg, expression_);
2053	}
2054	bool operator==(const Rule& rhs) const {
2055	// dynamic_cast is allowed by the Google C++ Style Guide, if the use has
2056	// been carefully considered; cheap RTTI-like workarounds are forbidden.
2057	const ExpressionRule* our_rhs = dynamic_cast<const ExpressionRule*>(&rhs);
2058	return (our_rhs && expression_ == our_rhs->expression_);
2059	}
2060	Rule* Copy() const { return new ExpressionRule (*this); }
2061	private:
2062	string expression_;
2063	};
2064
2065	// Rule: EXPRESSION evaluates to the address at which the register is saved.
2066	class CallFrameInfo::ValExpressionRule: public CallFrameInfo::Rule {
2067	public:
2068	explicit ValExpressionRule(const string& expression)
2069	: expression_(expression) { }
2070	~ValExpressionRule() { }
2071	bool Handle(Handler* handler, uint64_t address, int reg) const {
2072	return handler->ValExpressionRule(address, reg, expression_);
2073	}
2074	bool operator==(const Rule& rhs) const {
2075	// dynamic_cast is allowed by the Google C++ Style Guide, if the use has
2076	// been carefully considered; cheap RTTI-like workarounds are forbidden.
2077	const ValExpressionRule* our_rhs =
2078	dynamic_cast<const ValExpressionRule*>(&rhs);
2079	return (our_rhs && expression_ == our_rhs->expression_);
2080	}
2081	Rule* Copy() const { return new ValExpressionRule (*this); }
2082	private:
2083	string expression_;
2084	};
2085
2086	// A map from register numbers to rules.
2087	class CallFrameInfo::RuleMap {
2088	public:
2089	RuleMap() : cfa_rule_(NULL) { }
2090	RuleMap(const RuleMap& rhs) : cfa_rule_(NULL) { *this = rhs; }
2091	~RuleMap() { Clear(); }
2092
2093	RuleMap& operator=(const RuleMap& rhs);
2094
2095	// Set the rule for computing the CFA to RULE. Take ownership of RULE.
2096	void SetCFARule(Rule* rule) { delete cfa_rule_; cfa_rule_ = rule; }
2097
2098	// Return the current CFA rule. Unlike RegisterRule, this RuleMap retains
2099	// ownership of the rule. We use this for DW_CFA_def_cfa_offset and
2100	// DW_CFA_def_cfa_register, and for detecting references to the CFA before
2101	// a rule for it has been established.
2102	Rule* CFARule() const { return cfa_rule_; }
2103
2104	// Return the rule for REG, or NULL if there is none. The caller takes
2105	// ownership of the result.
2106	Rule* RegisterRule(int reg) const;
2107
2108	// Set the rule for computing REG to RULE. Take ownership of RULE.
2109	void SetRegisterRule(int reg, Rule* rule);
2110
2111	// Make all the appropriate calls to HANDLER as if we were changing from
2112	// this RuleMap to NEW_RULES at ADDRESS. We use this to implement
2113	// DW_CFA_restore_state, where lots of rules can change simultaneously.
2114	// Return true if all handlers returned true; otherwise, return false.
2115	bool HandleTransitionTo(Handler* handler, uint64_t address,
2116	const RuleMap& new_rules) const;
2117
2118	private:
2119	// A map from register numbers to Rules.
2120	typedef std::map<int, Rule*> RuleByNumber;
2121
2122	// Remove all register rules and clear cfa_rule_.
2123	void Clear();
2124
2125	// The rule for computing the canonical frame address. This RuleMap owns
2126	// this rule.
2127	Rule* cfa_rule_;
2128
2129	// A map from register numbers to postfix expressions to recover
2130	// their values. This RuleMap owns the Rules the map refers to.
2131	RuleByNumber registers_;
2132	};
2133
2134	CallFrameInfo::RuleMap& CallFrameInfo::RuleMap::operator=(const RuleMap& rhs) {
2135	Clear();
2136	// Since each map owns the rules it refers to, assignment must copy them.
2137	if (rhs.cfa_rule_) cfa_rule_ = rhs.cfa_rule_->Copy();
2138	for (RuleByNumber::const_iterator it = rhs.registers_.begin();
2139	it != rhs.registers_.end(); it ++)
2140	registers_[it ->first] = it ->second->Copy();
2141	return *this;
2142	}
2143
2144	CallFrameInfo::Rule* CallFrameInfo::RuleMap::RegisterRule(int reg) const {
2145	assert(reg != Handler::kCFARegister);
2146	RuleByNumber::const_iterator it = registers_.find(reg);
2147	if (it != registers_.end())
2148	return it ->second->Copy();
2149	else
2150	return NULL;
2151	}
2152
2153	void CallFrameInfo::RuleMap::SetRegisterRule(int reg, Rule* rule) {
2154	assert(reg != Handler::kCFARegister);
2155	assert(rule);
2156	Rule** slot = &registers_[reg];
2157	delete *slot;
2158	*slot = rule;
2159	}
2160
2161	bool CallFrameInfo::RuleMap::HandleTransitionTo(
2162	Handler* handler,
2163	uint64_t address,
2164	const RuleMap& new_rules) const {
2165	// Transition from cfa_rule_ to new_rules.cfa_rule_.
2166	if (cfa_rule_ && new_rules.cfa_rule_) {
2167	if (cfa_rule_ != new_rules.cfa_rule_ &&
2168	!new_rules.cfa_rule_->Handle(handler, address,
2169	Handler::kCFARegister))
2170	return false;
2171	} else if (cfa_rule_) {
2172	// this RuleMap has a CFA rule but new_rules doesn't.
2173	// CallFrameInfo::Handler has no way to handle this --- and shouldn't;
2174	// it's garbage input. The instruction interpreter should have
2175	// detected this and warned, so take no action here.
2176	} else if (new_rules.cfa_rule_) {
2177	// This shouldn't be possible: NEW_RULES is some prior state, and
2178	// there's no way to remove entries.
2179	assert(`0`);
2180	} else {
2181	// Both CFA rules are empty. No action needed.
2182	}
2183
2184	// Traverse the two maps in order by register number, and report
2185	// whatever differences we find.
2186	RuleByNumber::const_iterator old_it = registers_.begin();
2187	RuleByNumber::const_iterator new_it = new_rules.registers_.begin();
2188	while (old_it != registers_.end() && new_it != new_rules.registers_.end()) {
2189	if (old_it ->first < new_it ->first) {
2190	// This RuleMap has an entry for old_it->first, but NEW_RULES
2191	// doesn't.
2192	//
2193	// This isn't really the right thing to do, but since CFI generally
2194	// only mentions callee-saves registers, and GCC's convention for
2195	// callee-saves registers is that they are unchanged, it's a good
2196	// approximation.
2197	if (!handler->SameValueRule(address, old_it ->first))
2198	return false;
2199	old_it ++;
2200	} else if (old_it ->first > new_it ->first) {
2201	// NEW_RULES has entry for new_it->first, but this RuleMap
2202	// doesn't. This shouldn't be possible: NEW_RULES is some prior
2203	// state, and there's no way to remove entries.
2204	assert(`0`);
2205	} else {
2206	// Both maps have an entry for this register. Report the new
2207	// rule if it is different.
2208	if (old_it ->second != new_it ->second &&
2209	!new_it ->second->Handle(handler, address, new_it ->first))
2210	return false;
2211	new_it ++, old_it ++;
2212	}
2213	}
2214	// Finish off entries from this RuleMap with no counterparts in new_rules.
2215	while (old_it != registers_.end()) {
2216	if (!handler->SameValueRule(address, old_it ->first))
2217	return false;
2218	old_it ++;
2219	}
2220	// Since we only make transitions from a rule set to some previously
2221	// saved rule set, and we can only add rules to the map, NEW_RULES
2222	// must have fewer rules than this.*
2223	assert(new_it == new_rules.registers_.end());
2224
2225	return true;
2226	}
2227
2228	// Remove all register rules and clear cfa_rule_.
2229	void CallFrameInfo::RuleMap::Clear() {
2230	delete cfa_rule_;
2231	cfa_rule_ = NULL;
2232	for (RuleByNumber::iterator it = registers_.begin();
2233	it != registers_.end(); it ++)
2234	delete it ->second;
2235	registers_.clear();
2236	}
2237
2238	// The state of the call frame information interpreter as it processes
2239	// instructions from a CIE and FDE.
2240	class CallFrameInfo::State {
2241	public:
2242	// Create a call frame information interpreter state with the given
2243	// reporter, reader, handler, and initial call frame info address.
2244	State(ByteReader* reader, Handler* handler, Reporter* reporter,
2245	uint64_t address)
2246	: reader_(reader), handler_(handler), reporter_(reporter),
2247	address_(address), entry_(NULL), cursor_(NULL) { }
2248
2249	// Interpret instructions from CIE, save the resulting rule set for
2250	// DW_CFA_restore instructions, and return true. On error, report
2251	// the problem to reporter_ and return false.
2252	bool InterpretCIE(const CIE& cie);
2253
2254	// Interpret instructions from FDE, and return true. On error,
2255	// report the problem to reporter_ and return false.
2256	bool InterpretFDE(const FDE& fde);
2257
2258	private:
2259	// The operands of a CFI instruction, for ParseOperands.
2260	struct Operands {
2261	unsigned register_number; // A register number.
2262	uint64_t offset; // An offset or address.
2263	long signed_offset; // A signed offset.
2264	string expression; // A DWARF expression.
2265	};
2266
2267	// Parse CFI instruction operands from STATE's instruction stream as
2268	// described by FORMAT. On success, populate OPERANDS with the
2269	// results, and return true. On failure, report the problem and
2270	// return false.
2271	//
2272	// Each character of FORMAT should be one of the following:
2273	//
2274	// 'r' unsigned LEB128 register number (OPERANDS->register_number)
2275	// 'o' unsigned LEB128 offset (OPERANDS->offset)
2276	// 's' signed LEB128 offset (OPERANDS->signed_offset)
2277	// 'a' machine-size address (OPERANDS->offset)
2278	// (If the CIE has a 'z' augmentation string, 'a' uses the
2279	// encoding specified by the 'R' argument.)
2280	// '1' a one-byte offset (OPERANDS->offset)
2281	// '2' a two-byte offset (OPERANDS->offset)
2282	// '4' a four-byte offset (OPERANDS->offset)
2283	// '8' an eight-byte offset (OPERANDS->offset)
2284	// 'e' a DW_FORM_block holding a (OPERANDS->expression)
2285	// DWARF expression
2286	bool ParseOperands(const char* format, Operands* operands);
2287
2288	// Interpret one CFI instruction from STATE's instruction stream, update
2289	// STATE, report any rule changes to handler_, and return true. On
2290	// failure, report the problem and return false.
2291	bool DoInstruction();
2292
2293	// The following Do member functions are subroutines of DoInstruction,*
2294	// factoring out the actual work of operations that have several
2295	// different encodings.
2296
2297	// Set the CFA rule to be the value of BASE_REGISTER plus OFFSET, and
2298	// return true. On failure, report and return false. (Used for
2299	// DW_CFA_def_cfa and DW_CFA_def_cfa_sf.)
2300	bool DoDefCFA(unsigned base_register, long offset);
2301
2302	// Change the offset of the CFA rule to OFFSET, and return true. On
2303	// failure, report and return false. (Subroutine for
2304	// DW_CFA_def_cfa_offset and DW_CFA_def_cfa_offset_sf.)
2305	bool DoDefCFAOffset(long offset);
2306
2307	// Specify that REG can be recovered using RULE, and return true. On
2308	// failure, report and return false.
2309	bool DoRule(unsigned reg, Rule* rule);
2310
2311	// Specify that REG can be found at OFFSET from the CFA, and return true.
2312	// On failure, report and return false. (Subroutine for DW_CFA_offset,
2313	// DW_CFA_offset_extended, and DW_CFA_offset_extended_sf.)
2314	bool DoOffset(unsigned reg, long offset);
2315
2316	// Specify that the caller's value for REG is the CFA plus OFFSET,
2317	// and return true. On failure, report and return false. (Subroutine
2318	// for DW_CFA_val_offset and DW_CFA_val_offset_sf.)
2319	bool DoValOffset(unsigned reg, long offset);
2320
2321	// Restore REG to the rule established in the CIE, and return true. On
2322	// failure, report and return false. (Subroutine for DW_CFA_restore and
2323	// DW_CFA_restore_extended.)
2324	bool DoRestore(unsigned reg);
2325
2326	// Return the section offset of the instruction at cursor. For use
2327	// in error messages.
2328	uint64_t CursorOffset() { return entry_->offset + (cursor_ - entry_->start); }
2329
2330	// Report that entry_ is incomplete, and return false. For brevity.
2331	bool ReportIncomplete() {
2332	reporter_->Incomplete(entry_->offset, entry_->kind);
2333	return false;
2334	}
2335
2336	// For reading multi-byte values with the appropriate endianness.
2337	ByteReader* reader_;
2338
2339	// The handler to which we should report the data we find.
2340	Handler* handler_;
2341
2342	// For reporting problems in the info we're parsing.
2343	Reporter* reporter_;
2344
2345	// The code address to which the next instruction in the stream applies.
2346	uint64_t address_;
2347
2348	// The entry whose instructions we are currently processing. This is
2349	// first a CIE, and then an FDE.
2350	const Entry* entry_;
2351
2352	// The next instruction to process.
2353	const uint8_t* cursor_;
2354
2355	// The current set of rules.
2356	RuleMap rules_;
2357
2358	// The set of rules established by the CIE, used by DW_CFA_restore
2359	// and DW_CFA_restore_extended. We set this after interpreting the
2360	// CIE's instructions.
2361	RuleMap cie_rules_;
2362
2363	// A stack of saved states, for DW_CFA_remember_state and
2364	// DW_CFA_restore_state.
2365	std::stack<RuleMap> saved_rules_;
2366	};
2367
2368	bool CallFrameInfo::State::InterpretCIE(const CIE& cie) {
2369	entry_ = &cie;
2370	cursor_ = entry_->instructions;
2371	while (cursor_ < entry_->end)
2372	if (!DoInstruction())
2373	return false;
2374	// Note the rules established by the CIE, for use by DW_CFA_restore
2375	// and DW_CFA_restore_extended.
2376	cie_rules_ = rules_;
2377	return true;
2378	}
2379
2380	bool CallFrameInfo::State::InterpretFDE(const FDE& fde) {
2381	entry_ = &fde;
2382	cursor_ = entry_->instructions;
2383	while (cursor_ < entry_->end)
2384	if (!DoInstruction())
2385	return false;
2386	return true;
2387	}
2388
2389	bool CallFrameInfo::State::ParseOperands(const char* format,
2390	Operands* operands) {
2391	size_t len;
2392	const char* operand;
2393
2394	for (operand = format; *operand; operand++) {
2395	size_t bytes_left = entry_->end - cursor_;
2396	switch (*operand) {
2397	case `'r'`:
2398	operands->register_number = reader_->ReadUnsignedLEB128(cursor_, &len);
2399	if (len > bytes_left) return ReportIncomplete();
2400	cursor_ += len;
2401	break;
2402
2403	case `'o'`:
2404	operands->offset = reader_->ReadUnsignedLEB128(cursor_, &len);
2405	if (len > bytes_left) return ReportIncomplete();
2406	cursor_ += len;
2407	break;
2408
2409	case `'s'`:
2410	operands->signed_offset = reader_->ReadSignedLEB128(cursor_, &len);
2411	if (len > bytes_left) return ReportIncomplete();
2412	cursor_ += len;
2413	break;
2414
2415	case `'a'`:
2416	operands->offset =
2417	reader_->ReadEncodedPointer(cursor_, entry_->cie->pointer_encoding,
2418	&len);
2419	if (len > bytes_left) return ReportIncomplete();
2420	cursor_ += len;
2421	break;
2422
2423	case `'1'`:
2424	if (`1` > bytes_left) return ReportIncomplete();
2425	operands->offset = static_cast<unsigned char>(*cursor_++);
2426	break;
2427
2428	case `'2'`:
2429	if (`2` > bytes_left) return ReportIncomplete();
2430	operands->offset = reader_->ReadTwoBytes(cursor_);
2431	cursor_ += `2`;
2432	break;
2433
2434	case `'4'`:
2435	if (`4` > bytes_left) return ReportIncomplete();
2436	operands->offset = reader_->ReadFourBytes(cursor_);
2437	cursor_ += `4`;
2438	break;
2439
2440	case `'8'`:
2441	if (`8` > bytes_left) return ReportIncomplete();
2442	operands->offset = reader_->ReadEightBytes(cursor_);
2443	cursor_ += `8`;
2444	break;
2445
2446	case `'e'`: {
2447	size_t expression_length = reader_->ReadUnsignedLEB128(cursor_, &len);
2448	if (len > bytes_left \|\| expression_length > bytes_left - len)
2449	return ReportIncomplete();
2450	cursor_ += len;
2451	operands->expression = string (reinterpret_cast<const char*>(cursor_),
2452	expression_length);
2453	cursor_ += expression_length;
2454	break;
2455	}
2456
2457	default:
2458	assert(`0`);
2459	}
2460	}
2461
2462	return true;
2463	}
2464
2465	bool CallFrameInfo::State::DoInstruction() {
2466	CIE* cie = entry_->cie;
2467	Operands ops;
2468
2469	// Our entry's kind should have been set by now.
2470	assert(entry_->kind != kUnknown);
2471
2472	// We shouldn't have been invoked unless there were more
2473	// instructions to parse.
2474	assert(cursor_ < entry_->end);
2475
2476	unsigned opcode = *cursor_++;
2477	if ((opcode & `0xc0`) != `0`) {
2478	switch (opcode & `0xc0`) {
2479	// Advance the address.
2480	case DW_CFA_advance_loc: {
2481	size_t code_offset = opcode & `0x3f`;
2482	address_ += code_offset * cie->code_alignment_factor;
2483	break;
2484	}
2485
2486	// Find a register at an offset from the CFA.
2487	case DW_CFA_offset:
2488	if (!ParseOperands("o", &ops) \|\|
2489	!DoOffset(opcode & `0x3f`, ops.offset * cie->data_alignment_factor))
2490	return false;
2491	break;
2492
2493	// Restore the rule established for a register by the CIE.
2494	case DW_CFA_restore:
2495	if (!DoRestore(opcode & `0x3f`)) return false;
2496	break;
2497
2498	// The 'if' above should have excluded this possibility.
2499	default:
2500	assert(`0`);
2501	}
2502
2503	// Return here, so the big switch below won't be indented.
2504	return true;
2505	}
2506
2507	switch (opcode) {
2508	// Set the address.
2509	case DW_CFA_set_loc:
2510	if (!ParseOperands("a", &ops)) return false;
2511	address_ = ops.offset;
2512	break;
2513
2514	// Advance the address.
2515	case DW_CFA_advance_loc1:
2516	if (!ParseOperands("1", &ops)) return false;
2517	address_ += ops.offset * cie->code_alignment_factor;
2518	break;
2519
2520	// Advance the address.
2521	case DW_CFA_advance_loc2:
2522	if (!ParseOperands("2", &ops)) return false;
2523	address_ += ops.offset * cie->code_alignment_factor;
2524	break;
2525
2526	// Advance the address.
2527	case DW_CFA_advance_loc4:
2528	if (!ParseOperands("4", &ops)) return false;
2529	address_ += ops.offset * cie->code_alignment_factor;
2530	break;
2531
2532	// Advance the address.
2533	case DW_CFA_MIPS_advance_loc8:
2534	if (!ParseOperands("8", &ops)) return false;
2535	address_ += ops.offset * cie->code_alignment_factor;
2536	break;
2537
2538	// Compute the CFA by adding an offset to a register.
2539	case DW_CFA_def_cfa:
2540	if (!ParseOperands("ro", &ops) \|\|
2541	!DoDefCFA(ops.register_number, ops.offset))
2542	return false;
2543	break;
2544
2545	// Compute the CFA by adding an offset to a register.
2546	case DW_CFA_def_cfa_sf:
2547	if (!ParseOperands("rs", &ops) \|\|
2548	!DoDefCFA(ops.register_number,
2549	ops.signed_offset * cie->data_alignment_factor))
2550	return false;
2551	break;
2552
2553	// Change the base register used to compute the CFA.
2554	case DW_CFA_def_cfa_register: {
2555	if (!ParseOperands("r", &ops)) return false;
2556	Rule* cfa_rule = rules_.CFARule();
2557	if (!cfa_rule) {
2558	if (!DoDefCFA(ops.register_number, ops.offset)) {
2559	reporter_->NoCFARule(entry_->offset, entry_->kind, CursorOffset());
2560	return false;
2561	}
2562	} else {
2563	cfa_rule->SetBaseRegister(ops.register_number);
2564	if (!cfa_rule->Handle(handler_, address_,
2565	Handler::kCFARegister))
2566	return false;
2567	}
2568	break;
2569	}
2570
2571	// Change the offset used to compute the CFA.
2572	case DW_CFA_def_cfa_offset:
2573	if (!ParseOperands("o", &ops) \|\|
2574	!DoDefCFAOffset(ops.offset))
2575	return false;
2576	break;
2577
2578	// Change the offset used to compute the CFA.
2579	case DW_CFA_def_cfa_offset_sf:
2580	if (!ParseOperands("s", &ops) \|\|
2581	!DoDefCFAOffset(ops.signed_offset * cie->data_alignment_factor))
2582	return false;
2583	break;
2584
2585	// Specify an expression whose value is the CFA.
2586	case DW_CFA_def_cfa_expression: {
2587	if (!ParseOperands("e", &ops))
2588	return false;
2589	Rule* rule = new ValExpressionRule (ops.expression);
2590	rules_.SetCFARule(rule);
2591	if (!rule->Handle(handler_, address_,
2592	Handler::kCFARegister))
2593	return false;
2594	break;
2595	}
2596
2597	// The register's value cannot be recovered.
2598	case DW_CFA_undefined: {
2599	if (!ParseOperands("r", &ops) \|\|
2600	!DoRule(ops.register_number, new UndefinedRule ()))
2601	return false;
2602	break;
2603	}
2604
2605	// The register's value is unchanged from its value in the caller.
2606	case DW_CFA_same_value: {
2607	if (!ParseOperands("r", &ops) \|\|
2608	!DoRule(ops.register_number, new SameValueRule ()))
2609	return false;
2610	break;
2611	}
2612
2613	// Find a register at an offset from the CFA.
2614	case DW_CFA_offset_extended:
2615	if (!ParseOperands("ro", &ops) \|\|
2616	!DoOffset(ops.register_number,
2617	ops.offset * cie->data_alignment_factor))
2618	return false;
2619	break;
2620
2621	// The register is saved at an offset from the CFA.
2622	case DW_CFA_offset_extended_sf:
2623	if (!ParseOperands("rs", &ops) \|\|
2624	!DoOffset(ops.register_number,
2625	ops.signed_offset * cie->data_alignment_factor))
2626	return false;
2627	break;
2628
2629	// The register is saved at an offset from the CFA.
2630	case DW_CFA_GNU_negative_offset_extended:
2631	if (!ParseOperands("ro", &ops) \|\|
2632	!DoOffset(ops.register_number,
2633	-ops.offset * cie->data_alignment_factor))
2634	return false;
2635	break;
2636
2637	// The register's value is the sum of the CFA plus an offset.
2638	case DW_CFA_val_offset:
2639	if (!ParseOperands("ro", &ops) \|\|
2640	!DoValOffset(ops.register_number,
2641	ops.offset * cie->data_alignment_factor))
2642	return false;
2643	break;
2644
2645	// The register's value is the sum of the CFA plus an offset.
2646	case DW_CFA_val_offset_sf:
2647	if (!ParseOperands("rs", &ops) \|\|
2648	!DoValOffset(ops.register_number,
2649	ops.signed_offset * cie->data_alignment_factor))
2650	return false;
2651	break;
2652
2653	// The register has been saved in another register.
2654	case DW_CFA_register: {
2655	if (!ParseOperands("ro", &ops) \|\|
2656	!DoRule(ops.register_number, new RegisterRule (ops.offset)))
2657	return false;
2658	break;
2659	}
2660
2661	// An expression yields the address at which the register is saved.
2662	case DW_CFA_expression: {
2663	if (!ParseOperands("re", &ops) \|\|
2664	!DoRule(ops.register_number, new ExpressionRule (ops.expression)))
2665	return false;
2666	break;
2667	}
2668
2669	// An expression yields the caller's value for the register.
2670	case DW_CFA_val_expression: {
2671	if (!ParseOperands("re", &ops) \|\|
2672	!DoRule(ops.register_number, new ValExpressionRule (ops.expression)))
2673	return false;
2674	break;
2675	}
2676
2677	// Restore the rule established for a register by the CIE.
2678	case DW_CFA_restore_extended:
2679	if (!ParseOperands("r", &ops) \|\|
2680	!DoRestore( ops.register_number))
2681	return false;
2682	break;
2683
2684	// Save the current set of rules on a stack.
2685	case DW_CFA_remember_state:
2686	saved_rules_.push(rules_);
2687	break;
2688
2689	// Pop the current set of rules off the stack.
2690	case DW_CFA_restore_state: {
2691	if (saved_rules_.empty()) {
2692	reporter_->EmptyStateStack(entry_->offset, entry_->kind,
2693	CursorOffset());
2694	return false;
2695	}
2696	const RuleMap& new_rules = saved_rules_.top();
2697	if (rules_.CFARule() && !new_rules.CFARule()) {
2698	reporter_->ClearingCFARule(entry_->offset, entry_->kind,
2699	CursorOffset());
2700	return false;
2701	}
2702	rules_.HandleTransitionTo(handler_, address_, new_rules);
2703	rules_ = new_rules;
2704	saved_rules_.pop();
2705	break;
2706	}
2707
2708	// No operation. (Padding instruction.)
2709	case DW_CFA_nop:
2710	break;
2711
2712	// A SPARC register window save: Registers 8 through 15 (%o0-%o7)
2713	// are saved in registers 24 through 31 (%i0-%i7), and registers
2714	// 16 through 31 (%l0-%l7 and %i0-%i7) are saved at CFA offsets
2715	// (0-15 the register size). The register numbers must be*
2716	// hard-coded. A GNU extension, and not a pretty one.
2717	case DW_CFA_GNU_window_save: {
2718	// Save %o0-%o7 in %i0-%i7.
2719	for (int i = `8`; i < `16`; i++)
2720	if (!DoRule(i, new RegisterRule (i + `16`)))
2721	return false;
2722	// Save %l0-%l7 and %i0-%i7 at the CFA.
2723	for (int i = `16`; i < `32`; i++)
2724	// Assume that the byte reader's address size is the same as
2725	// the architecture's register size. !@#%^ hilarious.*
2726	if (!DoRule(i, new OffsetRule (Handler::kCFARegister,
2727	(i - `16`) * reader_->AddressSize())))
2728	return false;
2729	break;
2730	}
2731
2732	// I'm not sure what this is. GDB doesn't use it for unwinding.
2733	case DW_CFA_GNU_args_size:
2734	if (!ParseOperands("o", &ops)) return false;
2735	break;
2736
2737	// An opcode we don't recognize.
2738	default: {
2739	reporter_->BadInstruction(entry_->offset, entry_->kind, CursorOffset());
2740	return false;
2741	}
2742	}
2743
2744	return true;
2745	}
2746
2747	bool CallFrameInfo::State::DoDefCFA(unsigned base_register, long offset) {
2748	Rule* rule = new ValOffsetRule (base_register, offset);
2749	rules_.SetCFARule(rule);
2750	return rule->Handle(handler_, address_,
2751	Handler::kCFARegister);
2752	}
2753
2754	bool CallFrameInfo::State::DoDefCFAOffset(long offset) {
2755	Rule* cfa_rule = rules_.CFARule();
2756	if (!cfa_rule) {
2757	reporter_->NoCFARule(entry_->offset, entry_->kind, CursorOffset());
2758	return false;
2759	}
2760	cfa_rule->SetOffset(offset);
2761	return cfa_rule->Handle(handler_, address_,
2762	Handler::kCFARegister);
2763	}
2764
2765	bool CallFrameInfo::State::DoRule(unsigned reg, Rule* rule) {
2766	rules_.SetRegisterRule(reg, rule);
2767	return rule->Handle(handler_, address_, reg);
2768	}
2769
2770	bool CallFrameInfo::State::DoOffset(unsigned reg, long offset) {
2771	if (!rules_.CFARule()) {
2772	reporter_->NoCFARule(entry_->offset, entry_->kind, CursorOffset());
2773	return false;
2774	}
2775	return DoRule(reg,
2776	new OffsetRule (Handler::kCFARegister, offset));
2777	}
2778
2779	bool CallFrameInfo::State::DoValOffset(unsigned reg, long offset) {
2780	if (!rules_.CFARule()) {
2781	reporter_->NoCFARule(entry_->offset, entry_->kind, CursorOffset());
2782	return false;
2783	}
2784	return DoRule(reg,
2785	new ValOffsetRule (Handler::kCFARegister, offset));
2786	}
2787
2788	bool CallFrameInfo::State::DoRestore(unsigned reg) {
2789	// DW_CFA_restore and DW_CFA_restore_extended don't make sense in a CIE.
2790	if (entry_->kind == kCIE) {
2791	reporter_->RestoreInCIE(entry_->offset, CursorOffset());
2792	return false;
2793	}
2794	Rule* rule = cie_rules_.RegisterRule(reg);
2795	if (!rule) {
2796	// This isn't really the right thing to do, but since CFI generally
2797	// only mentions callee-saves registers, and GCC's convention for
2798	// callee-saves registers is that they are unchanged, it's a good
2799	// approximation.
2800	rule = new SameValueRule ();
2801	}
2802	return DoRule(reg, rule);
2803	}
2804
2805	bool CallFrameInfo::ReadEntryPrologue(const uint8_t* cursor, Entry* entry) {
2806	const uint8_t* buffer_end = buffer_ + buffer_length_;
2807
2808	// Initialize enough of ENTRY for use in error reporting.
2809	entry->offset = cursor - buffer_;
2810	entry->start = cursor;
2811	entry->kind = kUnknown;
2812	entry->end = NULL;
2813
2814	// Read the initial length. This sets reader_'s offset size.
2815	size_t length_size;
2816	uint64_t length = reader_->ReadInitialLength(cursor, &length_size);
2817	if (length_size > size_t(buffer_end - cursor))
2818	return ReportIncomplete(entry);
2819	cursor += length_size;
2820
2821	// In a .eh_frame section, a length of zero marks the end of the series
2822	// of entries.
2823	if (length == `0` && eh_frame_) {
2824	entry->kind = kTerminator;
2825	entry->end = cursor;
2826	return true;
2827	}
2828
2829	// Validate the length.
2830	if (length > size_t(buffer_end - cursor))
2831	return ReportIncomplete(entry);
2832
2833	// The length is the number of bytes after the initial length field;
2834	// we have that position handy at this point, so compute the end
2835	// now. (If we're parsing 64-bit-offset DWARF on a 32-bit machine,
2836	// and the length didn't fit in a size_t, we would have rejected it
2837	// above.)
2838	entry->end = cursor + length;
2839
2840	// Parse the next field: either the offset of a CIE or a CIE id.
2841	size_t offset_size = reader_->OffsetSize();
2842	if (offset_size > size_t(entry->end - cursor)) return ReportIncomplete(entry);
2843	entry->id = reader_->ReadOffset(cursor);
2844
2845	// Don't advance cursor past id field yet; in .eh_frame data we need
2846	// the id's position to compute the section offset of an FDE's CIE.
2847
2848	// Now we can decide what kind of entry this is.
2849	if (eh_frame_) {
2850	// In .eh_frame data, an ID of zero marks the entry as a CIE, and
2851	// anything else is an offset from the id field of the FDE to the start
2852	// of the CIE.
2853	if (entry->id == `0`) {
2854	entry->kind = kCIE;
2855	} else {
2856	entry->kind = kFDE;
2857	// Turn the offset from the id into an offset from the buffer's start.
2858	entry->id = (cursor - buffer_) - entry->id;
2859	}
2860	} else {
2861	// In DWARF CFI data, an ID of ~0 (of the appropriate width, given the
2862	// offset size for the entry) marks the entry as a CIE, and anything
2863	// else is the offset of the CIE from the beginning of the section.
2864	if (offset_size == `4`)
2865	entry->kind = (entry->id == `0xffffffff`) ? kCIE : kFDE;
2866	else {
2867	assert(offset_size == `8`);
2868	entry->kind = (entry->id == `0xffffffffffffffffULL`) ? kCIE : kFDE;
2869	}
2870	}
2871
2872	// Now advance cursor past the id.
2873	cursor += offset_size;
2874
2875	// The fields specific to this kind of entry start here.
2876	entry->fields = cursor;
2877
2878	entry->cie = NULL;
2879
2880	return true;
2881	}
2882
2883	bool CallFrameInfo::ReadCIEFields(CIE* cie) {
2884	const uint8_t* cursor = cie->fields;
2885	size_t len;
2886
2887	assert(cie->kind == kCIE);
2888
2889	// Prepare for early exit.
2890	cie->version = `0`;
2891	cie->augmentation.clear();
2892	cie->code_alignment_factor = `0`;
2893	cie->data_alignment_factor = `0`;
2894	cie->return_address_register = `0`;
2895	cie->has_z_augmentation = false;
2896	cie->pointer_encoding = DW_EH_PE_absptr;
2897	cie->instructions = `0`;
2898
2899	// Parse the version number.
2900	if (cie->end - cursor < `1`)
2901	return ReportIncomplete(cie);
2902	cie->version = reader_->ReadOneByte(cursor);
2903	cursor++;
2904
2905	// If we don't recognize the version, we can't parse any more fields of the
2906	// CIE. For DWARF CFI, we handle versions 1 through 4 (there was never a
2907	// version 2 of CFI data). For .eh_frame, we handle versions 1 and 4 as well;
2908	// the difference between those versions seems to be the same as for
2909	// .debug_frame.
2910	if (cie->version < `1` \|\| cie->version > `4`) {
2911	reporter_->UnrecognizedVersion(cie->offset, cie->version);
2912	return false;
2913	}
2914
2915	const uint8_t* augmentation_start = cursor;
2916	const uint8_t* augmentation_end =
2917	reinterpret_cast<const uint8_t*>(memchr(augmentation_start, `'\0'`,
2918	cie->end - augmentation_start));
2919	if (! augmentation_end) return ReportIncomplete(cie);
2920	cursor = augmentation_end;
2921	cie->augmentation = string (reinterpret_cast<const char*>(augmentation_start),
2922	cursor - augmentation_start);
2923	// Skip the terminating '\0'.
2924	cursor++;
2925
2926	// Is this CFI augmented?
2927	if (!cie->augmentation.empty()) {
2928	// Is it an augmentation we recognize?
2929	if (cie->augmentation [`0`] == DW_Z_augmentation_start) {
2930	// Linux C++ ABI 'z' augmentation, used for exception handling data.
2931	cie->has_z_augmentation = true;
2932	} else {
2933	// Not an augmentation we recognize. Augmentations can have arbitrary
2934	// effects on the form of rest of the content, so we have to give up.
2935	reporter_->UnrecognizedAugmentation(cie->offset, cie->augmentation);
2936	return false;
2937	}
2938	}
2939
2940	if (cie->version >= `4`) {
2941	cie->address_size = *cursor++;
2942	if (cie->address_size != `8` && cie->address_size != `4`) {
2943	reporter_->UnexpectedAddressSize(cie->offset, cie->address_size);
2944	return false;
2945	}
2946
2947	cie->segment_size = *cursor++;
2948	if (cie->segment_size != `0`) {
2949	reporter_->UnexpectedSegmentSize(cie->offset, cie->segment_size);
2950	return false;
2951	}
2952	}
2953
2954	// Parse the code alignment factor.
2955	cie->code_alignment_factor = reader_->ReadUnsignedLEB128(cursor, &len);
2956	if (size_t(cie->end - cursor) < len) return ReportIncomplete(cie);
2957	cursor += len;
2958
2959	// Parse the data alignment factor.
2960	cie->data_alignment_factor = reader_->ReadSignedLEB128(cursor, &len);
2961	if (size_t(cie->end - cursor) < len) return ReportIncomplete(cie);
2962	cursor += len;
2963
2964	// Parse the return address register. This is a ubyte in version 1, and
2965	// a ULEB128 in version 3.
2966	if (cie->version == `1`) {
2967	if (cursor >= cie->end) return ReportIncomplete(cie);
2968	cie->return_address_register = uint8_t(*cursor++);
2969	} else {
2970	cie->return_address_register = reader_->ReadUnsignedLEB128(cursor, &len);
2971	if (size_t(cie->end - cursor) < len) return ReportIncomplete(cie);
2972	cursor += len;
2973	}
2974
2975	// If we have a 'z' augmentation string, find the augmentation data and
2976	// use the augmentation string to parse it.
2977	if (cie->has_z_augmentation) {
2978	uint64_t data_size = reader_->ReadUnsignedLEB128(cursor, &len);
2979	if (size_t(cie->end - cursor) < len + data_size)
2980	return ReportIncomplete(cie);
2981	cursor += len;
2982	const uint8_t* data = cursor;
2983	cursor += data_size;
2984	const uint8_t* data_end = cursor;
2985
2986	cie->has_z_lsda = false;
2987	cie->has_z_personality = false;
2988	cie->has_z_signal_frame = false;
2989
2990	// Walk the augmentation string, and extract values from the
2991	// augmentation data as the string directs.
2992	for (size_t i = `1`; i < cie->augmentation.size(); i++) {
2993	switch (cie->augmentation [i]) {
2994	case DW_Z_has_LSDA:
2995	// The CIE's augmentation data holds the language-specific data
2996	// area pointer's encoding, and the FDE's augmentation data holds
2997	// the pointer itself.
2998	cie->has_z_lsda = true;
2999	// Fetch the LSDA encoding from the augmentation data.
3000	if (data >= data_end) return ReportIncomplete(cie);
3001	cie->lsda_encoding = DwarfPointerEncoding(*data++);
3002	if (!reader_->ValidEncoding(cie->lsda_encoding)) {
3003	reporter_->InvalidPointerEncoding(cie->offset, cie->lsda_encoding);
3004	return false;
3005	}
3006	// Don't check if the encoding is usable here --- we haven't
3007	// read the FDE's fields yet, so we're not prepared for
3008	// DW_EH_PE_funcrel, although that's a fine encoding for the
3009	// LSDA to use, since it appears in the FDE.
3010	break;
3011
3012	case DW_Z_has_personality_routine:
3013	// The CIE's augmentation data holds the personality routine
3014	// pointer's encoding, followed by the pointer itself.
3015	cie->has_z_personality = true;
3016	// Fetch the personality routine pointer's encoding from the
3017	// augmentation data.
3018	if (data >= data_end) return ReportIncomplete(cie);
3019	cie->personality_encoding = DwarfPointerEncoding(*data++);
3020	if (!reader_->ValidEncoding(cie->personality_encoding)) {
3021	reporter_->InvalidPointerEncoding(cie->offset,
3022	cie->personality_encoding);
3023	return false;
3024	}
3025	if (!reader_->UsableEncoding(cie->personality_encoding)) {
3026	reporter_->UnusablePointerEncoding(cie->offset,
3027	cie->personality_encoding);
3028	return false;
3029	}
3030	// Fetch the personality routine's pointer itself from the data.
3031	cie->personality_address =
3032	reader_->ReadEncodedPointer(data, cie->personality_encoding,
3033	&len);
3034	if (len > size_t(data_end - data))
3035	return ReportIncomplete(cie);
3036	data += len;
3037	break;
3038
3039	case DW_Z_has_FDE_address_encoding:
3040	// The CIE's augmentation data holds the pointer encoding to use
3041	// for addresses in the FDE.
3042	if (data >= data_end) return ReportIncomplete(cie);
3043	cie->pointer_encoding = DwarfPointerEncoding(*data++);
3044	if (!reader_->ValidEncoding(cie->pointer_encoding)) {
3045	reporter_->InvalidPointerEncoding(cie->offset,
3046	cie->pointer_encoding);
3047	return false;
3048	}
3049	if (!reader_->UsableEncoding(cie->pointer_encoding)) {
3050	reporter_->UnusablePointerEncoding(cie->offset,
3051	cie->pointer_encoding);
3052	return false;
3053	}
3054	break;
3055
3056	case DW_Z_is_signal_trampoline:
3057	// Frames using this CIE are signal delivery frames.
3058	cie->has_z_signal_frame = true;
3059	break;
3060
3061	default:
3062	// An augmentation we don't recognize.
3063	reporter_->UnrecognizedAugmentation(cie->offset, cie->augmentation);
3064	return false;
3065	}
3066	}
3067	}
3068
3069	// The CIE's instructions start here.
3070	cie->instructions = cursor;
3071
3072	return true;
3073	}
3074
3075	bool CallFrameInfo::ReadFDEFields(FDE* fde) {
3076	const uint8_t* cursor = fde->fields;
3077	size_t size;
3078
3079	fde->address = reader_->ReadEncodedPointer(cursor, fde->cie->pointer_encoding,
3080	&size);
3081	if (size > size_t(fde->end - cursor))
3082	return ReportIncomplete(fde);
3083	cursor += size;
3084	reader_->SetFunctionBase(fde->address);
3085
3086	// For the length, we strip off the upper nybble of the encoding used for
3087	// the starting address.
3088	DwarfPointerEncoding length_encoding =
3089	DwarfPointerEncoding(fde->cie->pointer_encoding & `0x0f`);
3090	fde->size = reader_->ReadEncodedPointer(cursor, length_encoding, &size);
3091	if (size > size_t(fde->end - cursor))
3092	return ReportIncomplete(fde);
3093	cursor += size;
3094
3095	// If the CIE has a 'z' augmentation string, then augmentation data
3096	// appears here.
3097	if (fde->cie->has_z_augmentation) {
3098	uint64_t data_size = reader_->ReadUnsignedLEB128(cursor, &size);
3099	if (size_t(fde->end - cursor) < size + data_size)
3100	return ReportIncomplete(fde);
3101	cursor += size;
3102
3103	// In the abstract, we should walk the augmentation string, and extract
3104	// items from the FDE's augmentation data as we encounter augmentation
3105	// string characters that specify their presence: the ordering of items
3106	// in the augmentation string determines the arrangement of values in
3107	// the augmentation data.
3108	//
3109	// In practice, there's only ever one value in FDE augmentation data
3110	// that we support --- the LSDA pointer --- and we have to bail if we
3111	// see any unrecognized augmentation string characters. So if there is
3112	// anything here at all, we know what it is, and where it starts.
3113	if (fde->cie->has_z_lsda) {
3114	// Check whether the LSDA's pointer encoding is usable now: only once
3115	// we've parsed the FDE's starting address do we call reader_->
3116	// SetFunctionBase, so that the DW_EH_PE_funcrel encoding becomes
3117	// usable.
3118	if (!reader_->UsableEncoding(fde->cie->lsda_encoding)) {
3119	reporter_->UnusablePointerEncoding(fde->cie->offset,
3120	fde->cie->lsda_encoding);
3121	return false;
3122	}
3123
3124	fde->lsda_address =
3125	reader_->ReadEncodedPointer(cursor, fde->cie->lsda_encoding, &size);
3126	if (size > data_size)
3127	return ReportIncomplete(fde);
3128	// Ideally, we would also complain here if there were unconsumed
3129	// augmentation data.
3130	}
3131
3132	cursor += data_size;
3133	}
3134
3135	// The FDE's instructions start after those.
3136	fde->instructions = cursor;
3137
3138	return true;
3139	}
3140
3141	bool CallFrameInfo::Start() {
3142	const uint8_t* buffer_end = buffer_ + buffer_length_;
3143	const uint8_t* cursor;
3144	bool all_ok = true;
3145	const uint8_t* entry_end;
3146	bool ok;
3147
3148	// Traverse all the entries in buffer_, skipping CIEs and offering
3149	// FDEs to the handler.
3150	for (cursor = buffer_; cursor < buffer_end;
3151	cursor = entry_end, all_ok = all_ok && ok) {
3152	FDE fde;
3153
3154	// Make it easy to skip this entry with 'continue': assume that
3155	// things are not okay until we've checked all the data, and
3156	// prepare the address of the next entry.
3157	ok = false;
3158
3159	// Read the entry's prologue.
3160	if (!ReadEntryPrologue(cursor, &fde)) {
3161	if (!fde.end) {
3162	// If we couldn't even figure out this entry's extent, then we
3163	// must stop processing entries altogether.
3164	all_ok = false;
3165	break;
3166	}
3167	entry_end = fde.end;
3168	continue;
3169	}
3170
3171	// The next iteration picks up after this entry.
3172	entry_end = fde.end;
3173
3174	// Did we see an .eh_frame terminating mark?
3175	if (fde.kind == kTerminator) {
3176	// If there appears to be more data left in the section after the
3177	// terminating mark, warn the user. But this is just a warning;
3178	// we leave all_ok true.
3179	if (fde.end < buffer_end) reporter_->EarlyEHTerminator(fde.offset);
3180	break;
3181	}
3182
3183	// In this loop, we skip CIEs. We only parse them fully when we
3184	// parse an FDE that refers to them. This limits our memory
3185	// consumption (beyond the buffer itself) to that needed to
3186	// process the largest single entry.
3187	if (fde.kind != kFDE) {
3188	ok = true;
3189	continue;
3190	}
3191
3192	// Validate the CIE pointer.
3193	if (fde.id > buffer_length_) {
3194	reporter_->CIEPointerOutOfRange(fde.offset, fde.id);
3195	continue;
3196	}
3197
3198	CIE cie;
3199
3200	// Parse this FDE's CIE header.
3201	if (!ReadEntryPrologue(buffer_ + fde.id, &cie))
3202	continue;
3203	// This had better be an actual CIE.
3204	if (cie.kind != kCIE) {
3205	reporter_->BadCIEId(fde.offset, fde.id);
3206	continue;
3207	}
3208	if (!ReadCIEFields(&cie))
3209	continue;
3210
3211	// TODO(nbilling): This could lead to strange behavior if a single buffer
3212	// contained a mixture of DWARF versions as well as address sizes. Not
3213	// sure if it's worth handling such a case.
3214
3215	// DWARF4 CIE specifies address_size, so use it for this call frame.
3216	if (cie.version >= `4`) {
3217	reader_->SetAddressSize(cie.address_size);
3218	}
3219
3220	// We now have the values that govern both the CIE and the FDE.
3221	cie.cie = &cie;
3222	fde.cie = &cie;
3223
3224	// Parse the FDE's header.
3225	if (!ReadFDEFields(&fde))
3226	continue;
3227
3228	// Call Entry to ask the consumer if they're interested.
3229	if (!handler_->Entry(fde.offset, fde.address, fde.size,
3230	cie.version, cie.augmentation,
3231	cie.return_address_register)) {
3232	// The handler isn't interested in this entry. That's not an error.
3233	ok = true;
3234	continue;
3235	}
3236
3237	if (cie.has_z_augmentation) {
3238	// Report the personality routine address, if we have one.
3239	if (cie.has_z_personality) {
3240	if (!handler_
3241	->PersonalityRoutine(cie.personality_address,
3242	IsIndirectEncoding(cie.personality_encoding)))
3243	continue;
3244	}
3245
3246	// Report the language-specific data area address, if we have one.
3247	if (cie.has_z_lsda) {
3248	if (!handler_
3249	->LanguageSpecificDataArea(fde.lsda_address,
3250	IsIndirectEncoding(cie.lsda_encoding)))
3251	continue;
3252	}
3253
3254	// If this is a signal-handling frame, report that.
3255	if (cie.has_z_signal_frame) {
3256	if (!handler_->SignalHandler())
3257	continue;
3258	}
3259	}
3260
3261	// Interpret the CIE's instructions, and then the FDE's instructions.
3262	State state(reader_, handler_, reporter_, fde.address);
3263	ok = state.InterpretCIE(cie) && state.InterpretFDE(fde);
3264
3265	// Tell the ByteReader that the function start address from the
3266	// FDE header is no longer valid.
3267	reader_->ClearFunctionBase();
3268
3269	// Report the end of the entry.
3270	handler_->End();
3271	}
3272
3273	return all_ok;
3274	}
3275
3276	const char* CallFrameInfo::KindName(EntryKind kind) {
3277	if (kind == CallFrameInfo::kUnknown)
3278	return "entry";
3279	else if (kind == CallFrameInfo::kCIE)
3280	return "common information entry";
3281	else if (kind == CallFrameInfo::kFDE)
3282	return "frame description entry";
3283	else {
3284	assert (kind == CallFrameInfo::kTerminator);
3285	return ".eh_frame sequence terminator";
3286	}
3287	}
3288
3289	bool CallFrameInfo::ReportIncomplete(Entry* entry) {
3290	reporter_->Incomplete(entry->offset, entry->kind);
3291	return false;
3292	}
3293
3294	void CallFrameInfo::Reporter::Incomplete(uint64_t offset,
3295	CallFrameInfo::EntryKind kind) {
3296	fprintf(stderr,
3297	"%s: CFI %s at offset 0x%" PRIx64 " in '%s': entry ends early\n",
3298	filename_.c_str(), CallFrameInfo::KindName(kind), offset,
3299	section_.c_str());
3300	}
3301
3302	void CallFrameInfo::Reporter::EarlyEHTerminator(uint64_t offset) {
3303	fprintf(stderr,
3304	"%s: CFI at offset 0x%" PRIx64 " in '%s': saw end-of-data marker"
3305	" before end of section contents\n",
3306	filename_.c_str(), offset, section_.c_str());
3307	}
3308
3309	void CallFrameInfo::Reporter::CIEPointerOutOfRange(uint64_t offset,
3310	uint64_t cie_offset) {
3311	fprintf(stderr,
3312	"%s: CFI frame description entry at offset 0x%" PRIx64 " in '%s':"
3313	" CIE pointer is out of range: 0x%" PRIx64 "\n",
3314	filename_.c_str(), offset, section_.c_str(), cie_offset);
3315	}
3316
3317	void CallFrameInfo::Reporter::BadCIEId(uint64_t offset, uint64_t cie_offset) {
3318	fprintf(stderr,
3319	"%s: CFI frame description entry at offset 0x%" PRIx64 " in '%s':"
3320	" CIE pointer does not point to a CIE: 0x%" PRIx64 "\n",
3321	filename_.c_str(), offset, section_.c_str(), cie_offset);
3322	}
3323
3324	void CallFrameInfo::Reporter::UnexpectedAddressSize(uint64_t offset,
3325	uint8_t address_size) {
3326	fprintf(stderr,
3327	"%s: CFI frame description entry at offset 0x%" PRIx64 " in '%s':"
3328	" CIE specifies unexpected address size: %d\n",
3329	filename_.c_str(), offset, section_.c_str(), address_size);
3330	}
3331
3332	void CallFrameInfo::Reporter::UnexpectedSegmentSize(uint64_t offset,
3333	uint8_t segment_size) {
3334	fprintf(stderr,
3335	"%s: CFI frame description entry at offset 0x%" PRIx64 " in '%s':"
3336	" CIE specifies unexpected segment size: %d\n",
3337	filename_.c_str(), offset, section_.c_str(), segment_size);
3338	}
3339
3340	void CallFrameInfo::Reporter::UnrecognizedVersion(uint64_t offset, int version) {
3341	fprintf(stderr,
3342	"%s: CFI frame description entry at offset 0x%" PRIx64 " in '%s':"
3343	" CIE specifies unrecognized version: %d\n",
3344	filename_.c_str(), offset, section_.c_str(), version);
3345	}
3346
3347	void CallFrameInfo::Reporter::UnrecognizedAugmentation(uint64_t offset,
3348	const string& aug) {
3349	fprintf(stderr,
3350	"%s: CFI frame description entry at offset 0x%" PRIx64 " in '%s':"
3351	" CIE specifies unrecognized augmentation: '%s'\n",
3352	filename_.c_str(), offset, section_.c_str(), aug.c_str());
3353	}
3354
3355	void CallFrameInfo::Reporter::InvalidPointerEncoding(uint64_t offset,
3356	uint8_t encoding) {
3357	fprintf(stderr,
3358	"%s: CFI common information entry at offset 0x%" PRIx64 " in '%s':"
3359	" 'z' augmentation specifies invalid pointer encoding: 0x%02x\n",
3360	filename_.c_str(), offset, section_.c_str(), encoding);
3361	}
3362
3363	void CallFrameInfo::Reporter::UnusablePointerEncoding(uint64_t offset,
3364	uint8_t encoding) {
3365	fprintf(stderr,
3366	"%s: CFI common information entry at offset 0x%" PRIx64 " in '%s':"
3367	" 'z' augmentation specifies a pointer encoding for which"
3368	" we have no base address: 0x%02x\n",
3369	filename_.c_str(), offset, section_.c_str(), encoding);
3370	}
3371
3372	void CallFrameInfo::Reporter::RestoreInCIE(uint64_t offset, uint64_t insn_offset) {
3373	fprintf(stderr,
3374	"%s: CFI common information entry at offset 0x%" PRIx64 " in '%s':"
3375	" the DW_CFA_restore instruction at offset 0x%" PRIx64
3376	" cannot be used in a common information entry\n",
3377	filename_.c_str(), offset, section_.c_str(), insn_offset);
3378	}
3379
3380	void CallFrameInfo::Reporter::BadInstruction(uint64_t offset,
3381	CallFrameInfo::EntryKind kind,
3382	uint64_t insn_offset) {
3383	fprintf(stderr,
3384	"%s: CFI %s at offset 0x%" PRIx64 " in section '%s':"
3385	" the instruction at offset 0x%" PRIx64 " is unrecognized\n",
3386	filename_.c_str(), CallFrameInfo::KindName(kind),
3387	offset, section_.c_str(), insn_offset);
3388	}
3389
3390	void CallFrameInfo::Reporter::NoCFARule(uint64_t offset,
3391	CallFrameInfo::EntryKind kind,
3392	uint64_t insn_offset) {
3393	fprintf(stderr,
3394	"%s: CFI %s at offset 0x%" PRIx64 " in section '%s':"
3395	" the instruction at offset 0x%" PRIx64 " assumes that a CFA rule has"
3396	" been set, but none has been set\n",
3397	filename_.c_str(), CallFrameInfo::KindName(kind), offset,
3398	section_.c_str(), insn_offset);
3399	}
3400
3401	void CallFrameInfo::Reporter::EmptyStateStack(uint64_t offset,
3402	CallFrameInfo::EntryKind kind,
3403	uint64_t insn_offset) {
3404	fprintf(stderr,
3405	"%s: CFI %s at offset 0x%" PRIx64 " in section '%s':"
3406	" the DW_CFA_restore_state instruction at offset 0x%" PRIx64
3407	" should pop a saved state from the stack, but the stack is empty\n",
3408	filename_.c_str(), CallFrameInfo::KindName(kind), offset,
3409	section_.c_str(), insn_offset);
3410	}
3411
3412	void CallFrameInfo::Reporter::ClearingCFARule(uint64_t offset,
3413	CallFrameInfo::EntryKind kind,
3414	uint64_t insn_offset) {
3415	fprintf(stderr,
3416	"%s: CFI %s at offset 0x%" PRIx64 " in section '%s':"
3417	" the DW_CFA_restore_state instruction at offset 0x%" PRIx64
3418	" would clear the CFA rule in effect\n",
3419	filename_.c_str(), CallFrameInfo::KindName(kind), offset,
3420	section_.c_str(), insn_offset);
3421	}
3422
3423	} // namespace google_breakpad
3424

Browse the source code of breakpad/common/dwarf/dwarf2reader.cc