1// Copyright (c) 2010 Google Inc. All Rights Reserved.
2//
3// Redistribution and use in source and binary forms, with or without
4// modification, are permitted provided that the following conditions are
5// met:
6//
7// * Redistributions of source code must retain the above copyright
8// notice, this list of conditions and the following disclaimer.
9// * Redistributions in binary form must reproduce the above
10// copyright notice, this list of conditions and the following disclaimer
11// in the documentation and/or other materials provided with the
12// distribution.
13// * Neither the name of Google Inc. nor the names of its
14// contributors may be used to endorse or promote products derived from
15// this software without specific prior written permission.
16//
17// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
29// CFI reader author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com>
30
31// Implementation of LineInfo, CompilationUnit,
32// and CallFrameInfo. See dwarf2reader.h for details.
33
34#include "common/dwarf/dwarf2reader.h"
35
36#include <stdint.h>
37#include <stdio.h>
38#include <string.h>
39
40#include <map>
41#include <memory>
42#include <stack>
43#include <string>
44#include <utility>
45
46#include <sys/stat.h>
47
48#include "common/dwarf/bytereader-inl.h"
49#include "common/dwarf/bytereader.h"
50#include "common/dwarf/line_state_machine.h"
51#include "common/using_std_string.h"
52#include "google_breakpad/common/breakpad_types.h"
53
54namespace google_breakpad {
55
56const SectionMap::const_iterator GetSectionByName(const SectionMap&
57 sections, const char *name) {
58 assert(name[0] == '.');
59 auto iter = sections.find(name);
60 if (iter != sections.end())
61 return iter;
62 std::string macho_name("__");
63 macho_name += name + 1;
64 iter = sections.find(macho_name);
65 return iter;
66}
67
68CompilationUnit::CompilationUnit(const string& path,
69 const SectionMap& sections, uint64_t offset,
70 ByteReader* reader, Dwarf2Handler* handler)
71 : path_(path), offset_from_section_start_(offset), reader_(reader),
72 sections_(sections), handler_(handler), abbrevs_(),
73 string_buffer_(NULL), string_buffer_length_(0),
74 line_string_buffer_(NULL), line_string_buffer_length_(0),
75 str_offsets_buffer_(NULL), str_offsets_buffer_length_(0),
76 addr_buffer_(NULL), addr_buffer_length_(0),
77 is_split_dwarf_(false), is_type_unit_(false), dwo_id_(0), dwo_name_(),
78 skeleton_dwo_id_(0), ranges_base_(0), addr_base_(0),
79 str_offsets_base_(0), have_checked_for_dwp_(false), dwp_path_(),
80 dwp_byte_reader_(), dwp_reader_() {}
81
82// Initialize a compilation unit from a .dwo or .dwp file.
83// In this case, we need the .debug_addr section from the
84// executable file that contains the corresponding skeleton
85// compilation unit. We also inherit the Dwarf2Handler from
86// the executable file, and call it as if we were still
87// processing the original compilation unit.
88
89void CompilationUnit::SetSplitDwarf(const uint8_t* addr_buffer,
90 uint64_t addr_buffer_length,
91 uint64_t addr_base,
92 uint64_t ranges_base,
93 uint64_t dwo_id) {
94 is_split_dwarf_ = true;
95 addr_buffer_ = addr_buffer;
96 addr_buffer_length_ = addr_buffer_length;
97 addr_base_ = addr_base;
98 ranges_base_ = ranges_base;
99 skeleton_dwo_id_ = dwo_id;
100}
101
102// Read a DWARF2/3 abbreviation section.
103// Each abbrev consists of a abbreviation number, a tag, a byte
104// specifying whether the tag has children, and a list of
105// attribute/form pairs.
106// The list of forms is terminated by a 0 for the attribute, and a
107// zero for the form. The entire abbreviation section is terminated
108// by a zero for the code.
109
110void CompilationUnit::ReadAbbrevs() {
111 if (abbrevs_)
112 return;
113
114 // First get the debug_abbrev section.
115 SectionMap::const_iterator iter =
116 GetSectionByName(sections_, ".debug_abbrev");
117 assert(iter != sections_.end());
118
119 abbrevs_ = new std::vector<Abbrev>;
120 abbrevs_->resize(1);
121
122 // The only way to check whether we are reading over the end of the
123 // buffer would be to first compute the size of the leb128 data by
124 // reading it, then go back and read it again.
125 const uint8_t* abbrev_start = iter->second.first +
126 header_.abbrev_offset;
127 const uint8_t* abbrevptr = abbrev_start;
128#ifndef NDEBUG
129 const uint64_t abbrev_length = iter->second.second - header_.abbrev_offset;
130#endif
131
132 while (1) {
133 CompilationUnit::Abbrev abbrev;
134 size_t len;
135 const uint64_t number = reader_->ReadUnsignedLEB128(abbrevptr, &len);
136
137 if (number == 0)
138 break;
139 abbrev.number = number;
140 abbrevptr += len;
141
142 assert(abbrevptr < abbrev_start + abbrev_length);
143 const uint64_t tag = reader_->ReadUnsignedLEB128(abbrevptr, &len);
144 abbrevptr += len;
145 abbrev.tag = static_cast<enum DwarfTag>(tag);
146
147 assert(abbrevptr < abbrev_start + abbrev_length);
148 abbrev.has_children = reader_->ReadOneByte(abbrevptr);
149 abbrevptr += 1;
150
151 assert(abbrevptr < abbrev_start + abbrev_length);
152
153 while (1) {
154 const uint64_t nametemp = reader_->ReadUnsignedLEB128(abbrevptr, &len);
155 abbrevptr += len;
156
157 assert(abbrevptr < abbrev_start + abbrev_length);
158 const uint64_t formtemp = reader_->ReadUnsignedLEB128(abbrevptr, &len);
159 abbrevptr += len;
160 if (nametemp == 0 && formtemp == 0)
161 break;
162
163 uint64_t value = 0;
164 if (formtemp == DW_FORM_implicit_const) {
165 value = reader_->ReadUnsignedLEB128(abbrevptr, &len);
166 abbrevptr += len;
167 }
168 AttrForm abbrev_attr(static_cast<enum DwarfAttribute>(nametemp),
169 static_cast<enum DwarfForm>(formtemp),
170 value);
171 abbrev.attributes.push_back(abbrev_attr);
172 }
173 assert(abbrev.number == abbrevs_->size());
174 abbrevs_->push_back(abbrev);
175 }
176}
177
178// Skips a single DIE's attributes.
179const uint8_t* CompilationUnit::SkipDIE(const uint8_t* start,
180 const Abbrev& abbrev) {
181 for (AttributeList::const_iterator i = abbrev.attributes.begin();
182 i != abbrev.attributes.end();
183 i++) {
184 start = SkipAttribute(start, i->form_);
185 }
186 return start;
187}
188
189// Skips a single attribute form's data.
190const uint8_t* CompilationUnit::SkipAttribute(const uint8_t* start,
191 enum DwarfForm form) {
192 size_t len;
193
194 switch (form) {
195 case DW_FORM_indirect:
196 form = static_cast<enum DwarfForm>(reader_->ReadUnsignedLEB128(start,
197 &len));
198 start += len;
199 return SkipAttribute(start, form);
200
201 case DW_FORM_flag_present:
202 case DW_FORM_implicit_const:
203 return start;
204 case DW_FORM_addrx1:
205 case DW_FORM_data1:
206 case DW_FORM_flag:
207 case DW_FORM_ref1:
208 case DW_FORM_strx1:
209 return start + 1;
210 case DW_FORM_addrx2:
211 case DW_FORM_ref2:
212 case DW_FORM_data2:
213 case DW_FORM_strx2:
214 return start + 2;
215 case DW_FORM_addrx3:
216 case DW_FORM_strx3:
217 return start + 3;
218 case DW_FORM_addrx4:
219 case DW_FORM_ref4:
220 case DW_FORM_data4:
221 case DW_FORM_strx4:
222 case DW_FORM_ref_sup4:
223 return start + 4;
224 case DW_FORM_ref8:
225 case DW_FORM_data8:
226 case DW_FORM_ref_sig8:
227 case DW_FORM_ref_sup8:
228 return start + 8;
229 case DW_FORM_data16:
230 return start + 16;
231 case DW_FORM_string:
232 return start + strlen(reinterpret_cast<const char*>(start)) + 1;
233 case DW_FORM_udata:
234 case DW_FORM_ref_udata:
235 case DW_FORM_strx:
236 case DW_FORM_GNU_str_index:
237 case DW_FORM_GNU_addr_index:
238 case DW_FORM_addrx:
239 case DW_FORM_rnglistx:
240 case DW_FORM_loclistx:
241 reader_->ReadUnsignedLEB128(start, &len);
242 return start + len;
243
244 case DW_FORM_sdata:
245 reader_->ReadSignedLEB128(start, &len);
246 return start + len;
247 case DW_FORM_addr:
248 return start + reader_->AddressSize();
249 case DW_FORM_ref_addr:
250 // DWARF2 and 3/4 differ on whether ref_addr is address size or
251 // offset size.
252 assert(header_.version >= 2);
253 if (header_.version == 2) {
254 return start + reader_->AddressSize();
255 } else if (header_.version >= 3) {
256 return start + reader_->OffsetSize();
257 }
258 break;
259
260 case DW_FORM_block1:
261 return start + 1 + reader_->ReadOneByte(start);
262 case DW_FORM_block2:
263 return start + 2 + reader_->ReadTwoBytes(start);
264 case DW_FORM_block4:
265 return start + 4 + reader_->ReadFourBytes(start);
266 case DW_FORM_block:
267 case DW_FORM_exprloc: {
268 uint64_t size = reader_->ReadUnsignedLEB128(start, &len);
269 return start + size + len;
270 }
271 case DW_FORM_strp:
272 case DW_FORM_line_strp:
273 case DW_FORM_strp_sup:
274 case DW_FORM_sec_offset:
275 return start + reader_->OffsetSize();
276 }
277 fprintf(stderr,"Unhandled form type");
278 return NULL;
279}
280
281// Read the abbreviation offset from a compilation unit header.
282size_t CompilationUnit::ReadAbbrevOffset(const uint8_t* headerptr) {
283 assert(headerptr + reader_->OffsetSize() < buffer_ + buffer_length_);
284 header_.abbrev_offset = reader_->ReadOffset(headerptr);
285 return reader_->OffsetSize();
286}
287
288// Read the address size from a compilation unit header.
289size_t CompilationUnit::ReadAddressSize(const uint8_t* headerptr) {
290 // Compare against less than or equal because this may be the last
291 // section in the file.
292 assert(headerptr + 1 <= buffer_ + buffer_length_);
293 header_.address_size = reader_->ReadOneByte(headerptr);
294 reader_->SetAddressSize(header_.address_size);
295 return 1;
296}
297
298// Read the DWO id from a split or skeleton compilation unit header.
299size_t CompilationUnit::ReadDwoId(const uint8_t* headerptr) {
300 assert(headerptr + 8 <= buffer_ + buffer_length_);
301 dwo_id_ = reader_->ReadEightBytes(headerptr);
302 return 8;
303}
304
305// Read the type signature from a type or split type compilation unit header.
306size_t CompilationUnit::ReadTypeSignature(const uint8_t* headerptr) {
307 assert(headerptr + 8 <= buffer_ + buffer_length_);
308 type_signature_ = reader_->ReadEightBytes(headerptr);
309 return 8;
310}
311
312// Read the DWO id from a split or skeleton compilation unit header.
313size_t CompilationUnit::ReadTypeOffset(const uint8_t* headerptr) {
314 assert(headerptr + reader_->OffsetSize() < buffer_ + buffer_length_);
315 type_offset_ = reader_->ReadOffset(headerptr);
316 return reader_->OffsetSize();
317}
318
319
320// Read a DWARF header. The header is variable length in DWARF3 and DWARF4
321// (and DWARF2 as extended by most compilers), and consists of an length
322// field, a version number, the offset in the .debug_abbrev section for our
323// abbrevs, and an address size. DWARF5 adds a unit_type to distinguish
324// between partial-, full-, skeleton-, split-, and type- compilation units.
325void CompilationUnit::ReadHeader() {
326 const uint8_t* headerptr = buffer_;
327 size_t initial_length_size;
328
329 assert(headerptr + 4 < buffer_ + buffer_length_);
330 const uint64_t initial_length
331 = reader_->ReadInitialLength(headerptr, &initial_length_size);
332 headerptr += initial_length_size;
333 header_.length = initial_length;
334
335 assert(headerptr + 2 < buffer_ + buffer_length_);
336 header_.version = reader_->ReadTwoBytes(headerptr);
337 headerptr += 2;
338
339 if (header_.version <= 4) {
340 // Older versions of dwarf have a relatively simple structure.
341 headerptr += ReadAbbrevOffset(headerptr);
342 headerptr += ReadAddressSize(headerptr);
343 } else {
344 // DWARF5 adds a unit_type field, and various fields based on unit_type.
345 assert(headerptr + 1 < buffer_ + buffer_length_);
346 uint8_t unit_type = reader_->ReadOneByte(headerptr);
347 headerptr += 1;
348 headerptr += ReadAddressSize(headerptr);
349 headerptr += ReadAbbrevOffset(headerptr);
350 switch (unit_type) {
351 case DW_UT_compile:
352 case DW_UT_partial:
353 // nothing else to read
354 break;
355 case DW_UT_skeleton:
356 case DW_UT_split_compile:
357 headerptr += ReadDwoId(headerptr);
358 break;
359 case DW_UT_type:
360 case DW_UT_split_type:
361 is_type_unit_ = true;
362 headerptr += ReadTypeSignature(headerptr);
363 headerptr += ReadTypeOffset(headerptr);
364 break;
365 default:
366 fprintf(stderr, "Unhandled compilation unit type 0x%x", unit_type);
367 break;
368 }
369 }
370 after_header_ = headerptr;
371
372 // This check ensures that we don't have to do checking during the
373 // reading of DIEs. header_.length does not include the size of the
374 // initial length.
375 assert(buffer_ + initial_length_size + header_.length <=
376 buffer_ + buffer_length_);
377}
378
379uint64_t CompilationUnit::Start() {
380 // First get the debug_info section.
381 SectionMap::const_iterator iter =
382 GetSectionByName(sections_, ".debug_info");
383 assert(iter != sections_.end());
384
385 // Set up our buffer
386 buffer_ = iter->second.first + offset_from_section_start_;
387 buffer_length_ = iter->second.second - offset_from_section_start_;
388
389 // Read the header
390 ReadHeader();
391
392 // Figure out the real length from the end of the initial length to
393 // the end of the compilation unit, since that is the value we
394 // return.
395 uint64_t ourlength = header_.length;
396 if (reader_->OffsetSize() == 8)
397 ourlength += 12;
398 else
399 ourlength += 4;
400
401 // See if the user wants this compilation unit, and if not, just return.
402 if (!handler_->StartCompilationUnit(offset_from_section_start_,
403 reader_->AddressSize(),
404 reader_->OffsetSize(),
405 header_.length,
406 header_.version))
407 return ourlength;
408 else if (header_.version == 5 && is_type_unit_)
409 return ourlength;
410
411 // Otherwise, continue by reading our abbreviation entries.
412 ReadAbbrevs();
413
414 // Set the string section if we have one.
415 iter = GetSectionByName(sections_, ".debug_str");
416 if (iter != sections_.end()) {
417 string_buffer_ = iter->second.first;
418 string_buffer_length_ = iter->second.second;
419 }
420
421 // Set the line string section if we have one.
422 iter = GetSectionByName(sections_, ".debug_line_str");
423 if (iter != sections_.end()) {
424 line_string_buffer_ = iter->second.first;
425 line_string_buffer_length_ = iter->second.second;
426 }
427
428 // Set the string offsets section if we have one.
429 iter = GetSectionByName(sections_, ".debug_str_offsets");
430 if (iter != sections_.end()) {
431 str_offsets_buffer_ = iter->second.first;
432 str_offsets_buffer_length_ = iter->second.second;
433 }
434
435 // Set the address section if we have one.
436 iter = GetSectionByName(sections_, ".debug_addr");
437 if (iter != sections_.end()) {
438 addr_buffer_ = iter->second.first;
439 addr_buffer_length_ = iter->second.second;
440 }
441
442 // Now that we have our abbreviations, start processing DIE's.
443 ProcessDIEs();
444
445 // If this is a skeleton compilation unit generated with split DWARF,
446 // and the client needs the full debug info, we need to find the full
447 // compilation unit in a .dwo or .dwp file.
448 if (!is_split_dwarf_
449 && dwo_name_ != NULL
450 && handler_->NeedSplitDebugInfo())
451 ProcessSplitDwarf();
452
453 return ourlength;
454}
455
456void CompilationUnit::ProcessFormStringIndex(
457 uint64_t dieoffset, enum DwarfAttribute attr, enum DwarfForm form,
458 uint64_t str_index) {
459 const size_t kStringOffsetsTableHeaderSize =
460 header_.version >= 5 ? (reader_->OffsetSize() == 8 ? 16 : 8) : 0;
461 const uint8_t* str_offsets_table_after_header = str_offsets_base_ ?
462 str_offsets_buffer_ + str_offsets_base_ :
463 str_offsets_buffer_ + kStringOffsetsTableHeaderSize;
464 const uint8_t* offset_ptr =
465 str_offsets_table_after_header + str_index * reader_->OffsetSize();
466
467 const uint64_t offset = reader_->ReadOffset(offset_ptr);
468 if (offset >= string_buffer_length_) {
469 return;
470 }
471
472 const char* str = reinterpret_cast<const char*>(string_buffer_) + offset;
473 ProcessAttributeString(dieoffset, attr, form, str);
474}
475
476// Special function for pre-processing the
477// DW_AT_str_offsets_base and DW_AT_addr_base in a DW_TAG_compile_unit die (for
478// DWARF v5). We must make sure to find and process the
479// DW_AT_str_offsets_base and DW_AT_addr_base attributes before attempting to
480// read any string and address attribute in the compile unit.
481const uint8_t* CompilationUnit::ProcessOffsetBaseAttribute(
482 uint64_t dieoffset, const uint8_t* start, enum DwarfAttribute attr,
483 enum DwarfForm form, uint64_t implicit_const) {
484 size_t len;
485
486 switch (form) {
487 // DW_FORM_indirect is never used because it is such a space
488 // waster.
489 case DW_FORM_indirect:
490 form = static_cast<enum DwarfForm>(reader_->ReadUnsignedLEB128(start,
491 &len));
492 start += len;
493 return ProcessOffsetBaseAttribute(dieoffset, start, attr, form,
494 implicit_const);
495
496 case DW_FORM_flag_present:
497 return start;
498 case DW_FORM_data1:
499 case DW_FORM_flag:
500 return start + 1;
501 case DW_FORM_data2:
502 return start + 2;
503 case DW_FORM_data4:
504 return start + 4;
505 case DW_FORM_data8:
506 return start + 8;
507 case DW_FORM_data16:
508 // This form is designed for an md5 checksum inside line tables.
509 return start + 16;
510 case DW_FORM_string: {
511 const char* str = reinterpret_cast<const char*>(start);
512 return start + strlen(str) + 1;
513 }
514 case DW_FORM_udata:
515 reader_->ReadUnsignedLEB128(start, &len);
516 return start + len;
517 case DW_FORM_sdata:
518 reader_->ReadSignedLEB128(start, &len);
519 return start + len;
520 case DW_FORM_addr:
521 reader_->ReadAddress(start);
522 return start + reader_->AddressSize();
523
524 // This is the important one here!
525 case DW_FORM_sec_offset:
526 if (attr == DW_AT_str_offsets_base ||
527 attr == DW_AT_addr_base)
528 ProcessAttributeUnsigned(dieoffset, attr, form,
529 reader_->ReadOffset(start));
530 else
531 reader_->ReadOffset(start);
532 return start + reader_->OffsetSize();
533
534 case DW_FORM_ref1:
535 return start + 1;
536 case DW_FORM_ref2:
537 return start + 2;
538 case DW_FORM_ref4:
539 return start + 4;
540 case DW_FORM_ref8:
541 return start + 8;
542 case DW_FORM_ref_udata:
543 reader_->ReadUnsignedLEB128(start, &len);
544 return start + len;
545 case DW_FORM_ref_addr:
546 // DWARF2 and 3/4 differ on whether ref_addr is address size or
547 // offset size.
548 assert(header_.version >= 2);
549 if (header_.version == 2) {
550 reader_->ReadAddress(start);
551 return start + reader_->AddressSize();
552 } else if (header_.version >= 3) {
553 reader_->ReadOffset(start);
554 return start + reader_->OffsetSize();
555 }
556 break;
557 case DW_FORM_ref_sig8:
558 return start + 8;
559 case DW_FORM_implicit_const:
560 return start;
561 case DW_FORM_block1: {
562 uint64_t datalen = reader_->ReadOneByte(start);
563 return start + 1 + datalen;
564 }
565 case DW_FORM_block2: {
566 uint64_t datalen = reader_->ReadTwoBytes(start);
567 return start + 2 + datalen;
568 }
569 case DW_FORM_block4: {
570 uint64_t datalen = reader_->ReadFourBytes(start);
571 return start + 4 + datalen;
572 }
573 case DW_FORM_block:
574 case DW_FORM_exprloc: {
575 uint64_t datalen = reader_->ReadUnsignedLEB128(start, &len);
576 return start + datalen + len;
577 }
578 case DW_FORM_strp: {
579 reader_->ReadOffset(start);
580 return start + reader_->OffsetSize();
581 }
582 case DW_FORM_line_strp: {
583 reader_->ReadOffset(start);
584 return start + reader_->OffsetSize();
585 }
586 case DW_FORM_strp_sup:
587 return start + 4;
588 case DW_FORM_ref_sup4:
589 return start + 4;
590 case DW_FORM_ref_sup8:
591 return start + 8;
592 case DW_FORM_loclistx:
593 reader_->ReadUnsignedLEB128(start, &len);
594 return start + len;
595 case DW_FORM_strx:
596 case DW_FORM_GNU_str_index: {
597 reader_->ReadUnsignedLEB128(start, &len);
598 return start + len;
599 }
600 case DW_FORM_strx1: {
601 return start + 1;
602 }
603 case DW_FORM_strx2: {
604 return start + 2;
605 }
606 case DW_FORM_strx3: {
607 return start + 3;
608 }
609 case DW_FORM_strx4: {
610 return start + 4;
611 }
612
613 case DW_FORM_addrx:
614 case DW_FORM_GNU_addr_index:
615 reader_->ReadUnsignedLEB128(start, &len);
616 return start + len;
617 case DW_FORM_addrx1:
618 return start + 1;
619 case DW_FORM_addrx2:
620 return start + 2;
621 case DW_FORM_addrx3:
622 return start + 3;
623 case DW_FORM_addrx4:
624 return start + 4;
625 case DW_FORM_rnglistx:
626 reader_->ReadUnsignedLEB128(start, &len);
627 return start + len;
628 }
629 fprintf(stderr, "Unhandled form type\n");
630 return NULL;
631}
632
633// If one really wanted, you could merge SkipAttribute and
634// ProcessAttribute
635// This is all boring data manipulation and calling of the handler.
636const uint8_t* CompilationUnit::ProcessAttribute(
637 uint64_t dieoffset, const uint8_t* start, enum DwarfAttribute attr,
638 enum DwarfForm form, uint64_t implicit_const) {
639 size_t len;
640
641 switch (form) {
642 // DW_FORM_indirect is never used because it is such a space
643 // waster.
644 case DW_FORM_indirect:
645 form = static_cast<enum DwarfForm>(reader_->ReadUnsignedLEB128(start,
646 &len));
647 start += len;
648 return ProcessAttribute(dieoffset, start, attr, form, implicit_const);
649
650 case DW_FORM_flag_present:
651 ProcessAttributeUnsigned(dieoffset, attr, form, 1);
652 return start;
653 case DW_FORM_data1:
654 case DW_FORM_flag:
655 ProcessAttributeUnsigned(dieoffset, attr, form,
656 reader_->ReadOneByte(start));
657 return start + 1;
658 case DW_FORM_data2:
659 ProcessAttributeUnsigned(dieoffset, attr, form,
660 reader_->ReadTwoBytes(start));
661 return start + 2;
662 case DW_FORM_data4:
663 ProcessAttributeUnsigned(dieoffset, attr, form,
664 reader_->ReadFourBytes(start));
665 return start + 4;
666 case DW_FORM_data8:
667 ProcessAttributeUnsigned(dieoffset, attr, form,
668 reader_->ReadEightBytes(start));
669 return start + 8;
670 case DW_FORM_data16:
671 // This form is designed for an md5 checksum inside line tables.
672 fprintf(stderr, "Unhandled form type: DW_FORM_data16\n");
673 return start + 16;
674 case DW_FORM_string: {
675 const char* str = reinterpret_cast<const char*>(start);
676 ProcessAttributeString(dieoffset, attr, form, str);
677 return start + strlen(str) + 1;
678 }
679 case DW_FORM_udata:
680 ProcessAttributeUnsigned(dieoffset, attr, form,
681 reader_->ReadUnsignedLEB128(start, &len));
682 return start + len;
683
684 case DW_FORM_sdata:
685 ProcessAttributeSigned(dieoffset, attr, form,
686 reader_->ReadSignedLEB128(start, &len));
687 return start + len;
688 case DW_FORM_addr:
689 ProcessAttributeUnsigned(dieoffset, attr, form,
690 reader_->ReadAddress(start));
691 return start + reader_->AddressSize();
692 case DW_FORM_sec_offset:
693 ProcessAttributeUnsigned(dieoffset, attr, form,
694 reader_->ReadOffset(start));
695 return start + reader_->OffsetSize();
696
697 case DW_FORM_ref1:
698 handler_->ProcessAttributeReference(dieoffset, attr, form,
699 reader_->ReadOneByte(start)
700 + offset_from_section_start_);
701 return start + 1;
702 case DW_FORM_ref2:
703 handler_->ProcessAttributeReference(dieoffset, attr, form,
704 reader_->ReadTwoBytes(start)
705 + offset_from_section_start_);
706 return start + 2;
707 case DW_FORM_ref4:
708 handler_->ProcessAttributeReference(dieoffset, attr, form,
709 reader_->ReadFourBytes(start)
710 + offset_from_section_start_);
711 return start + 4;
712 case DW_FORM_ref8:
713 handler_->ProcessAttributeReference(dieoffset, attr, form,
714 reader_->ReadEightBytes(start)
715 + offset_from_section_start_);
716 return start + 8;
717 case DW_FORM_ref_udata:
718 handler_->ProcessAttributeReference(dieoffset, attr, form,
719 reader_->ReadUnsignedLEB128(start,
720 &len)
721 + offset_from_section_start_);
722 return start + len;
723 case DW_FORM_ref_addr:
724 // DWARF2 and 3/4 differ on whether ref_addr is address size or
725 // offset size.
726 assert(header_.version >= 2);
727 if (header_.version == 2) {
728 handler_->ProcessAttributeReference(dieoffset, attr, form,
729 reader_->ReadAddress(start));
730 return start + reader_->AddressSize();
731 } else if (header_.version >= 3) {
732 handler_->ProcessAttributeReference(dieoffset, attr, form,
733 reader_->ReadOffset(start));
734 return start + reader_->OffsetSize();
735 }
736 break;
737 case DW_FORM_ref_sig8:
738 handler_->ProcessAttributeSignature(dieoffset, attr, form,
739 reader_->ReadEightBytes(start));
740 return start + 8;
741 case DW_FORM_implicit_const:
742 handler_->ProcessAttributeUnsigned(dieoffset, attr, form,
743 implicit_const);
744 return start;
745 case DW_FORM_block1: {
746 uint64_t datalen = reader_->ReadOneByte(start);
747 handler_->ProcessAttributeBuffer(dieoffset, attr, form, start + 1,
748 datalen);
749 return start + 1 + datalen;
750 }
751 case DW_FORM_block2: {
752 uint64_t datalen = reader_->ReadTwoBytes(start);
753 handler_->ProcessAttributeBuffer(dieoffset, attr, form, start + 2,
754 datalen);
755 return start + 2 + datalen;
756 }
757 case DW_FORM_block4: {
758 uint64_t datalen = reader_->ReadFourBytes(start);
759 handler_->ProcessAttributeBuffer(dieoffset, attr, form, start + 4,
760 datalen);
761 return start + 4 + datalen;
762 }
763 case DW_FORM_block:
764 case DW_FORM_exprloc: {
765 uint64_t datalen = reader_->ReadUnsignedLEB128(start, &len);
766 handler_->ProcessAttributeBuffer(dieoffset, attr, form, start + len,
767 datalen);
768 return start + datalen + len;
769 }
770 case DW_FORM_strp: {
771 assert(string_buffer_ != NULL);
772
773 const uint64_t offset = reader_->ReadOffset(start);
774 assert(string_buffer_ + offset < string_buffer_ + string_buffer_length_);
775
776 const char* str = reinterpret_cast<const char*>(string_buffer_ + offset);
777 ProcessAttributeString(dieoffset, attr, form, str);
778 return start + reader_->OffsetSize();
779 }
780 case DW_FORM_line_strp: {
781 assert(line_string_buffer_ != NULL);
782
783 const uint64_t offset = reader_->ReadOffset(start);
784 assert(line_string_buffer_ + offset <
785 line_string_buffer_ + line_string_buffer_length_);
786
787 const char* str =
788 reinterpret_cast<const char*>(line_string_buffer_ + offset);
789 ProcessAttributeString(dieoffset, attr, form, str);
790 return start + reader_->OffsetSize();
791 }
792 case DW_FORM_strp_sup:
793 // No support currently for suplementary object files.
794 fprintf(stderr, "Unhandled form type: DW_FORM_strp_sup\n");
795 return start + 4;
796 case DW_FORM_ref_sup4:
797 // No support currently for suplementary object files.
798 fprintf(stderr, "Unhandled form type: DW_FORM_ref_sup4\n");
799 return start + 4;
800 case DW_FORM_ref_sup8:
801 // No support currently for suplementary object files.
802 fprintf(stderr, "Unhandled form type: DW_FORM_ref_sup8\n");
803 return start + 8;
804 case DW_FORM_loclistx:
805 ProcessAttributeUnsigned(dieoffset, attr, form,
806 reader_->ReadUnsignedLEB128(start, &len));
807 return start + len;
808 case DW_FORM_strx:
809 case DW_FORM_GNU_str_index: {
810 uint64_t str_index = reader_->ReadUnsignedLEB128(start, &len);
811 ProcessFormStringIndex(dieoffset, attr, form, str_index);
812 return start + len;
813 }
814 case DW_FORM_strx1: {
815 uint64_t str_index = reader_->ReadOneByte(start);
816 ProcessFormStringIndex(dieoffset, attr, form, str_index);
817 return start + 1;
818 }
819 case DW_FORM_strx2: {
820 uint64_t str_index = reader_->ReadTwoBytes(start);
821 ProcessFormStringIndex(dieoffset, attr, form, str_index);
822 return start + 2;
823 }
824 case DW_FORM_strx3: {
825 uint64_t str_index = reader_->ReadThreeBytes(start);
826 ProcessFormStringIndex(dieoffset, attr, form, str_index);
827 return start + 3;
828 }
829 case DW_FORM_strx4: {
830 uint64_t str_index = reader_->ReadFourBytes(start);
831 ProcessFormStringIndex(dieoffset, attr, form, str_index);
832 return start + 4;
833 }
834
835 case DW_FORM_addrx:
836 case DW_FORM_GNU_addr_index:
837 ProcessAttributeAddrIndex(
838 dieoffset, attr, form, reader_->ReadUnsignedLEB128(start, &len));
839 return start + len;
840 case DW_FORM_addrx1:
841 ProcessAttributeAddrIndex(
842 dieoffset, attr, form, reader_->ReadOneByte(start));
843 return start + 1;
844 case DW_FORM_addrx2:
845 ProcessAttributeAddrIndex(
846 dieoffset, attr, form, reader_->ReadTwoBytes(start));
847 return start + 2;
848 case DW_FORM_addrx3:
849 ProcessAttributeAddrIndex(
850 dieoffset, attr, form, reader_->ReadThreeBytes(start));
851 return start + 3;
852 case DW_FORM_addrx4:
853 ProcessAttributeAddrIndex(
854 dieoffset, attr, form, reader_->ReadFourBytes(start));
855 return start + 4;
856 case DW_FORM_rnglistx:
857 ProcessAttributeUnsigned(
858 dieoffset, attr, form, reader_->ReadUnsignedLEB128(start, &len));
859 return start + len;
860 }
861 fprintf(stderr, "Unhandled form type\n");
862 return NULL;
863}
864
865const uint8_t* CompilationUnit::ProcessDIE(uint64_t dieoffset,
866 const uint8_t* start,
867 const Abbrev& abbrev) {
868 // With DWARF v5, the compile_unit die may contain a
869 // DW_AT_str_offsets_base or DW_AT_addr_base. If it does, that attribute must
870 // be found and processed before trying to process the other attributes;
871 // otherwise the string or address values will all come out incorrect.
872 if (abbrev.tag == DW_TAG_compile_unit && header_.version == 5) {
873 uint64_t dieoffset_copy = dieoffset;
874 const uint8_t* start_copy = start;
875 for (AttributeList::const_iterator i = abbrev.attributes.begin();
876 i != abbrev.attributes.end();
877 i++) {
878 start_copy = ProcessOffsetBaseAttribute(dieoffset_copy, start_copy,
879 i->attr_, i->form_,
880 i->value_);
881 }
882 }
883
884 for (AttributeList::const_iterator i = abbrev.attributes.begin();
885 i != abbrev.attributes.end();
886 i++) {
887 start = ProcessAttribute(dieoffset, start, i->attr_, i->form_, i->value_);
888 }
889
890 // If this is a compilation unit in a split DWARF object, verify that
891 // the dwo_id matches. If it does not match, we will ignore this
892 // compilation unit.
893 if (abbrev.tag == DW_TAG_compile_unit
894 && is_split_dwarf_
895 && dwo_id_ != skeleton_dwo_id_) {
896 return NULL;
897 }
898
899 return start;
900}
901
902void CompilationUnit::ProcessDIEs() {
903 const uint8_t* dieptr = after_header_;
904 size_t len;
905
906 // lengthstart is the place the length field is based on.
907 // It is the point in the header after the initial length field
908 const uint8_t* lengthstart = buffer_;
909
910 // In 64 bit dwarf, the initial length is 12 bytes, because of the
911 // 0xffffffff at the start.
912 if (reader_->OffsetSize() == 8)
913 lengthstart += 12;
914 else
915 lengthstart += 4;
916
917 std::stack<uint64_t> die_stack;
918
919 while (dieptr < (lengthstart + header_.length)) {
920 // We give the user the absolute offset from the beginning of
921 // debug_info, since they need it to deal with ref_addr forms.
922 uint64_t absolute_offset = (dieptr - buffer_) + offset_from_section_start_;
923
924 uint64_t abbrev_num = reader_->ReadUnsignedLEB128(dieptr, &len);
925
926 dieptr += len;
927
928 // Abbrev == 0 represents the end of a list of children, or padding
929 // at the end of the compilation unit.
930 if (abbrev_num == 0) {
931 if (die_stack.size() == 0)
932 // If it is padding, then we are done with the compilation unit's DIEs.
933 return;
934 const uint64_t offset = die_stack.top();
935 die_stack.pop();
936 handler_->EndDIE(offset);
937 continue;
938 }
939
940 const Abbrev& abbrev = abbrevs_->at(static_cast<size_t>(abbrev_num));
941 const enum DwarfTag tag = abbrev.tag;
942 if (!handler_->StartDIE(absolute_offset, tag)) {
943 dieptr = SkipDIE(dieptr, abbrev);
944 } else {
945 dieptr = ProcessDIE(absolute_offset, dieptr, abbrev);
946 }
947
948 if (abbrev.has_children) {
949 die_stack.push(absolute_offset);
950 } else {
951 handler_->EndDIE(absolute_offset);
952 }
953 }
954}
955
956// Check for a valid ELF file and return the Address size.
957// Returns 0 if not a valid ELF file.
958inline int GetElfWidth(const ElfReader& elf) {
959 if (elf.IsElf32File())
960 return 4;
961 if (elf.IsElf64File())
962 return 8;
963 return 0;
964}
965
966void CompilationUnit::ProcessSplitDwarf() {
967 struct stat statbuf;
968 if (!have_checked_for_dwp_) {
969 // Look for a .dwp file in the same directory as the executable.
970 have_checked_for_dwp_ = true;
971 string dwp_suffix(".dwp");
972 dwp_path_ = path_ + dwp_suffix;
973 if (stat(dwp_path_.c_str(), &statbuf) != 0) {
974 // Fall back to a split .debug file in the same directory.
975 string debug_suffix(".debug");
976 dwp_path_ = path_;
977 size_t found = path_.rfind(debug_suffix);
978 if (found + debug_suffix.length() == path_.length())
979 dwp_path_ = dwp_path_.replace(found, debug_suffix.length(), dwp_suffix);
980 }
981 if (stat(dwp_path_.c_str(), &statbuf) == 0) {
982 ElfReader* elf = new ElfReader(dwp_path_);
983 int width = GetElfWidth(*elf);
984 if (width != 0) {
985 dwp_byte_reader_.reset(new ByteReader(reader_->GetEndianness()));
986 dwp_byte_reader_->SetAddressSize(width);
987 dwp_reader_.reset(new DwpReader(*dwp_byte_reader_, elf));
988 dwp_reader_->Initialize();
989 } else {
990 delete elf;
991 }
992 }
993 }
994 bool found_in_dwp = false;
995 if (dwp_reader_) {
996 // If we have a .dwp file, read the debug sections for the requested CU.
997 SectionMap sections;
998 dwp_reader_->ReadDebugSectionsForCU(dwo_id_, &sections);
999 if (!sections.empty()) {
1000 found_in_dwp = true;
1001 CompilationUnit dwp_comp_unit(dwp_path_, sections, 0,
1002 dwp_byte_reader_.get(), handler_);
1003 dwp_comp_unit.SetSplitDwarf(addr_buffer_, addr_buffer_length_, addr_base_,
1004 ranges_base_, dwo_id_);
1005 dwp_comp_unit.Start();
1006 }
1007 }
1008 if (!found_in_dwp) {
1009 // If no .dwp file, try to open the .dwo file.
1010 if (stat(dwo_name_, &statbuf) == 0) {
1011 ElfReader elf(dwo_name_);
1012 int width = GetElfWidth(elf);
1013 if (width != 0) {
1014 ByteReader reader(ENDIANNESS_LITTLE);
1015 reader.SetAddressSize(width);
1016 SectionMap sections;
1017 ReadDebugSectionsFromDwo(&elf, &sections);
1018 CompilationUnit dwo_comp_unit(dwo_name_, sections, 0, &reader,
1019 handler_);
1020 dwo_comp_unit.SetSplitDwarf(addr_buffer_, addr_buffer_length_,
1021 addr_base_, ranges_base_, dwo_id_);
1022 dwo_comp_unit.Start();
1023 }
1024 }
1025 }
1026}
1027
1028void CompilationUnit::ReadDebugSectionsFromDwo(ElfReader* elf_reader,
1029 SectionMap* sections) {
1030 static const char* const section_names[] = {
1031 ".debug_abbrev",
1032 ".debug_info",
1033 ".debug_str_offsets",
1034 ".debug_str"
1035 };
1036 for (unsigned int i = 0u;
1037 i < sizeof(section_names)/sizeof(*(section_names)); ++i) {
1038 string base_name = section_names[i];
1039 string dwo_name = base_name + ".dwo";
1040 size_t section_size;
1041 const char* section_data = elf_reader->GetSectionByName(dwo_name,
1042 &section_size);
1043 if (section_data != NULL)
1044 sections->insert(std::make_pair(
1045 base_name, std::make_pair(
1046 reinterpret_cast<const uint8_t*>(section_data),
1047 section_size)));
1048 }
1049}
1050
1051DwpReader::DwpReader(const ByteReader& byte_reader, ElfReader* elf_reader)
1052 : elf_reader_(elf_reader), byte_reader_(byte_reader),
1053 cu_index_(NULL), cu_index_size_(0), string_buffer_(NULL),
1054 string_buffer_size_(0), version_(0), ncolumns_(0), nunits_(0),
1055 nslots_(0), phash_(NULL), pindex_(NULL), shndx_pool_(NULL),
1056 offset_table_(NULL), size_table_(NULL), abbrev_data_(NULL),
1057 abbrev_size_(0), info_data_(NULL), info_size_(0),
1058 str_offsets_data_(NULL), str_offsets_size_(0) {}
1059
1060DwpReader::~DwpReader() {
1061 if (elf_reader_) delete elf_reader_;
1062}
1063
1064void DwpReader::Initialize() {
1065 cu_index_ = elf_reader_->GetSectionByName(".debug_cu_index",
1066 &cu_index_size_);
1067 if (cu_index_ == NULL) {
1068 return;
1069 }
1070 // The .debug_str.dwo section is shared by all CUs in the file.
1071 string_buffer_ = elf_reader_->GetSectionByName(".debug_str.dwo",
1072 &string_buffer_size_);
1073
1074 version_ = byte_reader_.ReadFourBytes(
1075 reinterpret_cast<const uint8_t*>(cu_index_));
1076
1077 if (version_ == 1) {
1078 nslots_ = byte_reader_.ReadFourBytes(
1079 reinterpret_cast<const uint8_t*>(cu_index_)
1080 + 3 * sizeof(uint32_t));
1081 phash_ = cu_index_ + 4 * sizeof(uint32_t);
1082 pindex_ = phash_ + nslots_ * sizeof(uint64_t);
1083 shndx_pool_ = pindex_ + nslots_ * sizeof(uint32_t);
1084 if (shndx_pool_ >= cu_index_ + cu_index_size_) {
1085 version_ = 0;
1086 }
1087 } else if (version_ == 2 || version_ == 5) {
1088 ncolumns_ = byte_reader_.ReadFourBytes(
1089 reinterpret_cast<const uint8_t*>(cu_index_) + sizeof(uint32_t));
1090 nunits_ = byte_reader_.ReadFourBytes(
1091 reinterpret_cast<const uint8_t*>(cu_index_) + 2 * sizeof(uint32_t));
1092 nslots_ = byte_reader_.ReadFourBytes(
1093 reinterpret_cast<const uint8_t*>(cu_index_) + 3 * sizeof(uint32_t));
1094 phash_ = cu_index_ + 4 * sizeof(uint32_t);
1095 pindex_ = phash_ + nslots_ * sizeof(uint64_t);
1096 offset_table_ = pindex_ + nslots_ * sizeof(uint32_t);
1097 size_table_ = offset_table_ + ncolumns_ * (nunits_ + 1) * sizeof(uint32_t);
1098 abbrev_data_ = elf_reader_->GetSectionByName(".debug_abbrev.dwo",
1099 &abbrev_size_);
1100 info_data_ = elf_reader_->GetSectionByName(".debug_info.dwo", &info_size_);
1101 str_offsets_data_ = elf_reader_->GetSectionByName(".debug_str_offsets.dwo",
1102 &str_offsets_size_);
1103 if (size_table_ >= cu_index_ + cu_index_size_) {
1104 version_ = 0;
1105 }
1106 }
1107}
1108
1109void DwpReader::ReadDebugSectionsForCU(uint64_t dwo_id,
1110 SectionMap* sections) {
1111 if (version_ == 1) {
1112 int slot = LookupCU(dwo_id);
1113 if (slot == -1) {
1114 return;
1115 }
1116
1117 // The index table points to the section index pool, where we
1118 // can read a list of section indexes for the debug sections
1119 // for the CU whose dwo_id we are looking for.
1120 int index = byte_reader_.ReadFourBytes(
1121 reinterpret_cast<const uint8_t*>(pindex_)
1122 + slot * sizeof(uint32_t));
1123 const char* shndx_list = shndx_pool_ + index * sizeof(uint32_t);
1124 for (;;) {
1125 if (shndx_list >= cu_index_ + cu_index_size_) {
1126 version_ = 0;
1127 return;
1128 }
1129 unsigned int shndx = byte_reader_.ReadFourBytes(
1130 reinterpret_cast<const uint8_t*>(shndx_list));
1131 shndx_list += sizeof(uint32_t);
1132 if (shndx == 0)
1133 break;
1134 const char* section_name = elf_reader_->GetSectionName(shndx);
1135 size_t section_size;
1136 const char* section_data;
1137 // We're only interested in these four debug sections.
1138 // The section names in the .dwo file end with ".dwo", but we
1139 // add them to the sections table with their normal names.
1140 if (!strncmp(section_name, ".debug_abbrev", strlen(".debug_abbrev"))) {
1141 section_data = elf_reader_->GetSectionByIndex(shndx, &section_size);
1142 sections->insert(std::make_pair(
1143 ".debug_abbrev",
1144 std::make_pair(reinterpret_cast<const uint8_t*> (section_data),
1145 section_size)));
1146 } else if (!strncmp(section_name, ".debug_info", strlen(".debug_info"))) {
1147 section_data = elf_reader_->GetSectionByIndex(shndx, &section_size);
1148 sections->insert(std::make_pair(
1149 ".debug_info",
1150 std::make_pair(reinterpret_cast<const uint8_t*> (section_data),
1151 section_size)));
1152 } else if (!strncmp(section_name, ".debug_str_offsets",
1153 strlen(".debug_str_offsets"))) {
1154 section_data = elf_reader_->GetSectionByIndex(shndx, &section_size);
1155 sections->insert(std::make_pair(
1156 ".debug_str_offsets",
1157 std::make_pair(reinterpret_cast<const uint8_t*> (section_data),
1158 section_size)));
1159 }
1160 }
1161 sections->insert(std::make_pair(
1162 ".debug_str",
1163 std::make_pair(reinterpret_cast<const uint8_t*> (string_buffer_),
1164 string_buffer_size_)));
1165 } else if (version_ == 2 || version_ == 5) {
1166 uint32_t index = LookupCUv2(dwo_id);
1167 if (index == 0) {
1168 return;
1169 }
1170
1171 // The index points to a row in each of the section offsets table
1172 // and the section size table, where we can read the offsets and sizes
1173 // of the contributions to each debug section from the CU whose dwo_id
1174 // we are looking for. Row 0 of the section offsets table has the
1175 // section ids for each column of the table. The size table begins
1176 // with row 1.
1177 const char* id_row = offset_table_;
1178 const char* offset_row = offset_table_
1179 + index * ncolumns_ * sizeof(uint32_t);
1180 const char* size_row =
1181 size_table_ + (index - 1) * ncolumns_ * sizeof(uint32_t);
1182 if (size_row + ncolumns_ * sizeof(uint32_t) > cu_index_ + cu_index_size_) {
1183 version_ = 0;
1184 return;
1185 }
1186 for (unsigned int col = 0u; col < ncolumns_; ++col) {
1187 uint32_t section_id =
1188 byte_reader_.ReadFourBytes(reinterpret_cast<const uint8_t*>(id_row)
1189 + col * sizeof(uint32_t));
1190 uint32_t offset = byte_reader_.ReadFourBytes(
1191 reinterpret_cast<const uint8_t*>(offset_row)
1192 + col * sizeof(uint32_t));
1193 uint32_t size = byte_reader_.ReadFourBytes(
1194 reinterpret_cast<const uint8_t*>(size_row) + col * sizeof(uint32_t));
1195 if (section_id == DW_SECT_ABBREV) {
1196 sections->insert(std::make_pair(
1197 ".debug_abbrev",
1198 std::make_pair(reinterpret_cast<const uint8_t*> (abbrev_data_)
1199 + offset, size)));
1200 } else if (section_id == DW_SECT_INFO) {
1201 sections->insert(std::make_pair(
1202 ".debug_info",
1203 std::make_pair(reinterpret_cast<const uint8_t*> (info_data_)
1204 + offset, size)));
1205 } else if (section_id == DW_SECT_STR_OFFSETS) {
1206 sections->insert(std::make_pair(
1207 ".debug_str_offsets",
1208 std::make_pair(reinterpret_cast<const uint8_t*> (str_offsets_data_)
1209 + offset, size)));
1210 }
1211 }
1212 sections->insert(std::make_pair(
1213 ".debug_str",
1214 std::make_pair(reinterpret_cast<const uint8_t*> (string_buffer_),
1215 string_buffer_size_)));
1216 }
1217}
1218
1219int DwpReader::LookupCU(uint64_t dwo_id) {
1220 uint32_t slot = static_cast<uint32_t>(dwo_id) & (nslots_ - 1);
1221 uint64_t probe = byte_reader_.ReadEightBytes(
1222 reinterpret_cast<const uint8_t*>(phash_) + slot * sizeof(uint64_t));
1223 if (probe != 0 && probe != dwo_id) {
1224 uint32_t secondary_hash =
1225 (static_cast<uint32_t>(dwo_id >> 32) & (nslots_ - 1)) | 1;
1226 do {
1227 slot = (slot + secondary_hash) & (nslots_ - 1);
1228 probe = byte_reader_.ReadEightBytes(
1229 reinterpret_cast<const uint8_t*>(phash_) + slot * sizeof(uint64_t));
1230 } while (probe != 0 && probe != dwo_id);
1231 }
1232 if (probe == 0)
1233 return -1;
1234 return slot;
1235}
1236
1237uint32_t DwpReader::LookupCUv2(uint64_t dwo_id) {
1238 uint32_t slot = static_cast<uint32_t>(dwo_id) & (nslots_ - 1);
1239 uint64_t probe = byte_reader_.ReadEightBytes(
1240 reinterpret_cast<const uint8_t*>(phash_) + slot * sizeof(uint64_t));
1241 uint32_t index = byte_reader_.ReadFourBytes(
1242 reinterpret_cast<const uint8_t*>(pindex_) + slot * sizeof(uint32_t));
1243 if (index != 0 && probe != dwo_id) {
1244 uint32_t secondary_hash =
1245 (static_cast<uint32_t>(dwo_id >> 32) & (nslots_ - 1)) | 1;
1246 do {
1247 slot = (slot + secondary_hash) & (nslots_ - 1);
1248 probe = byte_reader_.ReadEightBytes(
1249 reinterpret_cast<const uint8_t*>(phash_) + slot * sizeof(uint64_t));
1250 index = byte_reader_.ReadFourBytes(
1251 reinterpret_cast<const uint8_t*>(pindex_) + slot * sizeof(uint32_t));
1252 } while (index != 0 && probe != dwo_id);
1253 }
1254 return index;
1255}
1256
1257LineInfo::LineInfo(const uint8_t* buffer, uint64_t buffer_length,
1258 ByteReader* reader, const uint8_t* string_buffer,
1259 size_t string_buffer_length,
1260 const uint8_t* line_string_buffer,
1261 size_t line_string_buffer_length, LineInfoHandler* handler):
1262 handler_(handler), reader_(reader), buffer_(buffer),
1263 string_buffer_(string_buffer),
1264 line_string_buffer_(line_string_buffer) {
1265#ifndef NDEBUG
1266 buffer_length_ = buffer_length;
1267 string_buffer_length_ = string_buffer_length;
1268 line_string_buffer_length_ = line_string_buffer_length;
1269#endif
1270 header_.std_opcode_lengths = NULL;
1271}
1272
1273uint64_t LineInfo::Start() {
1274 ReadHeader();
1275 ReadLines();
1276 return after_header_ - buffer_;
1277}
1278
1279void LineInfo::ReadTypesAndForms(const uint8_t** lineptr,
1280 uint32_t* content_types,
1281 uint32_t* content_forms,
1282 uint32_t max_types,
1283 uint32_t* format_count) {
1284 size_t len;
1285
1286 uint32_t count = reader_->ReadUnsignedLEB128(*lineptr, &len);
1287 *lineptr += len;
1288 if (count < 1 || count > max_types) {
1289 return;
1290 }
1291 for (uint32_t col = 0; col < count; ++col) {
1292 content_types[col] = reader_->ReadUnsignedLEB128(*lineptr, &len);
1293 *lineptr += len;
1294 content_forms[col] = reader_->ReadUnsignedLEB128(*lineptr, &len);
1295 *lineptr += len;
1296 }
1297 *format_count = count;
1298}
1299
1300const char* LineInfo::ReadStringForm(uint32_t form, const uint8_t** lineptr) {
1301 const char* name = nullptr;
1302 if (form == DW_FORM_string) {
1303 name = reinterpret_cast<const char*>(*lineptr);
1304 *lineptr += strlen(name) + 1;
1305 return name;
1306 } else if (form == DW_FORM_strp) {
1307 uint64_t offset = reader_->ReadOffset(*lineptr);
1308 assert(offset < string_buffer_length_);
1309 *lineptr += reader_->OffsetSize();
1310 if (string_buffer_ != nullptr) {
1311 name = reinterpret_cast<const char*>(string_buffer_) + offset;
1312 return name;
1313 }
1314 } else if (form == DW_FORM_line_strp) {
1315 uint64_t offset = reader_->ReadOffset(*lineptr);
1316 assert(offset < line_string_buffer_length_);
1317 *lineptr += reader_->OffsetSize();
1318 if (line_string_buffer_ != nullptr) {
1319 name = reinterpret_cast<const char*>(line_string_buffer_) + offset;
1320 return name;
1321 }
1322 }
1323 // Shouldn't be called with a non-string-form, and
1324 // if there is a string form but no string buffer,
1325 // that is a problem too.
1326 assert(0);
1327 return nullptr;
1328}
1329
1330uint64_t LineInfo::ReadUnsignedData(uint32_t form, const uint8_t** lineptr) {
1331 size_t len;
1332 uint64_t value;
1333
1334 switch (form) {
1335 case DW_FORM_data1:
1336 value = reader_->ReadOneByte(*lineptr);
1337 *lineptr += 1;
1338 return value;
1339 case DW_FORM_data2:
1340 value = reader_->ReadTwoBytes(*lineptr);
1341 *lineptr += 2;
1342 return value;
1343 case DW_FORM_data4:
1344 value = reader_->ReadFourBytes(*lineptr);
1345 *lineptr += 4;
1346 return value;
1347 case DW_FORM_data8:
1348 value = reader_->ReadEightBytes(*lineptr);
1349 *lineptr += 8;
1350 return value;
1351 case DW_FORM_udata:
1352 value = reader_->ReadUnsignedLEB128(*lineptr, &len);
1353 *lineptr += len;
1354 return value;
1355 default:
1356 fprintf(stderr, "Unrecognized data form.");
1357 return 0;
1358 }
1359}
1360
1361void LineInfo::ReadFileRow(const uint8_t** lineptr,
1362 const uint32_t* content_types,
1363 const uint32_t* content_forms, uint32_t row,
1364 uint32_t format_count) {
1365 const char* filename = nullptr;
1366 uint64_t dirindex = 0;
1367 uint64_t mod_time = 0;
1368 uint64_t filelength = 0;
1369
1370 for (uint32_t col = 0; col < format_count; ++col) {
1371 switch (content_types[col]) {
1372 case DW_LNCT_path:
1373 filename = ReadStringForm(content_forms[col], lineptr);
1374 break;
1375 case DW_LNCT_directory_index:
1376 dirindex = ReadUnsignedData(content_forms[col], lineptr);
1377 break;
1378 case DW_LNCT_timestamp:
1379 mod_time = ReadUnsignedData(content_forms[col], lineptr);
1380 break;
1381 case DW_LNCT_size:
1382 filelength = ReadUnsignedData(content_forms[col], lineptr);
1383 break;
1384 case DW_LNCT_MD5:
1385 // MD5 entries help a debugger sort different versions of files with
1386 // the same name. It is always paired with a DW_FORM_data16 and is
1387 // unused in this case.
1388 *lineptr += 16;
1389 break;
1390 default:
1391 fprintf(stderr, "Unrecognized form in line table header. %d\n",
1392 content_types[col]);
1393 assert(false);
1394 break;
1395 }
1396 }
1397 assert(filename != nullptr);
1398 handler_->DefineFile(filename, row, dirindex, mod_time, filelength);
1399}
1400
1401// The header for a debug_line section is mildly complicated, because
1402// the line info is very tightly encoded.
1403void LineInfo::ReadHeader() {
1404 const uint8_t* lineptr = buffer_;
1405 size_t initial_length_size;
1406
1407 const uint64_t initial_length
1408 = reader_->ReadInitialLength(lineptr, &initial_length_size);
1409
1410 lineptr += initial_length_size;
1411 header_.total_length = initial_length;
1412 assert(buffer_ + initial_length_size + header_.total_length <=
1413 buffer_ + buffer_length_);
1414
1415
1416 header_.version = reader_->ReadTwoBytes(lineptr);
1417 lineptr += 2;
1418
1419 if (header_.version >= 5) {
1420 uint8_t address_size = reader_->ReadOneByte(lineptr);
1421 reader_->SetAddressSize(address_size);
1422 lineptr += 1;
1423 uint8_t segment_selector_size = reader_->ReadOneByte(lineptr);
1424 if (segment_selector_size != 0) {
1425 fprintf(stderr,"No support for segmented memory.");
1426 }
1427 lineptr += 1;
1428 } else {
1429 // Address size *must* be set by CU ahead of time.
1430 assert(reader_->AddressSize() != 0);
1431 }
1432
1433 header_.prologue_length = reader_->ReadOffset(lineptr);
1434 lineptr += reader_->OffsetSize();
1435
1436 header_.min_insn_length = reader_->ReadOneByte(lineptr);
1437 lineptr += 1;
1438
1439 if (header_.version >= 4) {
1440 __attribute__((unused)) uint8_t max_ops_per_insn =
1441 reader_->ReadOneByte(lineptr);
1442 ++lineptr;
1443 assert(max_ops_per_insn == 1);
1444 }
1445
1446 header_.default_is_stmt = reader_->ReadOneByte(lineptr);
1447 lineptr += 1;
1448
1449 header_.line_base = *reinterpret_cast<const int8_t*>(lineptr);
1450 lineptr += 1;
1451
1452 header_.line_range = reader_->ReadOneByte(lineptr);
1453 lineptr += 1;
1454
1455 header_.opcode_base = reader_->ReadOneByte(lineptr);
1456 lineptr += 1;
1457
1458 header_.std_opcode_lengths = new std::vector<unsigned char>;
1459 header_.std_opcode_lengths->resize(header_.opcode_base + 1);
1460 (*header_.std_opcode_lengths)[0] = 0;
1461 for (int i = 1; i < header_.opcode_base; i++) {
1462 (*header_.std_opcode_lengths)[i] = reader_->ReadOneByte(lineptr);
1463 lineptr += 1;
1464 }
1465
1466 if (header_.version <= 4) {
1467 // Directory zero is assumed to be the compilation directory and special
1468 // cased where used. It is not actually stored in the dwarf data. But an
1469 // empty entry here avoids off-by-one errors elsewhere in the code.
1470 handler_->DefineDir("", 0);
1471 // It is legal for the directory entry table to be empty.
1472 if (*lineptr) {
1473 uint32_t dirindex = 1;
1474 while (*lineptr) {
1475 const char* dirname = reinterpret_cast<const char*>(lineptr);
1476 handler_->DefineDir(dirname, dirindex);
1477 lineptr += strlen(dirname) + 1;
1478 dirindex++;
1479 }
1480 }
1481 lineptr++;
1482 // It is also legal for the file entry table to be empty.
1483
1484 // Similarly for file zero.
1485 handler_->DefineFile("", 0, 0, 0, 0);
1486 if (*lineptr) {
1487 uint32_t fileindex = 1;
1488 size_t len;
1489 while (*lineptr) {
1490 const char* filename = ReadStringForm(DW_FORM_string, &lineptr);
1491
1492 uint64_t dirindex = reader_->ReadUnsignedLEB128(lineptr, &len);
1493 lineptr += len;
1494
1495 uint64_t mod_time = reader_->ReadUnsignedLEB128(lineptr, &len);
1496 lineptr += len;
1497
1498 uint64_t filelength = reader_->ReadUnsignedLEB128(lineptr, &len);
1499 lineptr += len;
1500 handler_->DefineFile(filename, fileindex,
1501 static_cast<uint32_t>(dirindex), mod_time,
1502 filelength);
1503 fileindex++;
1504 }
1505 }
1506 lineptr++;
1507 } else {
1508 // Read the DWARF-5 directory table.
1509
1510 // Dwarf5 supports five different types and forms per directory- and
1511 // file-table entry. Theoretically, there could be duplicate entries
1512 // in this table, but that would be quite unusual.
1513 static const uint32_t kMaxTypesAndForms = 5;
1514 uint32_t content_types[kMaxTypesAndForms];
1515 uint32_t content_forms[kMaxTypesAndForms];
1516 uint32_t format_count;
1517 size_t len;
1518
1519 ReadTypesAndForms(&lineptr, content_types, content_forms, kMaxTypesAndForms,
1520 &format_count);
1521 uint32_t entry_count = reader_->ReadUnsignedLEB128(lineptr, &len);
1522 lineptr += len;
1523 for (uint32_t row = 0; row < entry_count; ++row) {
1524 const char* dirname = nullptr;
1525 for (uint32_t col = 0; col < format_count; ++col) {
1526 // The path is the only relevant content type for this implementation.
1527 if (content_types[col] == DW_LNCT_path) {
1528 dirname = ReadStringForm(content_forms[col], &lineptr);
1529 }
1530 }
1531 handler_->DefineDir(dirname, row);
1532 }
1533
1534 // Read the DWARF-5 filename table.
1535 ReadTypesAndForms(&lineptr, content_types, content_forms, kMaxTypesAndForms,
1536 &format_count);
1537 entry_count = reader_->ReadUnsignedLEB128(lineptr, &len);
1538 lineptr += len;
1539
1540 for (uint32_t row = 0; row < entry_count; ++row) {
1541 ReadFileRow(&lineptr, content_types, content_forms, row, format_count);
1542 }
1543 }
1544 after_header_ = lineptr;
1545}
1546
1547/* static */
1548bool LineInfo::ProcessOneOpcode(ByteReader* reader,
1549 LineInfoHandler* handler,
1550 const struct LineInfoHeader& header,
1551 const uint8_t* start,
1552 struct LineStateMachine* lsm,
1553 size_t* len,
1554 uintptr pc,
1555 bool* lsm_passes_pc) {
1556 size_t oplen = 0;
1557 size_t templen;
1558 uint8_t opcode = reader->ReadOneByte(start);
1559 oplen++;
1560 start++;
1561
1562 // If the opcode is great than the opcode_base, it is a special
1563 // opcode. Most line programs consist mainly of special opcodes.
1564 if (opcode >= header.opcode_base) {
1565 opcode -= header.opcode_base;
1566 const int64_t advance_address = (opcode / header.line_range)
1567 * header.min_insn_length;
1568 const int32_t advance_line = (opcode % header.line_range)
1569 + header.line_base;
1570
1571 // Check if the lsm passes "pc". If so, mark it as passed.
1572 if (lsm_passes_pc &&
1573 lsm->address <= pc && pc < lsm->address + advance_address) {
1574 *lsm_passes_pc = true;
1575 }
1576
1577 lsm->address += advance_address;
1578 lsm->line_num += advance_line;
1579 lsm->basic_block = true;
1580 *len = oplen;
1581 return true;
1582 }
1583
1584 // Otherwise, we have the regular opcodes
1585 switch (opcode) {
1586 case DW_LNS_copy: {
1587 lsm->basic_block = false;
1588 *len = oplen;
1589 return true;
1590 }
1591
1592 case DW_LNS_advance_pc: {
1593 uint64_t advance_address = reader->ReadUnsignedLEB128(start, &templen);
1594 oplen += templen;
1595
1596 // Check if the lsm passes "pc". If so, mark it as passed.
1597 if (lsm_passes_pc && lsm->address <= pc &&
1598 pc < lsm->address + header.min_insn_length * advance_address) {
1599 *lsm_passes_pc = true;
1600 }
1601
1602 lsm->address += header.min_insn_length * advance_address;
1603 }
1604 break;
1605 case DW_LNS_advance_line: {
1606 const int64_t advance_line = reader->ReadSignedLEB128(start, &templen);
1607 oplen += templen;
1608 lsm->line_num += static_cast<int32_t>(advance_line);
1609
1610 // With gcc 4.2.1, we can get the line_no here for the first time
1611 // since DW_LNS_advance_line is called after DW_LNE_set_address is
1612 // called. So we check if the lsm passes "pc" here, not in
1613 // DW_LNE_set_address.
1614 if (lsm_passes_pc && lsm->address == pc) {
1615 *lsm_passes_pc = true;
1616 }
1617 }
1618 break;
1619 case DW_LNS_set_file: {
1620 const uint64_t fileno = reader->ReadUnsignedLEB128(start, &templen);
1621 oplen += templen;
1622 lsm->file_num = static_cast<uint32_t>(fileno);
1623 }
1624 break;
1625 case DW_LNS_set_column: {
1626 const uint64_t colno = reader->ReadUnsignedLEB128(start, &templen);
1627 oplen += templen;
1628 lsm->column_num = static_cast<uint32_t>(colno);
1629 }
1630 break;
1631 case DW_LNS_negate_stmt: {
1632 lsm->is_stmt = !lsm->is_stmt;
1633 }
1634 break;
1635 case DW_LNS_set_basic_block: {
1636 lsm->basic_block = true;
1637 }
1638 break;
1639 case DW_LNS_fixed_advance_pc: {
1640 const uint16_t advance_address = reader->ReadTwoBytes(start);
1641 oplen += 2;
1642
1643 // Check if the lsm passes "pc". If so, mark it as passed.
1644 if (lsm_passes_pc &&
1645 lsm->address <= pc && pc < lsm->address + advance_address) {
1646 *lsm_passes_pc = true;
1647 }
1648
1649 lsm->address += advance_address;
1650 }
1651 break;
1652 case DW_LNS_const_add_pc: {
1653 const int64_t advance_address = header.min_insn_length
1654 * ((255 - header.opcode_base)
1655 / header.line_range);
1656
1657 // Check if the lsm passes "pc". If so, mark it as passed.
1658 if (lsm_passes_pc &&
1659 lsm->address <= pc && pc < lsm->address + advance_address) {
1660 *lsm_passes_pc = true;
1661 }
1662
1663 lsm->address += advance_address;
1664 }
1665 break;
1666 case DW_LNS_extended_op: {
1667 const uint64_t extended_op_len = reader->ReadUnsignedLEB128(start,
1668 &templen);
1669 start += templen;
1670 oplen += templen + extended_op_len;
1671
1672 const uint64_t extended_op = reader->ReadOneByte(start);
1673 start++;
1674
1675 switch (extended_op) {
1676 case DW_LNE_end_sequence: {
1677 lsm->end_sequence = true;
1678 *len = oplen;
1679 return true;
1680 }
1681 break;
1682 case DW_LNE_set_address: {
1683 // With gcc 4.2.1, we cannot tell the line_no here since
1684 // DW_LNE_set_address is called before DW_LNS_advance_line is
1685 // called. So we do not check if the lsm passes "pc" here. See
1686 // also the comment in DW_LNS_advance_line.
1687 uint64_t address = reader->ReadAddress(start);
1688 lsm->address = address;
1689 }
1690 break;
1691 case DW_LNE_define_file: {
1692 const char* filename = reinterpret_cast<const char*>(start);
1693
1694 templen = strlen(filename) + 1;
1695 start += templen;
1696
1697 uint64_t dirindex = reader->ReadUnsignedLEB128(start, &templen);
1698 oplen += templen;
1699
1700 const uint64_t mod_time = reader->ReadUnsignedLEB128(start,
1701 &templen);
1702 oplen += templen;
1703
1704 const uint64_t filelength = reader->ReadUnsignedLEB128(start,
1705 &templen);
1706 oplen += templen;
1707
1708 if (handler) {
1709 handler->DefineFile(filename, -1, static_cast<uint32_t>(dirindex),
1710 mod_time, filelength);
1711 }
1712 }
1713 break;
1714 }
1715 }
1716 break;
1717
1718 default: {
1719 // Ignore unknown opcode silently
1720 if (header.std_opcode_lengths) {
1721 for (int i = 0; i < (*header.std_opcode_lengths)[opcode]; i++) {
1722 reader->ReadUnsignedLEB128(start, &templen);
1723 start += templen;
1724 oplen += templen;
1725 }
1726 }
1727 }
1728 break;
1729 }
1730 *len = oplen;
1731 return false;
1732}
1733
1734void LineInfo::ReadLines() {
1735 struct LineStateMachine lsm;
1736
1737 // lengthstart is the place the length field is based on.
1738 // It is the point in the header after the initial length field
1739 const uint8_t* lengthstart = buffer_;
1740
1741 // In 64 bit dwarf, the initial length is 12 bytes, because of the
1742 // 0xffffffff at the start.
1743 if (reader_->OffsetSize() == 8)
1744 lengthstart += 12;
1745 else
1746 lengthstart += 4;
1747
1748 const uint8_t* lineptr = after_header_;
1749 lsm.Reset(header_.default_is_stmt);
1750
1751 // The LineInfoHandler interface expects each line's length along
1752 // with its address, but DWARF only provides addresses (sans
1753 // length), and an end-of-sequence address; one infers the length
1754 // from the next address. So we report a line only when we get the
1755 // next line's address, or the end-of-sequence address.
1756 bool have_pending_line = false;
1757 uint64_t pending_address = 0;
1758 uint32_t pending_file_num = 0, pending_line_num = 0, pending_column_num = 0;
1759
1760 while (lineptr < lengthstart + header_.total_length) {
1761 size_t oplength;
1762 bool add_row = ProcessOneOpcode(reader_, handler_, header_,
1763 lineptr, &lsm, &oplength, (uintptr)-1,
1764 NULL);
1765 if (add_row) {
1766 if (have_pending_line)
1767 handler_->AddLine(pending_address, lsm.address - pending_address,
1768 pending_file_num, pending_line_num,
1769 pending_column_num);
1770 if (lsm.end_sequence) {
1771 lsm.Reset(header_.default_is_stmt);
1772 have_pending_line = false;
1773 } else {
1774 pending_address = lsm.address;
1775 pending_file_num = lsm.file_num;
1776 pending_line_num = lsm.line_num;
1777 pending_column_num = lsm.column_num;
1778 have_pending_line = true;
1779 }
1780 }
1781 lineptr += oplength;
1782 }
1783
1784 after_header_ = lengthstart + header_.total_length;
1785}
1786
1787bool RangeListReader::ReadRanges(enum DwarfForm form, uint64_t data) {
1788 if (form == DW_FORM_sec_offset) {
1789 if (cu_info_->version_ <= 4) {
1790 return ReadDebugRanges(data);
1791 } else {
1792 return ReadDebugRngList(data);
1793 }
1794 } else if (form == DW_FORM_rnglistx) {
1795 offset_array_ = cu_info_->ranges_base_;
1796 uint64_t index_offset = reader_->OffsetSize() * data;
1797 uint64_t range_list_offset =
1798 reader_->ReadOffset(cu_info_->buffer_ + offset_array_ + index_offset);
1799
1800 return ReadDebugRngList(offset_array_ + range_list_offset);
1801 }
1802 return false;
1803}
1804
1805bool RangeListReader::ReadDebugRanges(uint64_t offset) {
1806 const uint64_t max_address =
1807 (reader_->AddressSize() == 4) ? 0xffffffffUL
1808 : 0xffffffffffffffffULL;
1809 const uint64_t entry_size = reader_->AddressSize() * 2;
1810 bool list_end = false;
1811
1812 do {
1813 if (offset > cu_info_->size_ - entry_size) {
1814 return false; // Invalid range detected
1815 }
1816
1817 uint64_t start_address = reader_->ReadAddress(cu_info_->buffer_ + offset);
1818 uint64_t end_address = reader_->ReadAddress(
1819 cu_info_->buffer_ + offset + reader_->AddressSize());
1820
1821 if (start_address == max_address) { // Base address selection
1822 cu_info_->base_address_ = end_address;
1823 } else if (start_address == 0 && end_address == 0) { // End-of-list
1824 handler_->Finish();
1825 list_end = true;
1826 } else { // Add a range entry
1827 handler_->AddRange(start_address + cu_info_->base_address_,
1828 end_address + cu_info_->base_address_);
1829 }
1830
1831 offset += entry_size;
1832 } while (!list_end);
1833
1834 return true;
1835}
1836
1837bool RangeListReader::ReadDebugRngList(uint64_t offset) {
1838 uint64_t start = 0;
1839 uint64_t end = 0;
1840 uint64_t range_len = 0;
1841 uint64_t index = 0;
1842 // A uleb128's length isn't known until after it has been read, so overruns
1843 // are only caught after an entire entry.
1844 while (offset < cu_info_->size_) {
1845 uint8_t entry_type = reader_->ReadOneByte(cu_info_->buffer_ + offset);
1846 offset += 1;
1847 // Handle each entry type per Dwarf 5 Standard, section 2.17.3.
1848 switch (entry_type) {
1849 case DW_RLE_end_of_list:
1850 handler_->Finish();
1851 return true;
1852 case DW_RLE_base_addressx:
1853 offset += ReadULEB(offset, &index);
1854 cu_info_->base_address_ = GetAddressAtIndex(index);
1855 break;
1856 case DW_RLE_startx_endx:
1857 offset += ReadULEB(offset, &index);
1858 start = GetAddressAtIndex(index);
1859 offset += ReadULEB(offset, &index);
1860 end = GetAddressAtIndex(index);
1861 handler_->AddRange(start, end);
1862 break;
1863 case DW_RLE_startx_length:
1864 offset += ReadULEB(offset, &index);
1865 start = GetAddressAtIndex(index);
1866 offset += ReadULEB(offset, &range_len);
1867 handler_->AddRange(start, start + range_len);
1868 break;
1869 case DW_RLE_offset_pair:
1870 offset += ReadULEB(offset, &start);
1871 offset += ReadULEB(offset, &end);
1872 handler_->AddRange(start + cu_info_->base_address_,
1873 end + cu_info_->base_address_);
1874 break;
1875 case DW_RLE_base_address:
1876 offset += ReadAddress(offset, &cu_info_->base_address_);
1877 break;
1878 case DW_RLE_start_end:
1879 offset += ReadAddress(offset, &start);
1880 offset += ReadAddress(offset, &end);
1881 handler_->AddRange(start, end);
1882 break;
1883 case DW_RLE_start_length:
1884 offset += ReadAddress(offset, &start);
1885 offset += ReadULEB(offset, &end);
1886 handler_->AddRange(start, start + end);
1887 break;
1888 }
1889 }
1890 return false;
1891}
1892
1893// A DWARF rule for recovering the address or value of a register, or
1894// computing the canonical frame address. There is one subclass of this for
1895// each '*Rule' member function in CallFrameInfo::Handler.
1896//
1897// It's annoying that we have to handle Rules using pointers (because
1898// the concrete instances can have an arbitrary size). They're small,
1899// so it would be much nicer if we could just handle them by value
1900// instead of fretting about ownership and destruction.
1901//
1902// It seems like all these could simply be instances of std::tr1::bind,
1903// except that we need instances to be EqualityComparable, too.
1904//
1905// This could logically be nested within State, but then the qualified names
1906// get horrendous.
1907class CallFrameInfo::Rule {
1908 public:
1909 virtual ~Rule() { }
1910
1911 // Tell HANDLER that, at ADDRESS in the program, REG can be recovered using
1912 // this rule. If REG is kCFARegister, then this rule describes how to compute
1913 // the canonical frame address. Return what the HANDLER member function
1914 // returned.
1915 virtual bool Handle(Handler* handler,
1916 uint64_t address, int reg) const = 0;
1917
1918 // Equality on rules. We use these to decide which rules we need
1919 // to report after a DW_CFA_restore_state instruction.
1920 virtual bool operator==(const Rule& rhs) const = 0;
1921
1922 bool operator!=(const Rule& rhs) const { return ! (*this == rhs); }
1923
1924 // Return a pointer to a copy of this rule.
1925 virtual Rule* Copy() const = 0;
1926
1927 // If this is a base+offset rule, change its base register to REG.
1928 // Otherwise, do nothing. (Ugly, but required for DW_CFA_def_cfa_register.)
1929 virtual void SetBaseRegister(unsigned reg) { }
1930
1931 // If this is a base+offset rule, change its offset to OFFSET. Otherwise,
1932 // do nothing. (Ugly, but required for DW_CFA_def_cfa_offset.)
1933 virtual void SetOffset(long long offset) { }
1934};
1935
1936// Rule: the value the register had in the caller cannot be recovered.
1937class CallFrameInfo::UndefinedRule: public CallFrameInfo::Rule {
1938 public:
1939 UndefinedRule() { }
1940 ~UndefinedRule() { }
1941 bool Handle(Handler* handler, uint64_t address, int reg) const {
1942 return handler->UndefinedRule(address, reg);
1943 }
1944 bool operator==(const Rule& rhs) const {
1945 // dynamic_cast is allowed by the Google C++ Style Guide, if the use has
1946 // been carefully considered; cheap RTTI-like workarounds are forbidden.
1947 const UndefinedRule* our_rhs = dynamic_cast<const UndefinedRule*>(&rhs);
1948 return (our_rhs != NULL);
1949 }
1950 Rule* Copy() const { return new UndefinedRule(*this); }
1951};
1952
1953// Rule: the register's value is the same as that it had in the caller.
1954class CallFrameInfo::SameValueRule: public CallFrameInfo::Rule {
1955 public:
1956 SameValueRule() { }
1957 ~SameValueRule() { }
1958 bool Handle(Handler* handler, uint64_t address, int reg) const {
1959 return handler->SameValueRule(address, reg);
1960 }
1961 bool operator==(const Rule& rhs) const {
1962 // dynamic_cast is allowed by the Google C++ Style Guide, if the use has
1963 // been carefully considered; cheap RTTI-like workarounds are forbidden.
1964 const SameValueRule* our_rhs = dynamic_cast<const SameValueRule*>(&rhs);
1965 return (our_rhs != NULL);
1966 }
1967 Rule* Copy() const { return new SameValueRule(*this); }
1968};
1969
1970// Rule: the register is saved at OFFSET from BASE_REGISTER. BASE_REGISTER
1971// may be CallFrameInfo::Handler::kCFARegister.
1972class CallFrameInfo::OffsetRule: public CallFrameInfo::Rule {
1973 public:
1974 OffsetRule(int base_register, long offset)
1975 : base_register_(base_register), offset_(offset) { }
1976 ~OffsetRule() { }
1977 bool Handle(Handler* handler, uint64_t address, int reg) const {
1978 return handler->OffsetRule(address, reg, base_register_, offset_);
1979 }
1980 bool operator==(const Rule& rhs) const {
1981 // dynamic_cast is allowed by the Google C++ Style Guide, if the use has
1982 // been carefully considered; cheap RTTI-like workarounds are forbidden.
1983 const OffsetRule* our_rhs = dynamic_cast<const OffsetRule*>(&rhs);
1984 return (our_rhs &&
1985 base_register_ == our_rhs->base_register_ &&
1986 offset_ == our_rhs->offset_);
1987 }
1988 Rule* Copy() const { return new OffsetRule(*this); }
1989 // We don't actually need SetBaseRegister or SetOffset here, since they
1990 // are only ever applied to CFA rules, for DW_CFA_def_cfa_offset, and it
1991 // doesn't make sense to use OffsetRule for computing the CFA: it
1992 // computes the address at which a register is saved, not a value.
1993 private:
1994 int base_register_;
1995 long offset_;
1996};
1997
1998// Rule: the value the register had in the caller is the value of
1999// BASE_REGISTER plus offset. BASE_REGISTER may be
2000// CallFrameInfo::Handler::kCFARegister.
2001class CallFrameInfo::ValOffsetRule: public CallFrameInfo::Rule {
2002 public:
2003 ValOffsetRule(int base_register, long offset)
2004 : base_register_(base_register), offset_(offset) { }
2005 ~ValOffsetRule() { }
2006 bool Handle(Handler* handler, uint64_t address, int reg) const {
2007 return handler->ValOffsetRule(address, reg, base_register_, offset_);
2008 }
2009 bool operator==(const Rule& rhs) const {
2010 // dynamic_cast is allowed by the Google C++ Style Guide, if the use has
2011 // been carefully considered; cheap RTTI-like workarounds are forbidden.
2012 const ValOffsetRule* our_rhs = dynamic_cast<const ValOffsetRule*>(&rhs);
2013 return (our_rhs &&
2014 base_register_ == our_rhs->base_register_ &&
2015 offset_ == our_rhs->offset_);
2016 }
2017 Rule* Copy() const { return new ValOffsetRule(*this); }
2018 void SetBaseRegister(unsigned reg) { base_register_ = reg; }
2019 void SetOffset(long long offset) { offset_ = offset; }
2020 private:
2021 int base_register_;
2022 long offset_;
2023};
2024
2025// Rule: the register has been saved in another register REGISTER_NUMBER_.
2026class CallFrameInfo::RegisterRule: public CallFrameInfo::Rule {
2027 public:
2028 explicit RegisterRule(int register_number)
2029 : register_number_(register_number) { }
2030 ~RegisterRule() { }
2031 bool Handle(Handler* handler, uint64_t address, int reg) const {
2032 return handler->RegisterRule(address, reg, register_number_);
2033 }
2034 bool operator==(const Rule& rhs) const {
2035 // dynamic_cast is allowed by the Google C++ Style Guide, if the use has
2036 // been carefully considered; cheap RTTI-like workarounds are forbidden.
2037 const RegisterRule* our_rhs = dynamic_cast<const RegisterRule*>(&rhs);
2038 return (our_rhs && register_number_ == our_rhs->register_number_);
2039 }
2040 Rule* Copy() const { return new RegisterRule(*this); }
2041 private:
2042 int register_number_;
2043};
2044
2045// Rule: EXPRESSION evaluates to the address at which the register is saved.
2046class CallFrameInfo::ExpressionRule: public CallFrameInfo::Rule {
2047 public:
2048 explicit ExpressionRule(const string& expression)
2049 : expression_(expression) { }
2050 ~ExpressionRule() { }
2051 bool Handle(Handler* handler, uint64_t address, int reg) const {
2052 return handler->ExpressionRule(address, reg, expression_);
2053 }
2054 bool operator==(const Rule& rhs) const {
2055 // dynamic_cast is allowed by the Google C++ Style Guide, if the use has
2056 // been carefully considered; cheap RTTI-like workarounds are forbidden.
2057 const ExpressionRule* our_rhs = dynamic_cast<const ExpressionRule*>(&rhs);
2058 return (our_rhs && expression_ == our_rhs->expression_);
2059 }
2060 Rule* Copy() const { return new ExpressionRule(*this); }
2061 private:
2062 string expression_;
2063};
2064
// Rule: EXPRESSION evaluates to the value the register had in the caller.
2066class CallFrameInfo::ValExpressionRule: public CallFrameInfo::Rule {
2067 public:
2068 explicit ValExpressionRule(const string& expression)
2069 : expression_(expression) { }
2070 ~ValExpressionRule() { }
2071 bool Handle(Handler* handler, uint64_t address, int reg) const {
2072 return handler->ValExpressionRule(address, reg, expression_);
2073 }
2074 bool operator==(const Rule& rhs) const {
2075 // dynamic_cast is allowed by the Google C++ Style Guide, if the use has
2076 // been carefully considered; cheap RTTI-like workarounds are forbidden.
2077 const ValExpressionRule* our_rhs =
2078 dynamic_cast<const ValExpressionRule*>(&rhs);
2079 return (our_rhs && expression_ == our_rhs->expression_);
2080 }
2081 Rule* Copy() const { return new ValExpressionRule(*this); }
2082 private:
2083 string expression_;
2084};
2085
2086// A map from register numbers to rules.
2087class CallFrameInfo::RuleMap {
2088 public:
2089 RuleMap() : cfa_rule_(NULL) { }
2090 RuleMap(const RuleMap& rhs) : cfa_rule_(NULL) { *this = rhs; }
2091 ~RuleMap() { Clear(); }
2092
2093 RuleMap& operator=(const RuleMap& rhs);
2094
2095 // Set the rule for computing the CFA to RULE. Take ownership of RULE.
2096 void SetCFARule(Rule* rule) { delete cfa_rule_; cfa_rule_ = rule; }
2097
2098 // Return the current CFA rule. Unlike RegisterRule, this RuleMap retains
2099 // ownership of the rule. We use this for DW_CFA_def_cfa_offset and
2100 // DW_CFA_def_cfa_register, and for detecting references to the CFA before
2101 // a rule for it has been established.
2102 Rule* CFARule() const { return cfa_rule_; }
2103
2104 // Return the rule for REG, or NULL if there is none. The caller takes
2105 // ownership of the result.
2106 Rule* RegisterRule(int reg) const;
2107
2108 // Set the rule for computing REG to RULE. Take ownership of RULE.
2109 void SetRegisterRule(int reg, Rule* rule);
2110
2111 // Make all the appropriate calls to HANDLER as if we were changing from
2112 // this RuleMap to NEW_RULES at ADDRESS. We use this to implement
2113 // DW_CFA_restore_state, where lots of rules can change simultaneously.
2114 // Return true if all handlers returned true; otherwise, return false.
2115 bool HandleTransitionTo(Handler* handler, uint64_t address,
2116 const RuleMap& new_rules) const;
2117
2118 private:
2119 // A map from register numbers to Rules.
2120 typedef std::map<int, Rule*> RuleByNumber;
2121
2122 // Remove all register rules and clear cfa_rule_.
2123 void Clear();
2124
2125 // The rule for computing the canonical frame address. This RuleMap owns
2126 // this rule.
2127 Rule* cfa_rule_;
2128
2129 // A map from register numbers to postfix expressions to recover
2130 // their values. This RuleMap owns the Rules the map refers to.
2131 RuleByNumber registers_;
2132};
2133
2134CallFrameInfo::RuleMap& CallFrameInfo::RuleMap::operator=(const RuleMap& rhs) {
2135 Clear();
2136 // Since each map owns the rules it refers to, assignment must copy them.
2137 if (rhs.cfa_rule_) cfa_rule_ = rhs.cfa_rule_->Copy();
2138 for (RuleByNumber::const_iterator it = rhs.registers_.begin();
2139 it != rhs.registers_.end(); it++)
2140 registers_[it->first] = it->second->Copy();
2141 return *this;
2142}
2143
2144CallFrameInfo::Rule* CallFrameInfo::RuleMap::RegisterRule(int reg) const {
2145 assert(reg != Handler::kCFARegister);
2146 RuleByNumber::const_iterator it = registers_.find(reg);
2147 if (it != registers_.end())
2148 return it->second->Copy();
2149 else
2150 return NULL;
2151}
2152
2153void CallFrameInfo::RuleMap::SetRegisterRule(int reg, Rule* rule) {
2154 assert(reg != Handler::kCFARegister);
2155 assert(rule);
2156 Rule** slot = &registers_[reg];
2157 delete *slot;
2158 *slot = rule;
2159}
2160
2161bool CallFrameInfo::RuleMap::HandleTransitionTo(
2162 Handler* handler,
2163 uint64_t address,
2164 const RuleMap& new_rules) const {
2165 // Transition from cfa_rule_ to new_rules.cfa_rule_.
2166 if (cfa_rule_ && new_rules.cfa_rule_) {
2167 if (*cfa_rule_ != *new_rules.cfa_rule_ &&
2168 !new_rules.cfa_rule_->Handle(handler, address,
2169 Handler::kCFARegister))
2170 return false;
2171 } else if (cfa_rule_) {
2172 // this RuleMap has a CFA rule but new_rules doesn't.
2173 // CallFrameInfo::Handler has no way to handle this --- and shouldn't;
2174 // it's garbage input. The instruction interpreter should have
2175 // detected this and warned, so take no action here.
2176 } else if (new_rules.cfa_rule_) {
2177 // This shouldn't be possible: NEW_RULES is some prior state, and
2178 // there's no way to remove entries.
2179 assert(0);
2180 } else {
2181 // Both CFA rules are empty. No action needed.
2182 }
2183
2184 // Traverse the two maps in order by register number, and report
2185 // whatever differences we find.
2186 RuleByNumber::const_iterator old_it = registers_.begin();
2187 RuleByNumber::const_iterator new_it = new_rules.registers_.begin();
2188 while (old_it != registers_.end() && new_it != new_rules.registers_.end()) {
2189 if (old_it->first < new_it->first) {
2190 // This RuleMap has an entry for old_it->first, but NEW_RULES
2191 // doesn't.
2192 //
2193 // This isn't really the right thing to do, but since CFI generally
2194 // only mentions callee-saves registers, and GCC's convention for
2195 // callee-saves registers is that they are unchanged, it's a good
2196 // approximation.
2197 if (!handler->SameValueRule(address, old_it->first))
2198 return false;
2199 old_it++;
2200 } else if (old_it->first > new_it->first) {
2201 // NEW_RULES has entry for new_it->first, but this RuleMap
2202 // doesn't. This shouldn't be possible: NEW_RULES is some prior
2203 // state, and there's no way to remove entries.
2204 assert(0);
2205 } else {
2206 // Both maps have an entry for this register. Report the new
2207 // rule if it is different.
2208 if (*old_it->second != *new_it->second &&
2209 !new_it->second->Handle(handler, address, new_it->first))
2210 return false;
2211 new_it++, old_it++;
2212 }
2213 }
2214 // Finish off entries from this RuleMap with no counterparts in new_rules.
2215 while (old_it != registers_.end()) {
2216 if (!handler->SameValueRule(address, old_it->first))
2217 return false;
2218 old_it++;
2219 }
2220 // Since we only make transitions from a rule set to some previously
2221 // saved rule set, and we can only add rules to the map, NEW_RULES
2222 // must have fewer rules than *this.
2223 assert(new_it == new_rules.registers_.end());
2224
2225 return true;
2226}
2227
2228// Remove all register rules and clear cfa_rule_.
2229void CallFrameInfo::RuleMap::Clear() {
2230 delete cfa_rule_;
2231 cfa_rule_ = NULL;
2232 for (RuleByNumber::iterator it = registers_.begin();
2233 it != registers_.end(); it++)
2234 delete it->second;
2235 registers_.clear();
2236}
2237
2238// The state of the call frame information interpreter as it processes
2239// instructions from a CIE and FDE.
2240class CallFrameInfo::State {
2241 public:
2242 // Create a call frame information interpreter state with the given
2243 // reporter, reader, handler, and initial call frame info address.
2244 State(ByteReader* reader, Handler* handler, Reporter* reporter,
2245 uint64_t address)
2246 : reader_(reader), handler_(handler), reporter_(reporter),
2247 address_(address), entry_(NULL), cursor_(NULL) { }
2248
2249 // Interpret instructions from CIE, save the resulting rule set for
2250 // DW_CFA_restore instructions, and return true. On error, report
2251 // the problem to reporter_ and return false.
2252 bool InterpretCIE(const CIE& cie);
2253
2254 // Interpret instructions from FDE, and return true. On error,
2255 // report the problem to reporter_ and return false.
2256 bool InterpretFDE(const FDE& fde);
2257
2258 private:
2259 // The operands of a CFI instruction, for ParseOperands.
2260 struct Operands {
2261 unsigned register_number; // A register number.
2262 uint64_t offset; // An offset or address.
2263 long signed_offset; // A signed offset.
2264 string expression; // A DWARF expression.
2265 };
2266
2267 // Parse CFI instruction operands from STATE's instruction stream as
2268 // described by FORMAT. On success, populate OPERANDS with the
2269 // results, and return true. On failure, report the problem and
2270 // return false.
2271 //
2272 // Each character of FORMAT should be one of the following:
2273 //
2274 // 'r' unsigned LEB128 register number (OPERANDS->register_number)
2275 // 'o' unsigned LEB128 offset (OPERANDS->offset)
2276 // 's' signed LEB128 offset (OPERANDS->signed_offset)
2277 // 'a' machine-size address (OPERANDS->offset)
2278 // (If the CIE has a 'z' augmentation string, 'a' uses the
2279 // encoding specified by the 'R' argument.)
2280 // '1' a one-byte offset (OPERANDS->offset)
2281 // '2' a two-byte offset (OPERANDS->offset)
2282 // '4' a four-byte offset (OPERANDS->offset)
2283 // '8' an eight-byte offset (OPERANDS->offset)
2284 // 'e' a DW_FORM_block holding a (OPERANDS->expression)
2285 // DWARF expression
2286 bool ParseOperands(const char* format, Operands* operands);
2287
2288 // Interpret one CFI instruction from STATE's instruction stream, update
2289 // STATE, report any rule changes to handler_, and return true. On
2290 // failure, report the problem and return false.
2291 bool DoInstruction();
2292
2293 // The following Do* member functions are subroutines of DoInstruction,
2294 // factoring out the actual work of operations that have several
2295 // different encodings.
2296
2297 // Set the CFA rule to be the value of BASE_REGISTER plus OFFSET, and
2298 // return true. On failure, report and return false. (Used for
2299 // DW_CFA_def_cfa and DW_CFA_def_cfa_sf.)
2300 bool DoDefCFA(unsigned base_register, long offset);
2301
2302 // Change the offset of the CFA rule to OFFSET, and return true. On
2303 // failure, report and return false. (Subroutine for
2304 // DW_CFA_def_cfa_offset and DW_CFA_def_cfa_offset_sf.)
2305 bool DoDefCFAOffset(long offset);
2306
2307 // Specify that REG can be recovered using RULE, and return true. On
2308 // failure, report and return false.
2309 bool DoRule(unsigned reg, Rule* rule);
2310
2311 // Specify that REG can be found at OFFSET from the CFA, and return true.
2312 // On failure, report and return false. (Subroutine for DW_CFA_offset,
2313 // DW_CFA_offset_extended, and DW_CFA_offset_extended_sf.)
2314 bool DoOffset(unsigned reg, long offset);
2315
2316 // Specify that the caller's value for REG is the CFA plus OFFSET,
2317 // and return true. On failure, report and return false. (Subroutine
2318 // for DW_CFA_val_offset and DW_CFA_val_offset_sf.)
2319 bool DoValOffset(unsigned reg, long offset);
2320
2321 // Restore REG to the rule established in the CIE, and return true. On
2322 // failure, report and return false. (Subroutine for DW_CFA_restore and
2323 // DW_CFA_restore_extended.)
2324 bool DoRestore(unsigned reg);
2325
2326 // Return the section offset of the instruction at cursor. For use
2327 // in error messages.
2328 uint64_t CursorOffset() { return entry_->offset + (cursor_ - entry_->start); }
2329
2330 // Report that entry_ is incomplete, and return false. For brevity.
2331 bool ReportIncomplete() {
2332 reporter_->Incomplete(entry_->offset, entry_->kind);
2333 return false;
2334 }
2335
2336 // For reading multi-byte values with the appropriate endianness.
2337 ByteReader* reader_;
2338
2339 // The handler to which we should report the data we find.
2340 Handler* handler_;
2341
2342 // For reporting problems in the info we're parsing.
2343 Reporter* reporter_;
2344
2345 // The code address to which the next instruction in the stream applies.
2346 uint64_t address_;
2347
2348 // The entry whose instructions we are currently processing. This is
2349 // first a CIE, and then an FDE.
2350 const Entry* entry_;
2351
2352 // The next instruction to process.
2353 const uint8_t* cursor_;
2354
2355 // The current set of rules.
2356 RuleMap rules_;
2357
2358 // The set of rules established by the CIE, used by DW_CFA_restore
2359 // and DW_CFA_restore_extended. We set this after interpreting the
2360 // CIE's instructions.
2361 RuleMap cie_rules_;
2362
2363 // A stack of saved states, for DW_CFA_remember_state and
2364 // DW_CFA_restore_state.
2365 std::stack<RuleMap> saved_rules_;
2366};
2367
2368bool CallFrameInfo::State::InterpretCIE(const CIE& cie) {
2369 entry_ = &cie;
2370 cursor_ = entry_->instructions;
2371 while (cursor_ < entry_->end)
2372 if (!DoInstruction())
2373 return false;
2374 // Note the rules established by the CIE, for use by DW_CFA_restore
2375 // and DW_CFA_restore_extended.
2376 cie_rules_ = rules_;
2377 return true;
2378}
2379
2380bool CallFrameInfo::State::InterpretFDE(const FDE& fde) {
2381 entry_ = &fde;
2382 cursor_ = entry_->instructions;
2383 while (cursor_ < entry_->end)
2384 if (!DoInstruction())
2385 return false;
2386 return true;
2387}
2388
2389bool CallFrameInfo::State::ParseOperands(const char* format,
2390 Operands* operands) {
2391 size_t len;
2392 const char* operand;
2393
2394 for (operand = format; *operand; operand++) {
2395 size_t bytes_left = entry_->end - cursor_;
2396 switch (*operand) {
2397 case 'r':
2398 operands->register_number = reader_->ReadUnsignedLEB128(cursor_, &len);
2399 if (len > bytes_left) return ReportIncomplete();
2400 cursor_ += len;
2401 break;
2402
2403 case 'o':
2404 operands->offset = reader_->ReadUnsignedLEB128(cursor_, &len);
2405 if (len > bytes_left) return ReportIncomplete();
2406 cursor_ += len;
2407 break;
2408
2409 case 's':
2410 operands->signed_offset = reader_->ReadSignedLEB128(cursor_, &len);
2411 if (len > bytes_left) return ReportIncomplete();
2412 cursor_ += len;
2413 break;
2414
2415 case 'a':
2416 operands->offset =
2417 reader_->ReadEncodedPointer(cursor_, entry_->cie->pointer_encoding,
2418 &len);
2419 if (len > bytes_left) return ReportIncomplete();
2420 cursor_ += len;
2421 break;
2422
2423 case '1':
2424 if (1 > bytes_left) return ReportIncomplete();
2425 operands->offset = static_cast<unsigned char>(*cursor_++);
2426 break;
2427
2428 case '2':
2429 if (2 > bytes_left) return ReportIncomplete();
2430 operands->offset = reader_->ReadTwoBytes(cursor_);
2431 cursor_ += 2;
2432 break;
2433
2434 case '4':
2435 if (4 > bytes_left) return ReportIncomplete();
2436 operands->offset = reader_->ReadFourBytes(cursor_);
2437 cursor_ += 4;
2438 break;
2439
2440 case '8':
2441 if (8 > bytes_left) return ReportIncomplete();
2442 operands->offset = reader_->ReadEightBytes(cursor_);
2443 cursor_ += 8;
2444 break;
2445
2446 case 'e': {
2447 size_t expression_length = reader_->ReadUnsignedLEB128(cursor_, &len);
2448 if (len > bytes_left || expression_length > bytes_left - len)
2449 return ReportIncomplete();
2450 cursor_ += len;
2451 operands->expression = string(reinterpret_cast<const char*>(cursor_),
2452 expression_length);
2453 cursor_ += expression_length;
2454 break;
2455 }
2456
2457 default:
2458 assert(0);
2459 }
2460 }
2461
2462 return true;
2463}
2464
2465bool CallFrameInfo::State::DoInstruction() {
2466 CIE* cie = entry_->cie;
2467 Operands ops;
2468
2469 // Our entry's kind should have been set by now.
2470 assert(entry_->kind != kUnknown);
2471
2472 // We shouldn't have been invoked unless there were more
2473 // instructions to parse.
2474 assert(cursor_ < entry_->end);
2475
2476 unsigned opcode = *cursor_++;
2477 if ((opcode & 0xc0) != 0) {
2478 switch (opcode & 0xc0) {
2479 // Advance the address.
2480 case DW_CFA_advance_loc: {
2481 size_t code_offset = opcode & 0x3f;
2482 address_ += code_offset * cie->code_alignment_factor;
2483 break;
2484 }
2485
2486 // Find a register at an offset from the CFA.
2487 case DW_CFA_offset:
2488 if (!ParseOperands("o", &ops) ||
2489 !DoOffset(opcode & 0x3f, ops.offset * cie->data_alignment_factor))
2490 return false;
2491 break;
2492
2493 // Restore the rule established for a register by the CIE.
2494 case DW_CFA_restore:
2495 if (!DoRestore(opcode & 0x3f)) return false;
2496 break;
2497
2498 // The 'if' above should have excluded this possibility.
2499 default:
2500 assert(0);
2501 }
2502
2503 // Return here, so the big switch below won't be indented.
2504 return true;
2505 }
2506
2507 switch (opcode) {
2508 // Set the address.
2509 case DW_CFA_set_loc:
2510 if (!ParseOperands("a", &ops)) return false;
2511 address_ = ops.offset;
2512 break;
2513
2514 // Advance the address.
2515 case DW_CFA_advance_loc1:
2516 if (!ParseOperands("1", &ops)) return false;
2517 address_ += ops.offset * cie->code_alignment_factor;
2518 break;
2519
2520 // Advance the address.
2521 case DW_CFA_advance_loc2:
2522 if (!ParseOperands("2", &ops)) return false;
2523 address_ += ops.offset * cie->code_alignment_factor;
2524 break;
2525
2526 // Advance the address.
2527 case DW_CFA_advance_loc4:
2528 if (!ParseOperands("4", &ops)) return false;
2529 address_ += ops.offset * cie->code_alignment_factor;
2530 break;
2531
2532 // Advance the address.
2533 case DW_CFA_MIPS_advance_loc8:
2534 if (!ParseOperands("8", &ops)) return false;
2535 address_ += ops.offset * cie->code_alignment_factor;
2536 break;
2537
2538 // Compute the CFA by adding an offset to a register.
2539 case DW_CFA_def_cfa:
2540 if (!ParseOperands("ro", &ops) ||
2541 !DoDefCFA(ops.register_number, ops.offset))
2542 return false;
2543 break;
2544
2545 // Compute the CFA by adding an offset to a register.
2546 case DW_CFA_def_cfa_sf:
2547 if (!ParseOperands("rs", &ops) ||
2548 !DoDefCFA(ops.register_number,
2549 ops.signed_offset * cie->data_alignment_factor))
2550 return false;
2551 break;
2552
2553 // Change the base register used to compute the CFA.
2554 case DW_CFA_def_cfa_register: {
2555 if (!ParseOperands("r", &ops)) return false;
2556 Rule* cfa_rule = rules_.CFARule();
2557 if (!cfa_rule) {
2558 if (!DoDefCFA(ops.register_number, ops.offset)) {
2559 reporter_->NoCFARule(entry_->offset, entry_->kind, CursorOffset());
2560 return false;
2561 }
2562 } else {
2563 cfa_rule->SetBaseRegister(ops.register_number);
2564 if (!cfa_rule->Handle(handler_, address_,
2565 Handler::kCFARegister))
2566 return false;
2567 }
2568 break;
2569 }
2570
2571 // Change the offset used to compute the CFA.
2572 case DW_CFA_def_cfa_offset:
2573 if (!ParseOperands("o", &ops) ||
2574 !DoDefCFAOffset(ops.offset))
2575 return false;
2576 break;
2577
2578 // Change the offset used to compute the CFA.
2579 case DW_CFA_def_cfa_offset_sf:
2580 if (!ParseOperands("s", &ops) ||
2581 !DoDefCFAOffset(ops.signed_offset * cie->data_alignment_factor))
2582 return false;
2583 break;
2584
2585 // Specify an expression whose value is the CFA.
2586 case DW_CFA_def_cfa_expression: {
2587 if (!ParseOperands("e", &ops))
2588 return false;
2589 Rule* rule = new ValExpressionRule(ops.expression);
2590 rules_.SetCFARule(rule);
2591 if (!rule->Handle(handler_, address_,
2592 Handler::kCFARegister))
2593 return false;
2594 break;
2595 }
2596
2597 // The register's value cannot be recovered.
2598 case DW_CFA_undefined: {
2599 if (!ParseOperands("r", &ops) ||
2600 !DoRule(ops.register_number, new UndefinedRule()))
2601 return false;
2602 break;
2603 }
2604
2605 // The register's value is unchanged from its value in the caller.
2606 case DW_CFA_same_value: {
2607 if (!ParseOperands("r", &ops) ||
2608 !DoRule(ops.register_number, new SameValueRule()))
2609 return false;
2610 break;
2611 }
2612
2613 // Find a register at an offset from the CFA.
2614 case DW_CFA_offset_extended:
2615 if (!ParseOperands("ro", &ops) ||
2616 !DoOffset(ops.register_number,
2617 ops.offset * cie->data_alignment_factor))
2618 return false;
2619 break;
2620
2621 // The register is saved at an offset from the CFA.
2622 case DW_CFA_offset_extended_sf:
2623 if (!ParseOperands("rs", &ops) ||
2624 !DoOffset(ops.register_number,
2625 ops.signed_offset * cie->data_alignment_factor))
2626 return false;
2627 break;
2628
2629 // The register is saved at an offset from the CFA.
2630 case DW_CFA_GNU_negative_offset_extended:
2631 if (!ParseOperands("ro", &ops) ||
2632 !DoOffset(ops.register_number,
2633 -ops.offset * cie->data_alignment_factor))
2634 return false;
2635 break;
2636
2637 // The register's value is the sum of the CFA plus an offset.
2638 case DW_CFA_val_offset:
2639 if (!ParseOperands("ro", &ops) ||
2640 !DoValOffset(ops.register_number,
2641 ops.offset * cie->data_alignment_factor))
2642 return false;
2643 break;
2644
2645 // The register's value is the sum of the CFA plus an offset.
2646 case DW_CFA_val_offset_sf:
2647 if (!ParseOperands("rs", &ops) ||
2648 !DoValOffset(ops.register_number,
2649 ops.signed_offset * cie->data_alignment_factor))
2650 return false;
2651 break;
2652
2653 // The register has been saved in another register.
2654 case DW_CFA_register: {
2655 if (!ParseOperands("ro", &ops) ||
2656 !DoRule(ops.register_number, new RegisterRule(ops.offset)))
2657 return false;
2658 break;
2659 }
2660
2661 // An expression yields the address at which the register is saved.
2662 case DW_CFA_expression: {
2663 if (!ParseOperands("re", &ops) ||
2664 !DoRule(ops.register_number, new ExpressionRule(ops.expression)))
2665 return false;
2666 break;
2667 }
2668
2669 // An expression yields the caller's value for the register.
2670 case DW_CFA_val_expression: {
2671 if (!ParseOperands("re", &ops) ||
2672 !DoRule(ops.register_number, new ValExpressionRule(ops.expression)))
2673 return false;
2674 break;
2675 }
2676
2677 // Restore the rule established for a register by the CIE.
2678 case DW_CFA_restore_extended:
2679 if (!ParseOperands("r", &ops) ||
2680 !DoRestore( ops.register_number))
2681 return false;
2682 break;
2683
2684 // Save the current set of rules on a stack.
2685 case DW_CFA_remember_state:
2686 saved_rules_.push(rules_);
2687 break;
2688
2689 // Pop the current set of rules off the stack.
2690 case DW_CFA_restore_state: {
2691 if (saved_rules_.empty()) {
2692 reporter_->EmptyStateStack(entry_->offset, entry_->kind,
2693 CursorOffset());
2694 return false;
2695 }
2696 const RuleMap& new_rules = saved_rules_.top();
2697 if (rules_.CFARule() && !new_rules.CFARule()) {
2698 reporter_->ClearingCFARule(entry_->offset, entry_->kind,
2699 CursorOffset());
2700 return false;
2701 }
2702 rules_.HandleTransitionTo(handler_, address_, new_rules);
2703 rules_ = new_rules;
2704 saved_rules_.pop();
2705 break;
2706 }
2707
2708 // No operation. (Padding instruction.)
2709 case DW_CFA_nop:
2710 break;
2711
2712 // A SPARC register window save: Registers 8 through 15 (%o0-%o7)
2713 // are saved in registers 24 through 31 (%i0-%i7), and registers
2714 // 16 through 31 (%l0-%l7 and %i0-%i7) are saved at CFA offsets
2715 // (0-15 * the register size). The register numbers must be
2716 // hard-coded. A GNU extension, and not a pretty one.
2717 case DW_CFA_GNU_window_save: {
2718 // Save %o0-%o7 in %i0-%i7.
2719 for (int i = 8; i < 16; i++)
2720 if (!DoRule(i, new RegisterRule(i + 16)))
2721 return false;
2722 // Save %l0-%l7 and %i0-%i7 at the CFA.
2723 for (int i = 16; i < 32; i++)
2724 // Assume that the byte reader's address size is the same as
2725 // the architecture's register size. !@#%*^ hilarious.
2726 if (!DoRule(i, new OffsetRule(Handler::kCFARegister,
2727 (i - 16) * reader_->AddressSize())))
2728 return false;
2729 break;
2730 }
2731
2732 // I'm not sure what this is. GDB doesn't use it for unwinding.
2733 case DW_CFA_GNU_args_size:
2734 if (!ParseOperands("o", &ops)) return false;
2735 break;
2736
2737 // An opcode we don't recognize.
2738 default: {
2739 reporter_->BadInstruction(entry_->offset, entry_->kind, CursorOffset());
2740 return false;
2741 }
2742 }
2743
2744 return true;
2745}
2746
2747bool CallFrameInfo::State::DoDefCFA(unsigned base_register, long offset) {
2748 Rule* rule = new ValOffsetRule(base_register, offset);
2749 rules_.SetCFARule(rule);
2750 return rule->Handle(handler_, address_,
2751 Handler::kCFARegister);
2752}
2753
2754bool CallFrameInfo::State::DoDefCFAOffset(long offset) {
2755 Rule* cfa_rule = rules_.CFARule();
2756 if (!cfa_rule) {
2757 reporter_->NoCFARule(entry_->offset, entry_->kind, CursorOffset());
2758 return false;
2759 }
2760 cfa_rule->SetOffset(offset);
2761 return cfa_rule->Handle(handler_, address_,
2762 Handler::kCFARegister);
2763}
2764
2765bool CallFrameInfo::State::DoRule(unsigned reg, Rule* rule) {
2766 rules_.SetRegisterRule(reg, rule);
2767 return rule->Handle(handler_, address_, reg);
2768}
2769
2770bool CallFrameInfo::State::DoOffset(unsigned reg, long offset) {
2771 if (!rules_.CFARule()) {
2772 reporter_->NoCFARule(entry_->offset, entry_->kind, CursorOffset());
2773 return false;
2774 }
2775 return DoRule(reg,
2776 new OffsetRule(Handler::kCFARegister, offset));
2777}
2778
2779bool CallFrameInfo::State::DoValOffset(unsigned reg, long offset) {
2780 if (!rules_.CFARule()) {
2781 reporter_->NoCFARule(entry_->offset, entry_->kind, CursorOffset());
2782 return false;
2783 }
2784 return DoRule(reg,
2785 new ValOffsetRule(Handler::kCFARegister, offset));
2786}
2787
2788bool CallFrameInfo::State::DoRestore(unsigned reg) {
2789 // DW_CFA_restore and DW_CFA_restore_extended don't make sense in a CIE.
2790 if (entry_->kind == kCIE) {
2791 reporter_->RestoreInCIE(entry_->offset, CursorOffset());
2792 return false;
2793 }
2794 Rule* rule = cie_rules_.RegisterRule(reg);
2795 if (!rule) {
2796 // This isn't really the right thing to do, but since CFI generally
2797 // only mentions callee-saves registers, and GCC's convention for
2798 // callee-saves registers is that they are unchanged, it's a good
2799 // approximation.
2800 rule = new SameValueRule();
2801 }
2802 return DoRule(reg, rule);
2803}
2804
2805bool CallFrameInfo::ReadEntryPrologue(const uint8_t* cursor, Entry* entry) {
2806 const uint8_t* buffer_end = buffer_ + buffer_length_;
2807
2808 // Initialize enough of ENTRY for use in error reporting.
2809 entry->offset = cursor - buffer_;
2810 entry->start = cursor;
2811 entry->kind = kUnknown;
2812 entry->end = NULL;
2813
2814 // Read the initial length. This sets reader_'s offset size.
2815 size_t length_size;
2816 uint64_t length = reader_->ReadInitialLength(cursor, &length_size);
2817 if (length_size > size_t(buffer_end - cursor))
2818 return ReportIncomplete(entry);
2819 cursor += length_size;
2820
2821 // In a .eh_frame section, a length of zero marks the end of the series
2822 // of entries.
2823 if (length == 0 && eh_frame_) {
2824 entry->kind = kTerminator;
2825 entry->end = cursor;
2826 return true;
2827 }
2828
2829 // Validate the length.
2830 if (length > size_t(buffer_end - cursor))
2831 return ReportIncomplete(entry);
2832
2833 // The length is the number of bytes after the initial length field;
2834 // we have that position handy at this point, so compute the end
2835 // now. (If we're parsing 64-bit-offset DWARF on a 32-bit machine,
2836 // and the length didn't fit in a size_t, we would have rejected it
2837 // above.)
2838 entry->end = cursor + length;
2839
2840 // Parse the next field: either the offset of a CIE or a CIE id.
2841 size_t offset_size = reader_->OffsetSize();
2842 if (offset_size > size_t(entry->end - cursor)) return ReportIncomplete(entry);
2843 entry->id = reader_->ReadOffset(cursor);
2844
2845 // Don't advance cursor past id field yet; in .eh_frame data we need
2846 // the id's position to compute the section offset of an FDE's CIE.
2847
2848 // Now we can decide what kind of entry this is.
2849 if (eh_frame_) {
2850 // In .eh_frame data, an ID of zero marks the entry as a CIE, and
2851 // anything else is an offset from the id field of the FDE to the start
2852 // of the CIE.
2853 if (entry->id == 0) {
2854 entry->kind = kCIE;
2855 } else {
2856 entry->kind = kFDE;
2857 // Turn the offset from the id into an offset from the buffer's start.
2858 entry->id = (cursor - buffer_) - entry->id;
2859 }
2860 } else {
2861 // In DWARF CFI data, an ID of ~0 (of the appropriate width, given the
2862 // offset size for the entry) marks the entry as a CIE, and anything
2863 // else is the offset of the CIE from the beginning of the section.
2864 if (offset_size == 4)
2865 entry->kind = (entry->id == 0xffffffff) ? kCIE : kFDE;
2866 else {
2867 assert(offset_size == 8);
2868 entry->kind = (entry->id == 0xffffffffffffffffULL) ? kCIE : kFDE;
2869 }
2870 }
2871
2872 // Now advance cursor past the id.
2873 cursor += offset_size;
2874
2875 // The fields specific to this kind of entry start here.
2876 entry->fields = cursor;
2877
2878 entry->cie = NULL;
2879
2880 return true;
2881}
2882
2883bool CallFrameInfo::ReadCIEFields(CIE* cie) {
2884 const uint8_t* cursor = cie->fields;
2885 size_t len;
2886
2887 assert(cie->kind == kCIE);
2888
2889 // Prepare for early exit.
2890 cie->version = 0;
2891 cie->augmentation.clear();
2892 cie->code_alignment_factor = 0;
2893 cie->data_alignment_factor = 0;
2894 cie->return_address_register = 0;
2895 cie->has_z_augmentation = false;
2896 cie->pointer_encoding = DW_EH_PE_absptr;
2897 cie->instructions = 0;
2898
2899 // Parse the version number.
2900 if (cie->end - cursor < 1)
2901 return ReportIncomplete(cie);
2902 cie->version = reader_->ReadOneByte(cursor);
2903 cursor++;
2904
2905 // If we don't recognize the version, we can't parse any more fields of the
2906 // CIE. For DWARF CFI, we handle versions 1 through 4 (there was never a
2907 // version 2 of CFI data). For .eh_frame, we handle versions 1 and 4 as well;
2908 // the difference between those versions seems to be the same as for
2909 // .debug_frame.
2910 if (cie->version < 1 || cie->version > 4) {
2911 reporter_->UnrecognizedVersion(cie->offset, cie->version);
2912 return false;
2913 }
2914
2915 const uint8_t* augmentation_start = cursor;
2916 const uint8_t* augmentation_end =
2917 reinterpret_cast<const uint8_t*>(memchr(augmentation_start, '\0',
2918 cie->end - augmentation_start));
2919 if (! augmentation_end) return ReportIncomplete(cie);
2920 cursor = augmentation_end;
2921 cie->augmentation = string(reinterpret_cast<const char*>(augmentation_start),
2922 cursor - augmentation_start);
2923 // Skip the terminating '\0'.
2924 cursor++;
2925
2926 // Is this CFI augmented?
2927 if (!cie->augmentation.empty()) {
2928 // Is it an augmentation we recognize?
2929 if (cie->augmentation[0] == DW_Z_augmentation_start) {
2930 // Linux C++ ABI 'z' augmentation, used for exception handling data.
2931 cie->has_z_augmentation = true;
2932 } else {
2933 // Not an augmentation we recognize. Augmentations can have arbitrary
2934 // effects on the form of rest of the content, so we have to give up.
2935 reporter_->UnrecognizedAugmentation(cie->offset, cie->augmentation);
2936 return false;
2937 }
2938 }
2939
2940 if (cie->version >= 4) {
2941 cie->address_size = *cursor++;
2942 if (cie->address_size != 8 && cie->address_size != 4) {
2943 reporter_->UnexpectedAddressSize(cie->offset, cie->address_size);
2944 return false;
2945 }
2946
2947 cie->segment_size = *cursor++;
2948 if (cie->segment_size != 0) {
2949 reporter_->UnexpectedSegmentSize(cie->offset, cie->segment_size);
2950 return false;
2951 }
2952 }
2953
2954 // Parse the code alignment factor.
2955 cie->code_alignment_factor = reader_->ReadUnsignedLEB128(cursor, &len);
2956 if (size_t(cie->end - cursor) < len) return ReportIncomplete(cie);
2957 cursor += len;
2958
2959 // Parse the data alignment factor.
2960 cie->data_alignment_factor = reader_->ReadSignedLEB128(cursor, &len);
2961 if (size_t(cie->end - cursor) < len) return ReportIncomplete(cie);
2962 cursor += len;
2963
2964 // Parse the return address register. This is a ubyte in version 1, and
2965 // a ULEB128 in version 3.
2966 if (cie->version == 1) {
2967 if (cursor >= cie->end) return ReportIncomplete(cie);
2968 cie->return_address_register = uint8_t(*cursor++);
2969 } else {
2970 cie->return_address_register = reader_->ReadUnsignedLEB128(cursor, &len);
2971 if (size_t(cie->end - cursor) < len) return ReportIncomplete(cie);
2972 cursor += len;
2973 }
2974
2975 // If we have a 'z' augmentation string, find the augmentation data and
2976 // use the augmentation string to parse it.
2977 if (cie->has_z_augmentation) {
2978 uint64_t data_size = reader_->ReadUnsignedLEB128(cursor, &len);
2979 if (size_t(cie->end - cursor) < len + data_size)
2980 return ReportIncomplete(cie);
2981 cursor += len;
2982 const uint8_t* data = cursor;
2983 cursor += data_size;
2984 const uint8_t* data_end = cursor;
2985
2986 cie->has_z_lsda = false;
2987 cie->has_z_personality = false;
2988 cie->has_z_signal_frame = false;
2989
2990 // Walk the augmentation string, and extract values from the
2991 // augmentation data as the string directs.
2992 for (size_t i = 1; i < cie->augmentation.size(); i++) {
2993 switch (cie->augmentation[i]) {
2994 case DW_Z_has_LSDA:
2995 // The CIE's augmentation data holds the language-specific data
2996 // area pointer's encoding, and the FDE's augmentation data holds
2997 // the pointer itself.
2998 cie->has_z_lsda = true;
2999 // Fetch the LSDA encoding from the augmentation data.
3000 if (data >= data_end) return ReportIncomplete(cie);
3001 cie->lsda_encoding = DwarfPointerEncoding(*data++);
3002 if (!reader_->ValidEncoding(cie->lsda_encoding)) {
3003 reporter_->InvalidPointerEncoding(cie->offset, cie->lsda_encoding);
3004 return false;
3005 }
3006 // Don't check if the encoding is usable here --- we haven't
3007 // read the FDE's fields yet, so we're not prepared for
3008 // DW_EH_PE_funcrel, although that's a fine encoding for the
3009 // LSDA to use, since it appears in the FDE.
3010 break;
3011
3012 case DW_Z_has_personality_routine:
3013 // The CIE's augmentation data holds the personality routine
3014 // pointer's encoding, followed by the pointer itself.
3015 cie->has_z_personality = true;
3016 // Fetch the personality routine pointer's encoding from the
3017 // augmentation data.
3018 if (data >= data_end) return ReportIncomplete(cie);
3019 cie->personality_encoding = DwarfPointerEncoding(*data++);
3020 if (!reader_->ValidEncoding(cie->personality_encoding)) {
3021 reporter_->InvalidPointerEncoding(cie->offset,
3022 cie->personality_encoding);
3023 return false;
3024 }
3025 if (!reader_->UsableEncoding(cie->personality_encoding)) {
3026 reporter_->UnusablePointerEncoding(cie->offset,
3027 cie->personality_encoding);
3028 return false;
3029 }
3030 // Fetch the personality routine's pointer itself from the data.
3031 cie->personality_address =
3032 reader_->ReadEncodedPointer(data, cie->personality_encoding,
3033 &len);
3034 if (len > size_t(data_end - data))
3035 return ReportIncomplete(cie);
3036 data += len;
3037 break;
3038
3039 case DW_Z_has_FDE_address_encoding:
3040 // The CIE's augmentation data holds the pointer encoding to use
3041 // for addresses in the FDE.
3042 if (data >= data_end) return ReportIncomplete(cie);
3043 cie->pointer_encoding = DwarfPointerEncoding(*data++);
3044 if (!reader_->ValidEncoding(cie->pointer_encoding)) {
3045 reporter_->InvalidPointerEncoding(cie->offset,
3046 cie->pointer_encoding);
3047 return false;
3048 }
3049 if (!reader_->UsableEncoding(cie->pointer_encoding)) {
3050 reporter_->UnusablePointerEncoding(cie->offset,
3051 cie->pointer_encoding);
3052 return false;
3053 }
3054 break;
3055
3056 case DW_Z_is_signal_trampoline:
3057 // Frames using this CIE are signal delivery frames.
3058 cie->has_z_signal_frame = true;
3059 break;
3060
3061 default:
3062 // An augmentation we don't recognize.
3063 reporter_->UnrecognizedAugmentation(cie->offset, cie->augmentation);
3064 return false;
3065 }
3066 }
3067 }
3068
3069 // The CIE's instructions start here.
3070 cie->instructions = cursor;
3071
3072 return true;
3073}
3074
3075bool CallFrameInfo::ReadFDEFields(FDE* fde) {
3076 const uint8_t* cursor = fde->fields;
3077 size_t size;
3078
3079 fde->address = reader_->ReadEncodedPointer(cursor, fde->cie->pointer_encoding,
3080 &size);
3081 if (size > size_t(fde->end - cursor))
3082 return ReportIncomplete(fde);
3083 cursor += size;
3084 reader_->SetFunctionBase(fde->address);
3085
3086 // For the length, we strip off the upper nybble of the encoding used for
3087 // the starting address.
3088 DwarfPointerEncoding length_encoding =
3089 DwarfPointerEncoding(fde->cie->pointer_encoding & 0x0f);
3090 fde->size = reader_->ReadEncodedPointer(cursor, length_encoding, &size);
3091 if (size > size_t(fde->end - cursor))
3092 return ReportIncomplete(fde);
3093 cursor += size;
3094
3095 // If the CIE has a 'z' augmentation string, then augmentation data
3096 // appears here.
3097 if (fde->cie->has_z_augmentation) {
3098 uint64_t data_size = reader_->ReadUnsignedLEB128(cursor, &size);
3099 if (size_t(fde->end - cursor) < size + data_size)
3100 return ReportIncomplete(fde);
3101 cursor += size;
3102
3103 // In the abstract, we should walk the augmentation string, and extract
3104 // items from the FDE's augmentation data as we encounter augmentation
3105 // string characters that specify their presence: the ordering of items
3106 // in the augmentation string determines the arrangement of values in
3107 // the augmentation data.
3108 //
3109 // In practice, there's only ever one value in FDE augmentation data
3110 // that we support --- the LSDA pointer --- and we have to bail if we
3111 // see any unrecognized augmentation string characters. So if there is
3112 // anything here at all, we know what it is, and where it starts.
3113 if (fde->cie->has_z_lsda) {
3114 // Check whether the LSDA's pointer encoding is usable now: only once
3115 // we've parsed the FDE's starting address do we call reader_->
3116 // SetFunctionBase, so that the DW_EH_PE_funcrel encoding becomes
3117 // usable.
3118 if (!reader_->UsableEncoding(fde->cie->lsda_encoding)) {
3119 reporter_->UnusablePointerEncoding(fde->cie->offset,
3120 fde->cie->lsda_encoding);
3121 return false;
3122 }
3123
3124 fde->lsda_address =
3125 reader_->ReadEncodedPointer(cursor, fde->cie->lsda_encoding, &size);
3126 if (size > data_size)
3127 return ReportIncomplete(fde);
3128 // Ideally, we would also complain here if there were unconsumed
3129 // augmentation data.
3130 }
3131
3132 cursor += data_size;
3133 }
3134
3135 // The FDE's instructions start after those.
3136 fde->instructions = cursor;
3137
3138 return true;
3139}
3140
3141bool CallFrameInfo::Start() {
3142 const uint8_t* buffer_end = buffer_ + buffer_length_;
3143 const uint8_t* cursor;
3144 bool all_ok = true;
3145 const uint8_t* entry_end;
3146 bool ok;
3147
3148 // Traverse all the entries in buffer_, skipping CIEs and offering
3149 // FDEs to the handler.
3150 for (cursor = buffer_; cursor < buffer_end;
3151 cursor = entry_end, all_ok = all_ok && ok) {
3152 FDE fde;
3153
3154 // Make it easy to skip this entry with 'continue': assume that
3155 // things are not okay until we've checked all the data, and
3156 // prepare the address of the next entry.
3157 ok = false;
3158
3159 // Read the entry's prologue.
3160 if (!ReadEntryPrologue(cursor, &fde)) {
3161 if (!fde.end) {
3162 // If we couldn't even figure out this entry's extent, then we
3163 // must stop processing entries altogether.
3164 all_ok = false;
3165 break;
3166 }
3167 entry_end = fde.end;
3168 continue;
3169 }
3170
3171 // The next iteration picks up after this entry.
3172 entry_end = fde.end;
3173
3174 // Did we see an .eh_frame terminating mark?
3175 if (fde.kind == kTerminator) {
3176 // If there appears to be more data left in the section after the
3177 // terminating mark, warn the user. But this is just a warning;
3178 // we leave all_ok true.
3179 if (fde.end < buffer_end) reporter_->EarlyEHTerminator(fde.offset);
3180 break;
3181 }
3182
3183 // In this loop, we skip CIEs. We only parse them fully when we
3184 // parse an FDE that refers to them. This limits our memory
3185 // consumption (beyond the buffer itself) to that needed to
3186 // process the largest single entry.
3187 if (fde.kind != kFDE) {
3188 ok = true;
3189 continue;
3190 }
3191
3192 // Validate the CIE pointer.
3193 if (fde.id > buffer_length_) {
3194 reporter_->CIEPointerOutOfRange(fde.offset, fde.id);
3195 continue;
3196 }
3197
3198 CIE cie;
3199
3200 // Parse this FDE's CIE header.
3201 if (!ReadEntryPrologue(buffer_ + fde.id, &cie))
3202 continue;
3203 // This had better be an actual CIE.
3204 if (cie.kind != kCIE) {
3205 reporter_->BadCIEId(fde.offset, fde.id);
3206 continue;
3207 }
3208 if (!ReadCIEFields(&cie))
3209 continue;
3210
3211 // TODO(nbilling): This could lead to strange behavior if a single buffer
3212 // contained a mixture of DWARF versions as well as address sizes. Not
3213 // sure if it's worth handling such a case.
3214
3215 // DWARF4 CIE specifies address_size, so use it for this call frame.
3216 if (cie.version >= 4) {
3217 reader_->SetAddressSize(cie.address_size);
3218 }
3219
3220 // We now have the values that govern both the CIE and the FDE.
3221 cie.cie = &cie;
3222 fde.cie = &cie;
3223
3224 // Parse the FDE's header.
3225 if (!ReadFDEFields(&fde))
3226 continue;
3227
3228 // Call Entry to ask the consumer if they're interested.
3229 if (!handler_->Entry(fde.offset, fde.address, fde.size,
3230 cie.version, cie.augmentation,
3231 cie.return_address_register)) {
3232 // The handler isn't interested in this entry. That's not an error.
3233 ok = true;
3234 continue;
3235 }
3236
3237 if (cie.has_z_augmentation) {
3238 // Report the personality routine address, if we have one.
3239 if (cie.has_z_personality) {
3240 if (!handler_
3241 ->PersonalityRoutine(cie.personality_address,
3242 IsIndirectEncoding(cie.personality_encoding)))
3243 continue;
3244 }
3245
3246 // Report the language-specific data area address, if we have one.
3247 if (cie.has_z_lsda) {
3248 if (!handler_
3249 ->LanguageSpecificDataArea(fde.lsda_address,
3250 IsIndirectEncoding(cie.lsda_encoding)))
3251 continue;
3252 }
3253
3254 // If this is a signal-handling frame, report that.
3255 if (cie.has_z_signal_frame) {
3256 if (!handler_->SignalHandler())
3257 continue;
3258 }
3259 }
3260
3261 // Interpret the CIE's instructions, and then the FDE's instructions.
3262 State state(reader_, handler_, reporter_, fde.address);
3263 ok = state.InterpretCIE(cie) && state.InterpretFDE(fde);
3264
3265 // Tell the ByteReader that the function start address from the
3266 // FDE header is no longer valid.
3267 reader_->ClearFunctionBase();
3268
3269 // Report the end of the entry.
3270 handler_->End();
3271 }
3272
3273 return all_ok;
3274}
3275
3276const char* CallFrameInfo::KindName(EntryKind kind) {
3277 if (kind == CallFrameInfo::kUnknown)
3278 return "entry";
3279 else if (kind == CallFrameInfo::kCIE)
3280 return "common information entry";
3281 else if (kind == CallFrameInfo::kFDE)
3282 return "frame description entry";
3283 else {
3284 assert (kind == CallFrameInfo::kTerminator);
3285 return ".eh_frame sequence terminator";
3286 }
3287}
3288
3289bool CallFrameInfo::ReportIncomplete(Entry* entry) {
3290 reporter_->Incomplete(entry->offset, entry->kind);
3291 return false;
3292}
3293
3294void CallFrameInfo::Reporter::Incomplete(uint64_t offset,
3295 CallFrameInfo::EntryKind kind) {
3296 fprintf(stderr,
3297 "%s: CFI %s at offset 0x%" PRIx64 " in '%s': entry ends early\n",
3298 filename_.c_str(), CallFrameInfo::KindName(kind), offset,
3299 section_.c_str());
3300}
3301
3302void CallFrameInfo::Reporter::EarlyEHTerminator(uint64_t offset) {
3303 fprintf(stderr,
3304 "%s: CFI at offset 0x%" PRIx64 " in '%s': saw end-of-data marker"
3305 " before end of section contents\n",
3306 filename_.c_str(), offset, section_.c_str());
3307}
3308
3309void CallFrameInfo::Reporter::CIEPointerOutOfRange(uint64_t offset,
3310 uint64_t cie_offset) {
3311 fprintf(stderr,
3312 "%s: CFI frame description entry at offset 0x%" PRIx64 " in '%s':"
3313 " CIE pointer is out of range: 0x%" PRIx64 "\n",
3314 filename_.c_str(), offset, section_.c_str(), cie_offset);
3315}
3316
3317void CallFrameInfo::Reporter::BadCIEId(uint64_t offset, uint64_t cie_offset) {
3318 fprintf(stderr,
3319 "%s: CFI frame description entry at offset 0x%" PRIx64 " in '%s':"
3320 " CIE pointer does not point to a CIE: 0x%" PRIx64 "\n",
3321 filename_.c_str(), offset, section_.c_str(), cie_offset);
3322}
3323
3324void CallFrameInfo::Reporter::UnexpectedAddressSize(uint64_t offset,
3325 uint8_t address_size) {
3326 fprintf(stderr,
3327 "%s: CFI frame description entry at offset 0x%" PRIx64 " in '%s':"
3328 " CIE specifies unexpected address size: %d\n",
3329 filename_.c_str(), offset, section_.c_str(), address_size);
3330}
3331
3332void CallFrameInfo::Reporter::UnexpectedSegmentSize(uint64_t offset,
3333 uint8_t segment_size) {
3334 fprintf(stderr,
3335 "%s: CFI frame description entry at offset 0x%" PRIx64 " in '%s':"
3336 " CIE specifies unexpected segment size: %d\n",
3337 filename_.c_str(), offset, section_.c_str(), segment_size);
3338}
3339
3340void CallFrameInfo::Reporter::UnrecognizedVersion(uint64_t offset, int version) {
3341 fprintf(stderr,
3342 "%s: CFI frame description entry at offset 0x%" PRIx64 " in '%s':"
3343 " CIE specifies unrecognized version: %d\n",
3344 filename_.c_str(), offset, section_.c_str(), version);
3345}
3346
3347void CallFrameInfo::Reporter::UnrecognizedAugmentation(uint64_t offset,
3348 const string& aug) {
3349 fprintf(stderr,
3350 "%s: CFI frame description entry at offset 0x%" PRIx64 " in '%s':"
3351 " CIE specifies unrecognized augmentation: '%s'\n",
3352 filename_.c_str(), offset, section_.c_str(), aug.c_str());
3353}
3354
3355void CallFrameInfo::Reporter::InvalidPointerEncoding(uint64_t offset,
3356 uint8_t encoding) {
3357 fprintf(stderr,
3358 "%s: CFI common information entry at offset 0x%" PRIx64 " in '%s':"
3359 " 'z' augmentation specifies invalid pointer encoding: 0x%02x\n",
3360 filename_.c_str(), offset, section_.c_str(), encoding);
3361}
3362
3363void CallFrameInfo::Reporter::UnusablePointerEncoding(uint64_t offset,
3364 uint8_t encoding) {
3365 fprintf(stderr,
3366 "%s: CFI common information entry at offset 0x%" PRIx64 " in '%s':"
3367 " 'z' augmentation specifies a pointer encoding for which"
3368 " we have no base address: 0x%02x\n",
3369 filename_.c_str(), offset, section_.c_str(), encoding);
3370}
3371
3372void CallFrameInfo::Reporter::RestoreInCIE(uint64_t offset, uint64_t insn_offset) {
3373 fprintf(stderr,
3374 "%s: CFI common information entry at offset 0x%" PRIx64 " in '%s':"
3375 " the DW_CFA_restore instruction at offset 0x%" PRIx64
3376 " cannot be used in a common information entry\n",
3377 filename_.c_str(), offset, section_.c_str(), insn_offset);
3378}
3379
3380void CallFrameInfo::Reporter::BadInstruction(uint64_t offset,
3381 CallFrameInfo::EntryKind kind,
3382 uint64_t insn_offset) {
3383 fprintf(stderr,
3384 "%s: CFI %s at offset 0x%" PRIx64 " in section '%s':"
3385 " the instruction at offset 0x%" PRIx64 " is unrecognized\n",
3386 filename_.c_str(), CallFrameInfo::KindName(kind),
3387 offset, section_.c_str(), insn_offset);
3388}
3389
3390void CallFrameInfo::Reporter::NoCFARule(uint64_t offset,
3391 CallFrameInfo::EntryKind kind,
3392 uint64_t insn_offset) {
3393 fprintf(stderr,
3394 "%s: CFI %s at offset 0x%" PRIx64 " in section '%s':"
3395 " the instruction at offset 0x%" PRIx64 " assumes that a CFA rule has"
3396 " been set, but none has been set\n",
3397 filename_.c_str(), CallFrameInfo::KindName(kind), offset,
3398 section_.c_str(), insn_offset);
3399}
3400
3401void CallFrameInfo::Reporter::EmptyStateStack(uint64_t offset,
3402 CallFrameInfo::EntryKind kind,
3403 uint64_t insn_offset) {
3404 fprintf(stderr,
3405 "%s: CFI %s at offset 0x%" PRIx64 " in section '%s':"
3406 " the DW_CFA_restore_state instruction at offset 0x%" PRIx64
3407 " should pop a saved state from the stack, but the stack is empty\n",
3408 filename_.c_str(), CallFrameInfo::KindName(kind), offset,
3409 section_.c_str(), insn_offset);
3410}
3411
3412void CallFrameInfo::Reporter::ClearingCFARule(uint64_t offset,
3413 CallFrameInfo::EntryKind kind,
3414 uint64_t insn_offset) {
3415 fprintf(stderr,
3416 "%s: CFI %s at offset 0x%" PRIx64 " in section '%s':"
3417 " the DW_CFA_restore_state instruction at offset 0x%" PRIx64
3418 " would clear the CFA rule in effect\n",
3419 filename_.c_str(), CallFrameInfo::KindName(kind), offset,
3420 section_.c_str(), insn_offset);
3421}
3422
3423} // namespace google_breakpad
3424