1 | // -*- mode: C++ -*- |
2 | |
3 | // Copyright (c) 2010 Google Inc. All Rights Reserved. |
4 | // |
5 | // Redistribution and use in source and binary forms, with or without |
6 | // modification, are permitted provided that the following conditions are |
7 | // met: |
8 | // |
9 | // * Redistributions of source code must retain the above copyright |
10 | // notice, this list of conditions and the following disclaimer. |
11 | // * Redistributions in binary form must reproduce the above |
12 | // copyright notice, this list of conditions and the following disclaimer |
13 | // in the documentation and/or other materials provided with the |
14 | // distribution. |
15 | // * Neither the name of Google Inc. nor the names of its |
16 | // contributors may be used to endorse or promote products derived from |
17 | // this software without specific prior written permission. |
18 | // |
19 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
20 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
21 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
22 | // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
23 | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
24 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
25 | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
26 | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
27 | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
28 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
29 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
30 | |
31 | // CFI reader author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com> |
32 | |
// This file contains definitions related to the DWARF2/3 reader and
// its handler interfaces.
// The DWARF2/3 specification can be found at
// http://dwarf.freestandards.org and should be considered required
// reading if you wish to modify the implementation.
// Only a cursory attempt is made to explain terminology that is
// used here, as it is much better explained in the standard documents.
40 | #ifndef COMMON_DWARF_DWARF2READER_H__ |
41 | #define COMMON_DWARF_DWARF2READER_H__ |
42 | |
43 | #include <assert.h> |
44 | #include <stdint.h> |
45 | |
46 | #include <list> |
47 | #include <map> |
48 | #include <string> |
49 | #include <utility> |
50 | #include <vector> |
51 | #include <memory> |
52 | |
53 | #include "common/dwarf/bytereader.h" |
54 | #include "common/dwarf/dwarf2enums.h" |
55 | #include "common/dwarf/types.h" |
56 | #include "common/using_std_string.h" |
57 | #include "common/dwarf/elf_reader.h" |
58 | |
59 | namespace google_breakpad { |
60 | struct LineStateMachine; |
61 | class Dwarf2Handler; |
62 | class LineInfoHandler; |
63 | class DwpReader; |
64 | |
// Maps a section name to a (data, size) pair: a pointer to the section's
// contents and the section's size.
using SectionMap = std::map<string, std::pair<const uint8_t*, uint64_t> >;
68 | |
69 | // Abstract away the difference between elf and mach-o section names. |
70 | // Elf-names use ".section_name, mach-o uses "__section_name". Pass "name" in |
71 | // the elf form, ".section_name". |
72 | const SectionMap::const_iterator GetSectionByName(const SectionMap& |
73 | sections, const char* name); |
74 | |
75 | // Most of the time, this struct functions as a simple attribute and form pair. |
76 | // However, Dwarf5 DW_FORM_implicit_const means that a form may have its value |
77 | // in line in the abbrev table, and that value must be associated with the |
78 | // pair until the attr's value is needed. |
79 | struct AttrForm { |
80 | AttrForm(enum DwarfAttribute attr, enum DwarfForm form, uint64_t value) : |
81 | attr_(attr), form_(form), value_(value) { } |
82 | |
83 | enum DwarfAttribute attr_; |
84 | enum DwarfForm form_; |
85 | uint64_t value_; |
86 | }; |
87 | typedef std::list<AttrForm> AttributeList; |
88 | typedef AttributeList::iterator AttributeIterator; |
89 | typedef AttributeList::const_iterator ConstAttributeIterator; |
90 | |
// In-memory form of a .debug_line program header. It is not laid out like the
// header in the file, because the file's header may use 32-bit or 64-bit
// lengths.
// NOTE(review): the struct tag and std_opcode_lengths are fixed by their uses
// in LineInfo; the remaining field names follow the DWARF line-program header
// and upstream Breakpad -- confirm against the accesses in the .cc file.
struct LineInfoHeader {
  uint64_t total_length;
  uint16_t version;
  uint64_t prologue_length;
  uint8_t min_insn_length;  // insn stands for instruction
  bool default_is_stmt;     // stmt stands for statement
  int8_t line_base;
  uint8_t line_range;
  uint8_t opcode_base;
  // Use a pointer so that signalsafe_addr2line is able to use this structure
  // without heap allocation problem.
  std::vector<unsigned char>* std_opcode_lengths;
};
104 | |
105 | class LineInfo { |
106 | public: |
107 | |
108 | // Initializes a .debug_line reader. Buffer and buffer length point |
109 | // to the beginning and length of the line information to read. |
110 | // Reader is a ByteReader class that has the endianness set |
111 | // properly. |
112 | LineInfo(const uint8_t* buffer, uint64_t buffer_length, |
113 | ByteReader* reader, const uint8_t* string_buffer, |
114 | size_t string_buffer_length, const uint8_t* line_string_buffer, |
115 | size_t line_string_buffer_length, LineInfoHandler* handler); |
116 | |
117 | virtual ~LineInfo() { |
118 | if (header_.std_opcode_lengths) { |
119 | delete header_.std_opcode_lengths; |
120 | } |
121 | } |
122 | |
123 | // Start processing line info, and calling callbacks in the handler. |
124 | // Consumes the line number information for a single compilation unit. |
125 | // Returns the number of bytes processed. |
126 | uint64_t Start(); |
127 | |
128 | // Process a single line info opcode at START using the state |
129 | // machine at LSM. Return true if we should define a line using the |
130 | // current state of the line state machine. Place the length of the |
131 | // opcode in LEN. |
132 | // If LSM_PASSES_PC is non-NULL, this function also checks if the lsm |
133 | // passes the address of PC. In other words, LSM_PASSES_PC will be |
134 | // set to true, if the following condition is met. |
135 | // |
136 | // lsm's old address < PC <= lsm's new address |
137 | static bool ProcessOneOpcode(ByteReader* reader, |
138 | LineInfoHandler* handler, |
139 | const struct LineInfoHeader& , |
140 | const uint8_t* start, |
141 | struct LineStateMachine* lsm, |
142 | size_t* len, |
143 | uintptr pc, |
144 | bool* lsm_passes_pc); |
145 | |
146 | private: |
147 | // Reads the DWARF2/3 header for this line info. |
148 | void (); |
149 | |
150 | // Reads the DWARF2/3 line information |
151 | void ReadLines(); |
152 | |
153 | // Read the DWARF5 types and forms for the file and directory tables. |
154 | void ReadTypesAndForms(const uint8_t** lineptr, uint32_t* content_types, |
155 | uint32_t* content_forms, uint32_t max_types, |
156 | uint32_t* format_count); |
157 | |
158 | // Read a row from the dwarf5 LineInfo file table. |
159 | void ReadFileRow(const uint8_t** lineptr, const uint32_t* content_types, |
160 | const uint32_t* content_forms, uint32_t row, |
161 | uint32_t format_count); |
162 | |
163 | // Read and return the data at *lineptr according to form. Advance |
164 | // *lineptr appropriately. |
165 | uint64_t ReadUnsignedData(uint32_t form, const uint8_t** lineptr); |
166 | |
167 | // Read and return the data at *lineptr according to form. Advance |
168 | // *lineptr appropriately. |
169 | const char* ReadStringForm(uint32_t form, const uint8_t** lineptr); |
170 | |
171 | // The associated handler to call processing functions in |
172 | LineInfoHandler* handler_; |
173 | |
174 | // The associated ByteReader that handles endianness issues for us |
175 | ByteReader* reader_; |
176 | |
177 | // A DWARF line info header. This is not the same size as in the actual file, |
178 | // as the one in the file may have a 32 bit or 64 bit lengths |
179 | |
180 | struct LineInfoHeader ; |
181 | |
182 | // buffer is the buffer for our line info, starting at exactly where |
183 | // the line info to read is. after_header is the place right after |
184 | // the end of the line information header. |
185 | const uint8_t* buffer_; |
186 | #ifndef NDEBUG |
187 | uint64_t buffer_length_; |
188 | #endif |
189 | // Convenience pointers into .debug_str and .debug_line_str. These exactly |
190 | // correspond to those in the compilation unit. |
191 | const uint8_t* string_buffer_; |
192 | #ifndef NDEBUG |
193 | uint64_t string_buffer_length_; |
194 | #endif |
195 | const uint8_t* line_string_buffer_; |
196 | #ifndef NDEBUG |
197 | uint64_t line_string_buffer_length_; |
198 | #endif |
199 | |
200 | const uint8_t* ; |
201 | }; |
202 | |
// Callback interface through which the line info reader reports what it
// finds to the client. Each virtual function corresponds to one kind of
// event; the default implementations do nothing, so a client overrides only
// the events it cares about.
class LineInfoHandler {
 public:
  LineInfoHandler() {}

  virtual ~LineInfoHandler() {}

  // A directory was defined. NAME is the directory name and DIR_NUM is the
  // directory number.
  virtual void DefineDir(const string& name, uint32_t dir_num) {}

  // A file was defined. NAME is the filename. FILE_NUM is the file number,
  // or -1 when the file index is the next index after the last numbered
  // index (this happens when files are dynamically defined by the line
  // program). DIR_NUM is the directory index for the directory name of this
  // file, MOD_TIME is the modification time of the file, and LENGTH is the
  // length of the file.
  virtual void DefineFile(const string& name,
                          int32_t file_num,
                          uint32_t dir_num,
                          uint64_t mod_time,
                          uint64_t length) {}

  // A new (line, address) pair is ready. ADDRESS is the address of the code
  // and LENGTH is the length of its machine code in bytes. FILE_NUM is the
  // file number containing the code, LINE_NUM is the line number in that
  // file, and COLUMN_NUM is the column the code starts at, or 0 if it is
  // not known.
  virtual void AddLine(uint64_t address,
                       uint64_t length,
                       uint32_t file_num,
                       uint32_t line_num,
                       uint32_t column_num) {}
};
238 | |
// Callback interface that receives the entries of a range list. Both
// callbacks default to doing nothing.
class RangeListHandler {
 public:
  RangeListHandler() {}

  virtual ~RangeListHandler() {}

  // Reports one range, given by BEGIN and END.
  virtual void AddRange(uint64_t begin, uint64_t end) {}

  // Reports that processing of the range list is finished.
  virtual void Finish() {}
};
251 | |
252 | class RangeListReader { |
253 | public: |
254 | // Reading a range list requires quite a bit of information |
255 | // from the compilation unit. Package it conveniently. |
256 | struct CURangesInfo { |
257 | CURangesInfo() : |
258 | version_(0), base_address_(0), ranges_base_(0), |
259 | buffer_(nullptr), size_(0), addr_buffer_(nullptr), |
260 | addr_buffer_size_(0), addr_base_(0) { } |
261 | |
262 | uint16_t version_; |
263 | // Ranges base address. Ordinarily the CU's low_pc. |
264 | uint64_t base_address_; |
265 | // Offset into .debug_rnglists for this CU's rangelists. |
266 | uint64_t ranges_base_; |
267 | // Contents of either .debug_ranges or .debug_rnglists. |
268 | const uint8_t* buffer_; |
269 | uint64_t size_; |
270 | // Contents of .debug_addr. This cu's contribution starts at |
271 | // addr_base_ |
272 | const uint8_t* addr_buffer_; |
273 | uint64_t addr_buffer_size_; |
274 | uint64_t addr_base_; |
275 | }; |
276 | |
277 | RangeListReader(ByteReader* reader, CURangesInfo* cu_info, |
278 | RangeListHandler* handler) : |
279 | reader_(reader), cu_info_(cu_info), handler_(handler), |
280 | offset_array_(0) { } |
281 | |
282 | // Read ranges from cu_info as specified by form and data. |
283 | bool ReadRanges(enum DwarfForm form, uint64_t data); |
284 | |
285 | private: |
286 | // Read dwarf4 .debug_ranges at offset. |
287 | bool ReadDebugRanges(uint64_t offset); |
288 | // Read dwarf5 .debug_rngslist at offset. |
289 | bool ReadDebugRngList(uint64_t offset); |
290 | |
291 | // Convenience functions to handle the mechanics of reading entries in the |
292 | // ranges section. |
293 | uint64_t ReadULEB(uint64_t offset, uint64_t* value) { |
294 | size_t len; |
295 | *value = reader_->ReadUnsignedLEB128(cu_info_->buffer_ + offset, &len); |
296 | return len; |
297 | } |
298 | |
299 | uint64_t ReadAddress(uint64_t offset, uint64_t* value) { |
300 | *value = reader_->ReadAddress(cu_info_->buffer_ + offset); |
301 | return reader_->AddressSize(); |
302 | } |
303 | |
304 | // Read the address at this CU's addr_index in the .debug_addr section. |
305 | uint64_t GetAddressAtIndex(uint64_t addr_index) { |
306 | assert(cu_info_->addr_buffer_ != nullptr); |
307 | uint64_t offset = |
308 | cu_info_->addr_base_ + addr_index * reader_->AddressSize(); |
309 | assert(offset < cu_info_->addr_buffer_size_); |
310 | return reader_->ReadAddress(cu_info_->addr_buffer_ + offset); |
311 | } |
312 | |
313 | ByteReader* reader_; |
314 | CURangesInfo* cu_info_; |
315 | RangeListHandler* handler_; |
316 | uint64_t offset_array_; |
317 | }; |
318 | |
319 | // This class is the main interface between the reader and the |
320 | // client. The virtual functions inside this get called for |
321 | // interesting events that happen during DWARF2 reading. |
322 | // The default implementation skips everything. |
323 | class Dwarf2Handler { |
324 | public: |
325 | Dwarf2Handler() { } |
326 | |
327 | virtual ~Dwarf2Handler() { } |
328 | |
329 | // Start to process a compilation unit at OFFSET from the beginning of the |
330 | // .debug_info section. Return false if you would like to skip this |
331 | // compilation unit. |
332 | virtual bool StartCompilationUnit(uint64_t offset, uint8_t address_size, |
333 | uint8_t offset_size, uint64_t cu_length, |
334 | uint8_t dwarf_version) { return false; } |
335 | |
336 | // When processing a skeleton compilation unit, resulting from a split |
337 | // DWARF compilation, once the skeleton debug info has been read, |
338 | // the reader will call this function to ask the client if it needs |
339 | // the full debug info from the .dwo or .dwp file. Return true if |
340 | // you need it, or false to skip processing the split debug info. |
341 | virtual bool NeedSplitDebugInfo() { return true; } |
342 | |
343 | // Start to process a split compilation unit at OFFSET from the beginning of |
344 | // the debug_info section in the .dwp/.dwo file. Return false if you would |
345 | // like to skip this compilation unit. |
346 | virtual bool StartSplitCompilationUnit(uint64_t offset, |
347 | uint64_t cu_length) { return false; } |
348 | |
349 | // Start to process a DIE at OFFSET from the beginning of the .debug_info |
350 | // section. Return false if you would like to skip this DIE. |
351 | virtual bool StartDIE(uint64_t offset, enum DwarfTag tag) { return false; } |
352 | |
353 | // Called when we have an attribute with unsigned data to give to our |
354 | // handler. The attribute is for the DIE at OFFSET from the beginning of the |
355 | // .debug_info section. Its name is ATTR, its form is FORM, and its value is |
356 | // DATA. |
357 | virtual void ProcessAttributeUnsigned(uint64_t offset, |
358 | enum DwarfAttribute attr, |
359 | enum DwarfForm form, |
360 | uint64_t data) { } |
361 | |
362 | // Called when we have an attribute with signed data to give to our handler. |
363 | // The attribute is for the DIE at OFFSET from the beginning of the |
364 | // .debug_info section. Its name is ATTR, its form is FORM, and its value is |
365 | // DATA. |
366 | virtual void ProcessAttributeSigned(uint64_t offset, |
367 | enum DwarfAttribute attr, |
368 | enum DwarfForm form, |
369 | int64_t data) { } |
370 | |
371 | // Called when we have an attribute whose value is a reference to |
372 | // another DIE. The attribute belongs to the DIE at OFFSET from the |
373 | // beginning of the .debug_info section. Its name is ATTR, its form |
374 | // is FORM, and the offset of the DIE being referred to from the |
375 | // beginning of the .debug_info section is DATA. |
376 | virtual void ProcessAttributeReference(uint64_t offset, |
377 | enum DwarfAttribute attr, |
378 | enum DwarfForm form, |
379 | uint64_t data) { } |
380 | |
381 | // Called when we have an attribute with a buffer of data to give to our |
382 | // handler. The attribute is for the DIE at OFFSET from the beginning of the |
383 | // .debug_info section. Its name is ATTR, its form is FORM, DATA points to |
384 | // the buffer's contents, and its length in bytes is LENGTH. The buffer is |
385 | // owned by the caller, not the callee, and may not persist for very long. |
386 | // If you want the data to be available later, it needs to be copied. |
387 | virtual void ProcessAttributeBuffer(uint64_t offset, |
388 | enum DwarfAttribute attr, |
389 | enum DwarfForm form, |
390 | const uint8_t* data, |
391 | uint64_t len) { } |
392 | |
393 | // Called when we have an attribute with string data to give to our handler. |
394 | // The attribute is for the DIE at OFFSET from the beginning of the |
395 | // .debug_info section. Its name is ATTR, its form is FORM, and its value is |
396 | // DATA. |
397 | virtual void ProcessAttributeString(uint64_t offset, |
398 | enum DwarfAttribute attr, |
399 | enum DwarfForm form, |
400 | const string& data) { } |
401 | |
402 | // Called when we have an attribute whose value is the 64-bit signature |
403 | // of a type unit in the .debug_types section. OFFSET is the offset of |
404 | // the DIE whose attribute we're reporting. ATTR and FORM are the |
405 | // attribute's name and form. SIGNATURE is the type unit's signature. |
406 | virtual void ProcessAttributeSignature(uint64_t offset, |
407 | enum DwarfAttribute attr, |
408 | enum DwarfForm form, |
409 | uint64_t signature) { } |
410 | |
411 | // Called when finished processing the DIE at OFFSET. |
412 | // Because DWARF2/3 specifies a tree of DIEs, you may get starts |
413 | // before ends of the previous DIE, as we process children before |
414 | // ending the parent. |
415 | virtual void EndDIE(uint64_t offset) { } |
416 | |
417 | }; |
418 | |
// The base of DWARF2/3 debug info is a DIE (Debugging Information
// Entry).
421 | // DWARF groups DIE's into a tree and calls the root of this tree a |
422 | // "compilation unit". Most of the time, there is one compilation |
423 | // unit in the .debug_info section for each file that had debug info |
424 | // generated. |
425 | // Each DIE consists of |
426 | |
// 1. a tag specifying a thing that is being described (e.g.
// DW_TAG_subprogram for functions, DW_TAG_variable for variables, etc.)
429 | // 2. attributes (such as DW_AT_location for location in memory, |
430 | // DW_AT_name for name), and data for each attribute. |
431 | // 3. A flag saying whether the DIE has children or not |
432 | |
433 | // In order to gain some amount of compression, the format of |
434 | // each DIE (tag name, attributes and data forms for the attributes) |
435 | // are stored in a separate table called the "abbreviation table". |
436 | // This is done because a large number of DIEs have the exact same tag |
437 | // and list of attributes, but different data for those attributes. |
438 | // As a result, the .debug_info section is just a stream of data, and |
439 | // requires reading of the .debug_abbrev section to say what the data |
440 | // means. |
441 | |
442 | // As a warning to the user, it should be noted that the reason for |
443 | // using absolute offsets from the beginning of .debug_info is that |
444 | // DWARF2/3 supports referencing DIE's from other DIE's by their offset |
445 | // from either the current compilation unit start, *or* the beginning |
446 | // of the .debug_info section. This means it is possible to reference |
447 | // a DIE in one compilation unit from a DIE in another compilation |
448 | // unit. This style of reference is usually used to eliminate |
449 | // duplicated information that occurs across compilation |
450 | // units, such as base types, etc. GCC 3.4+ support this with |
451 | // -feliminate-dwarf2-dups. Other toolchains will sometimes do |
452 | // duplicate elimination in the linker. |
453 | |
454 | class CompilationUnit { |
455 | public: |
456 | |
457 | // Initialize a compilation unit. This requires a map of sections, |
458 | // the offset of this compilation unit in the .debug_info section, a |
459 | // ByteReader, and a Dwarf2Handler class to call callbacks in. |
460 | CompilationUnit(const string& path, const SectionMap& sections, |
461 | uint64_t offset, ByteReader* reader, Dwarf2Handler* handler); |
462 | virtual ~CompilationUnit() { |
463 | if (abbrevs_) delete abbrevs_; |
464 | } |
465 | |
466 | // Initialize a compilation unit from a .dwo or .dwp file. |
467 | // In this case, we need the .debug_addr section from the |
468 | // executable file that contains the corresponding skeleton |
469 | // compilation unit. We also inherit the Dwarf2Handler from |
470 | // the executable file, and call it as if we were still |
471 | // processing the original compilation unit. |
472 | void SetSplitDwarf(const uint8_t* addr_buffer, uint64_t addr_buffer_length, |
473 | uint64_t addr_base, uint64_t ranges_base, uint64_t dwo_id); |
474 | |
475 | // Begin reading a Dwarf2 compilation unit, and calling the |
476 | // callbacks in the Dwarf2Handler |
477 | |
478 | // Return the full length of the compilation unit, including |
479 | // headers. This plus the starting offset passed to the constructor |
480 | // is the offset of the end of the compilation unit --- and the |
481 | // start of the next compilation unit, if there is one. |
482 | uint64_t Start(); |
483 | |
484 | private: |
485 | |
486 | // This struct represents a single DWARF2/3 abbreviation |
487 | // The abbreviation tells how to read a DWARF2/3 DIE, and consist of a |
488 | // tag and a list of attributes, as well as the data form of each attribute. |
489 | struct Abbrev { |
490 | uint64_t number; |
491 | enum DwarfTag tag; |
492 | bool has_children; |
493 | AttributeList attributes; |
494 | }; |
495 | |
496 | // A DWARF2/3 compilation unit header. This is not the same size as |
497 | // in the actual file, as the one in the file may have a 32 bit or |
498 | // 64 bit length. |
499 | struct { |
500 | uint64_t ; |
501 | uint16_t ; |
502 | uint64_t ; |
503 | uint8_t ; |
504 | } ; |
505 | |
506 | // Reads the DWARF2/3 header for this compilation unit. |
507 | void (); |
508 | |
509 | // Reads the DWARF2/3 abbreviations for this compilation unit |
510 | void ReadAbbrevs(); |
511 | |
512 | // Read the abbreviation offset for this compilation unit |
513 | size_t ReadAbbrevOffset(const uint8_t* ); |
514 | |
515 | // Read the address size for this compilation unit |
516 | size_t ReadAddressSize(const uint8_t* ); |
517 | |
518 | // Read the DWO id from a split or skeleton compilation unit header |
519 | size_t ReadDwoId(const uint8_t* ); |
520 | |
521 | // Read the type signature from a type or split type compilation unit header |
522 | size_t ReadTypeSignature(const uint8_t* ); |
523 | |
524 | // Read the DWO id from a split or skeleton compilation unit header |
525 | size_t ReadTypeOffset(const uint8_t* ); |
526 | |
527 | // Processes a single DIE for this compilation unit and return a new |
528 | // pointer just past the end of it |
529 | const uint8_t* ProcessDIE(uint64_t dieoffset, |
530 | const uint8_t* start, |
531 | const Abbrev& abbrev); |
532 | |
533 | // Processes a single attribute and return a new pointer just past the |
534 | // end of it |
535 | const uint8_t* ProcessAttribute(uint64_t dieoffset, |
536 | const uint8_t* start, |
537 | enum DwarfAttribute attr, |
538 | enum DwarfForm form, |
539 | uint64_t implicit_const); |
540 | |
541 | // Special version of ProcessAttribute, for finding str_offsets_base and |
542 | // DW_AT_addr_base in DW_TAG_compile_unit, for DWARF v5. |
543 | const uint8_t* ProcessOffsetBaseAttribute(uint64_t dieoffset, |
544 | const uint8_t* start, |
545 | enum DwarfAttribute attr, |
546 | enum DwarfForm form, |
547 | uint64_t implicit_const); |
548 | |
549 | // Called when we have an attribute with unsigned data to give to |
550 | // our handler. The attribute is for the DIE at OFFSET from the |
551 | // beginning of compilation unit, has a name of ATTR, a form of |
552 | // FORM, and the actual data of the attribute is in DATA. |
553 | // If we see a DW_AT_GNU_dwo_id attribute, save the value so that |
554 | // we can find the debug info in a .dwo or .dwp file. |
555 | void ProcessAttributeUnsigned(uint64_t offset, |
556 | enum DwarfAttribute attr, |
557 | enum DwarfForm form, |
558 | uint64_t data) { |
559 | if (attr == DW_AT_GNU_dwo_id) { |
560 | dwo_id_ = data; |
561 | } |
562 | else if (attr == DW_AT_GNU_addr_base || attr == DW_AT_addr_base) { |
563 | addr_base_ = data; |
564 | } |
565 | else if (attr == DW_AT_str_offsets_base) { |
566 | str_offsets_base_ = data; |
567 | } |
568 | else if (attr == DW_AT_GNU_ranges_base || attr == DW_AT_rnglists_base) { |
569 | ranges_base_ = data; |
570 | } |
571 | // TODO(yunlian): When we add DW_AT_ranges_base from DWARF-5, |
572 | // that base will apply to DW_AT_ranges attributes in the |
573 | // skeleton CU as well as in the .dwo/.dwp files. |
574 | else if (attr == DW_AT_ranges && is_split_dwarf_) { |
575 | data += ranges_base_; |
576 | } |
577 | handler_->ProcessAttributeUnsigned(offset, attr, form, data); |
578 | } |
579 | |
580 | // Called when we have an attribute with signed data to give to |
581 | // our handler. The attribute is for the DIE at OFFSET from the |
582 | // beginning of compilation unit, has a name of ATTR, a form of |
583 | // FORM, and the actual data of the attribute is in DATA. |
584 | void ProcessAttributeSigned(uint64_t offset, |
585 | enum DwarfAttribute attr, |
586 | enum DwarfForm form, |
587 | int64_t data) { |
588 | handler_->ProcessAttributeSigned(offset, attr, form, data); |
589 | } |
590 | |
591 | // Called when we have an attribute with a buffer of data to give to |
592 | // our handler. The attribute is for the DIE at OFFSET from the |
593 | // beginning of compilation unit, has a name of ATTR, a form of |
594 | // FORM, and the actual data of the attribute is in DATA, and the |
595 | // length of the buffer is LENGTH. |
596 | void ProcessAttributeBuffer(uint64_t offset, |
597 | enum DwarfAttribute attr, |
598 | enum DwarfForm form, |
599 | const uint8_t* data, |
600 | uint64_t len) { |
601 | handler_->ProcessAttributeBuffer(offset, attr, form, data, len); |
602 | } |
603 | |
604 | // Handles the common parts of DW_FORM_GNU_str_index, DW_FORM_strx, |
605 | // DW_FORM_strx1, DW_FORM_strx2, DW_FORM_strx3, and DW_FORM_strx4. |
606 | // Retrieves the data and calls through to ProcessAttributeString. |
607 | void ProcessFormStringIndex(uint64_t offset, |
608 | enum DwarfAttribute attr, |
609 | enum DwarfForm form, |
610 | uint64_t str_index); |
611 | |
612 | // Called when we have an attribute with string data to give to |
613 | // our handler. The attribute is for the DIE at OFFSET from the |
614 | // beginning of compilation unit, has a name of ATTR, a form of |
615 | // FORM, and the actual data of the attribute is in DATA. |
616 | // If we see a DW_AT_GNU_dwo_name attribute, save the value so |
617 | // that we can find the debug info in a .dwo or .dwp file. |
618 | void ProcessAttributeString(uint64_t offset, |
619 | enum DwarfAttribute attr, |
620 | enum DwarfForm form, |
621 | const char* data) { |
622 | if (attr == DW_AT_GNU_dwo_name || attr == DW_AT_dwo_name) |
623 | dwo_name_ = data; |
624 | handler_->ProcessAttributeString(offset, attr, form, data); |
625 | } |
626 | |
627 | // Called to handle common portions of DW_FORM_addrx and variations, as well |
628 | // as DW_FORM_GNU_addr_index. |
629 | void ProcessAttributeAddrIndex(uint64_t offset, |
630 | enum DwarfAttribute attr, |
631 | enum DwarfForm form, |
632 | uint64_t addr_index) { |
633 | const uint8_t* addr_ptr = |
634 | addr_buffer_ + addr_base_ + addr_index * reader_->AddressSize(); |
635 | ProcessAttributeUnsigned( |
636 | offset, attr, form, reader_->ReadAddress(addr_ptr)); |
637 | } |
638 | |
639 | // Processes all DIEs for this compilation unit |
640 | void ProcessDIEs(); |
641 | |
642 | // Skips the die with attributes specified in ABBREV starting at |
643 | // START, and return the new place to position the stream to. |
644 | const uint8_t* SkipDIE(const uint8_t* start, const Abbrev& abbrev); |
645 | |
646 | // Skips the attribute starting at START, with FORM, and return the |
647 | // new place to position the stream to. |
648 | const uint8_t* SkipAttribute(const uint8_t* start, enum DwarfForm form); |
649 | |
650 | // Process the actual debug information in a split DWARF file. |
651 | void ProcessSplitDwarf(); |
652 | |
653 | // Read the debug sections from a .dwo file. |
654 | void ReadDebugSectionsFromDwo(ElfReader* elf_reader, |
655 | SectionMap* sections); |
656 | |
657 | // Path of the file containing the debug information. |
658 | const string path_; |
659 | |
660 | // Offset from section start is the offset of this compilation unit |
661 | // from the beginning of the .debug_info section. |
662 | uint64_t offset_from_section_start_; |
663 | |
664 | // buffer is the buffer for our CU, starting at .debug_info + offset |
665 | // passed in from constructor. |
666 | // after_header points to right after the compilation unit header. |
667 | const uint8_t* buffer_; |
668 | uint64_t buffer_length_; |
669 | const uint8_t* ; |
670 | |
671 | // The associated ByteReader that handles endianness issues for us |
672 | ByteReader* reader_; |
673 | |
674 | // The map of sections in our file to buffers containing their data |
675 | const SectionMap& sections_; |
676 | |
677 | // The associated handler to call processing functions in |
678 | Dwarf2Handler* handler_; |
679 | |
680 | // Set of DWARF2/3 abbreviations for this compilation unit. Indexed |
681 | // by abbreviation number, which means that abbrevs_[0] is not |
682 | // valid. |
683 | std::vector<Abbrev>* abbrevs_; |
684 | |
685 | // String section buffer and length, if we have a string section. |
686 | // This is here to avoid doing a section lookup for strings in |
687 | // ProcessAttribute, which is in the hot path for DWARF2 reading. |
688 | const uint8_t* string_buffer_; |
689 | uint64_t string_buffer_length_; |
690 | |
691 | // Similarly for .debug_line_string. |
692 | const uint8_t* line_string_buffer_; |
693 | uint64_t line_string_buffer_length_; |
694 | |
695 | // String offsets section buffer and length, if we have a string offsets |
696 | // section (.debug_str_offsets or .debug_str_offsets.dwo). |
697 | const uint8_t* str_offsets_buffer_; |
698 | uint64_t str_offsets_buffer_length_; |
699 | |
700 | // Address section buffer and length, if we have an address section |
701 | // (.debug_addr). |
702 | const uint8_t* addr_buffer_; |
703 | uint64_t addr_buffer_length_; |
704 | |
705 | // Flag indicating whether this compilation unit is part of a .dwo |
706 | // or .dwp file. If true, we are reading this unit because a |
707 | // skeleton compilation unit in an executable file had a |
708 | // DW_AT_GNU_dwo_name or DW_AT_GNU_dwo_id attribute. |
709 | // In a .dwo file, we expect the string offsets section to |
710 | // have a ".dwo" suffix, and we will use the ".debug_addr" section |
711 | // associated with the skeleton compilation unit. |
712 | bool is_split_dwarf_; |
713 | |
714 | // Flag indicating if it's a Type Unit (only applicable to DWARF v5). |
715 | bool is_type_unit_; |
716 | |
717 | // The value of the DW_AT_GNU_dwo_id attribute, if any. |
718 | uint64_t dwo_id_; |
719 | |
720 | // The value of the DW_AT_GNU_type_signature attribute, if any. |
721 | uint64_t type_signature_; |
722 | |
723 | // The value of the DW_AT_GNU_type_offset attribute, if any. |
724 | size_t type_offset_; |
725 | |
726 | // The value of the DW_AT_GNU_dwo_name attribute, if any. |
727 | const char* dwo_name_; |
728 | |
729 | // If this is a split DWARF CU, the value of the DW_AT_GNU_dwo_id attribute |
730 | // from the skeleton CU. |
731 | uint64_t skeleton_dwo_id_; |
732 | |
733 | // The value of the DW_AT_GNU_ranges_base or DW_AT_rnglists_base attribute, |
734 | // if any. |
735 | uint64_t ranges_base_; |
736 | |
737 | // The value of the DW_AT_GNU_addr_base attribute, if any. |
738 | uint64_t addr_base_; |
739 | |
740 | // The value of DW_AT_str_offsets_base attribute, if any. |
741 | uint64_t str_offsets_base_; |
742 | |
743 | // True if we have already looked for a .dwp file. |
744 | bool have_checked_for_dwp_; |
745 | |
746 | // Path to the .dwp file. |
747 | string dwp_path_; |
748 | |
749 | // ByteReader for the DWP file. |
750 | std::unique_ptr<ByteReader> dwp_byte_reader_; |
751 | |
752 | // DWP reader. |
753 | std::unique_ptr<DwpReader> dwp_reader_; |
754 | }; |
755 | |
756 | // A Reader for a .dwp file. Supports the fetching of DWARF debug |
757 | // info for a given dwo_id. |
758 | // |
759 | // There are two versions of .dwp files. In both versions, the |
760 | // .dwp file is an ELF file containing only debug sections. |
761 | // In Version 1, the file contains many copies of each debug |
762 | // section, one for each .dwo file that is packaged in the .dwp |
763 | // file, and the .debug_cu_index section maps from the dwo_id |
764 | // to a set of section indexes. In Version 2, the file contains |
765 | // one of each debug section, and the .debug_cu_index section |
766 | // maps from the dwo_id to a set of offsets and lengths that |
767 | // identify each .dwo file's contribution to the larger sections. |
768 | |
769 | class DwpReader { |
770 | public: |
771 | DwpReader(const ByteReader& byte_reader, ElfReader* elf_reader); |
772 | |
773 | ~DwpReader(); |
774 | |
775 | // Read the CU index (.debug_cu_index) and initialize data members. |
776 | void Initialize(); |
777 | |
778 | // Read the debug sections for the given dwo_id (presumably into SECTIONS). |
779 | void ReadDebugSectionsForCU(uint64_t dwo_id, SectionMap* sections); |
780 | |
781 | private: |
782 | // Search a v1 hash table for "dwo_id". Returns the slot index |
783 | // where the dwo_id was found, or -1 if it was not found. |
784 | int LookupCU(uint64_t dwo_id); |
785 | |
786 | // Search a v2 hash table for "dwo_id". Returns the row index |
787 | // in the offsets and sizes tables, or 0 (not -1) if it was not found. |
788 | uint32_t LookupCUv2(uint64_t dwo_id); |
789 | |
790 | // The ELF reader for the .dwp file. |
791 | ElfReader* elf_reader_; |
792 | |
793 | // The ByteReader for the .dwp file. |
794 | const ByteReader& byte_reader_; |
795 | |
796 | // Pointer to the .debug_cu_index section. |
797 | const char* cu_index_; |
798 | |
799 | // Size of the .debug_cu_index section. |
800 | size_t cu_index_size_; |
801 | |
802 | // Pointer to the .debug_str.dwo section. |
803 | const char* string_buffer_; |
804 | |
805 | // Size of the .debug_str.dwo section. |
806 | size_t string_buffer_size_; |
807 | |
808 | // Version of the .dwp file. We support versions 1 and 2 currently. |
809 | int version_; |
810 | |
811 | // Number of columns in the section tables (version 2). |
812 | unsigned int ncolumns_; |
813 | |
814 | // Number of units in the section tables (version 2). |
815 | unsigned int nunits_; |
816 | |
817 | // Number of slots in the hash table. |
818 | unsigned int nslots_; |
819 | |
820 | // Pointer to the beginning of the hash table. |
821 | const char* phash_; |
822 | |
823 | // Pointer to the beginning of the index table. |
824 | const char* pindex_; |
825 | |
826 | // Pointer to the beginning of the section index pool (version 1). |
827 | const char* shndx_pool_; |
828 | |
829 | // Pointer to the beginning of the section offset table (version 2). |
830 | const char* offset_table_; |
831 | |
832 | // Pointer to the beginning of the section size table (version 2). |
833 | const char* size_table_; |
834 | |
835 | // Contents (pointer and size) of the sections of interest (version 2). |
836 | const char* abbrev_data_; |
837 | size_t abbrev_size_; |
838 | const char* info_data_; |
839 | size_t info_size_; |
840 | const char* str_offsets_data_; |
841 | size_t str_offsets_size_; |
842 | }; |
843 | |
844 | // This class is a reader for DWARF's Call Frame Information. CFI |
845 | // describes how to unwind stack frames --- even for functions that do |
846 | // not follow fixed conventions for saving registers, whose frame size |
847 | // varies as they execute, etc. |
848 | // |
849 | // CFI describes, at each machine instruction, how to compute the |
850 | // stack frame's base address, how to find the return address, and |
851 | // where to find the saved values of the caller's registers (if the |
852 | // callee has stashed them somewhere to free up the registers for its |
853 | // own use). |
854 | // |
855 | // For example, suppose we have a function whose machine code looks |
856 | // like this (imagine an assembly language that looks like C, for a |
857 | // machine with 32-bit registers, and a stack that grows towards lower |
858 | // addresses): |
859 | // |
860 | // func: ; entry point; return address at sp |
861 | // func+0: sp = sp - 16 ; allocate space for stack frame |
862 | // func+1: sp[12] = r0 ; save r0 at sp+12 |
863 | // ... ; other code, not frame-related |
864 | // func+10: sp -= 4; *sp = x ; push some x on the stack |
865 | // ... ; other code, not frame-related |
866 | // func+20: r0 = sp[16] ; restore saved r0 |
867 | // func+21: sp += 20 ; pop whole stack frame |
868 | // func+22: pc = *sp; sp += 4 ; pop return address and jump to it |
869 | // |
870 | // DWARF CFI is (a very compressed representation of) a table with a |
871 | // row for each machine instruction address and a column for each |
872 | // register showing how to restore it, if possible. |
873 | // |
874 | // A special column named "CFA", for "Canonical Frame Address", tells how |
875 | // to compute the base address of the frame; registers' entries may |
876 | // refer to the CFA in describing where the registers are saved. |
877 | // |
878 | // Another special column, named "RA", represents the return address. |
879 | // |
880 | // For example, here is a complete (uncompressed) table describing the |
881 | // function above: |
882 | // |
883 | //     insn      cfa      r0      r1 ...  ra |
884 | //     ======================================= |
885 | //     func+0:   sp                       cfa[0] |
886 | //     func+1:   sp+16                    cfa[0] |
887 | //     func+2:   sp+16    cfa[-4]         cfa[0] |
888 | //     func+11:  sp+20    cfa[-4]         cfa[0] |
889 | //     func+21:  sp+20                    cfa[0] |
890 | //     func+22:  sp                       cfa[0] |
891 | // |
892 | // Some things to note here: |
893 | // |
894 | // - Each row describes the state of affairs *before* executing the |
895 | // instruction at the given address. Thus, the row for func+0 |
896 | // describes the state before we allocate the stack frame. In the |
897 | // next row, the formula for computing the CFA has changed, |
898 | // reflecting that allocation. |
899 | // |
900 | // - The other entries are written in terms of the CFA; this allows |
901 | // them to remain unchanged as the stack pointer gets bumped around. |
902 | // For example, the rule for recovering the return address (the "ra" |
903 | // column) remains unchanged throughout the function, even as the |
904 | // stack pointer takes on three different offsets from the return |
905 | // address. |
906 | // |
907 | // - Although we haven't shown it, most calling conventions designate |
908 | // "callee-saves" and "caller-saves" registers. The callee must |
909 | // preserve the values of callee-saves registers; if it uses them, |
910 | // it must save their original values somewhere, and restore them |
911 | // before it returns. In contrast, the callee is free to trash |
912 | // caller-saves registers; if the callee uses these, it will |
913 | // probably not bother to save them anywhere, and the CFI will |
914 | // probably mark their values as "unrecoverable". |
915 | // |
916 | // (However, since the caller cannot assume the callee was going to |
917 | // save them, caller-saves registers are probably dead in the caller |
918 | // anyway, so compilers usually don't generate CFI for caller-saves |
919 | // registers.) |
920 | // |
921 | // - Exactly where the CFA points is a matter of convention that |
922 | // depends on the architecture and ABI in use. In the example, the |
923 | // CFA is the value the stack pointer had upon entry to the |
924 | // function, pointing at the saved return address. But on the x86, |
925 | // the call frame information generated by GCC follows the |
926 | // convention that the CFA is the address *after* the saved return |
927 | // address. |
928 | // |
929 | // But by definition, the CFA remains constant throughout the |
930 | // lifetime of the frame. This makes it a useful value for other |
931 | // columns to refer to. It also gives debuggers a useful handle |
932 | // for identifying a frame. |
933 | // |
934 | // If you look at the table above, you'll notice that a given entry is |
935 | // often the same as the one immediately above it: most instructions |
936 | // change only one or two aspects of the stack frame, if they affect |
937 | // it at all. The DWARF format takes advantage of this fact, and |
938 | // reduces the size of the data by mentioning only the addresses and |
939 | // columns at which changes take place. So for the above, DWARF CFI |
940 | // data would only actually mention the following: |
941 | // |
942 | //     insn      cfa      r0      r1 ...  ra |
943 | //     ======================================= |
944 | //     func+0:   sp                       cfa[0] |
945 | //     func+1:   sp+16 |
946 | //     func+2:            cfa[-4] |
947 | //     func+11:  sp+20 |
948 | //     func+21:           r0 |
949 | //     func+22:  sp |
950 | // |
951 | // In fact, this is the way the parser reports CFI to the consumer: as |
952 | // a series of statements of the form, "At address X, column Y changed |
953 | // to Z," and related conventions for describing the initial state. |
954 | // |
955 | // Naturally, it would be impractical to have to scan the entire |
956 | // program's CFI, noting changes as we go, just to recover the |
957 | // unwinding rules in effect at one particular instruction. To avoid |
958 | // this, CFI data is grouped into "entries", each of which covers a |
959 | // specified range of addresses and begins with a complete statement |
960 | // of the rules for all recoverable registers at that starting |
961 | // address. Each entry typically covers a single function. |
962 | // |
963 | // Thus, to compute the contents of a given row of the table --- that |
964 | // is, rules for recovering the CFA, RA, and registers at a given |
965 | // instruction --- the consumer should find the entry that covers that |
966 | // instruction's address, start with the initial state supplied at the |
967 | // beginning of the entry, and work forward until it has processed all |
968 | // the changes up to and including those for the present instruction. |
969 | // |
970 | // There are seven kinds of rules that can appear in an entry of the |
971 | // table: |
972 | // |
973 | // - "undefined": The given register is not preserved by the callee; |
974 | // its value cannot be recovered. |
975 | // |
976 | // - "same value": This register has the same value it did in the callee. |
977 | // |
978 | // - offset(N): The register is saved at offset N from the CFA. |
979 | // |
980 | // - val_offset(N): The value the register had in the caller is the |
981 | // CFA plus offset N. (This is usually only useful for describing |
982 | // the stack pointer.) |
983 | // |
984 | // - register(R): The register's value was saved in another register R. |
985 | // |
986 | // - expression(E): Evaluating the DWARF expression E using the |
987 | // current frame's registers' values yields the address at which the |
988 | // register was saved. |
989 | // |
990 | // - val_expression(E): Evaluating the DWARF expression E using the |
991 | // current frame's registers' values yields the value the register |
992 | // had in the caller. |
993 | |
994 | class CallFrameInfo { |
995 | public: |
996 | // The different kinds of entries one finds in CFI. Used internally, |
997 | // and for error reporting. |
998 | enum EntryKind { kUnknown, kCIE, kFDE, kTerminator }; |
999 | |
1000 | // The handler class to which the parser hands the parsed call frame |
1001 | // information. Defined below. |
1002 | class Handler; |
1003 | |
1004 | // A reporter class, which CallFrameInfo uses to report errors |
1005 | // encountered while parsing call frame information. Defined below. |
1006 | class Reporter; |
1007 | |
1008 | // Create a DWARF CFI parser. BUFFER points to the contents of the |
1009 | // .debug_frame section to parse; BUFFER_LENGTH is its length in bytes. |
1010 | // REPORTER is an error reporter the parser should use to report |
1011 | // problems. READER is a ByteReader instance that has the endianness and |
1012 | // address size set properly. Report the data we find to HANDLER. |
1013 | // |
1014 | // This class can also parse Linux C++ exception handling data, as found |
1015 | // in '.eh_frame' sections. This data is a variant of DWARF CFI that is |
1016 | // placed in loadable segments so that it is present in the program's |
1017 | // address space, and is interpreted by the C++ runtime to search the |
1018 | // call stack for a handler interested in the exception being thrown, |
1019 | // actually pop the frames, and find cleanup code to run. |
1020 | // |
1021 | // There are two differences between the call frame information described |
1022 | // in the DWARF standard and the exception handling data Linux places in |
1023 | // the .eh_frame section: |
1024 | // |
1025 | // - Exception handling data uses a different format for call frame |
1026 | // information entry headers. The distinguished CIE id, the way FDEs |
1027 | // refer to their CIEs, and the way the end of the series of entries is |
1028 | // determined are all slightly different. |
1029 | // |
1030 | // If the constructor's EH_FRAME argument is true, then the |
1031 | // CallFrameInfo parses the entry headers as Linux C++ exception |
1032 | // handling data. If EH_FRAME is false or omitted, the CallFrameInfo |
1033 | // parses standard DWARF call frame information. |
1034 | // |
1035 | // - Linux C++ exception handling data uses CIE augmentation strings |
1036 | // beginning with 'z' to specify the presence of additional data after |
1037 | // the CIE and FDE headers and special encodings used for addresses in |
1038 | // frame description entries. |
1039 | // |
1040 | // CallFrameInfo can handle 'z' augmentations in either DWARF CFI or |
1041 | // exception handling data if you have supplied READER with the base |
1042 | // addresses needed to interpret the pointer encodings that 'z' |
1043 | // augmentations can specify. See the ByteReader interface for details |
1044 | // about the base addresses. See the CallFrameInfo::Handler interface |
1045 | // for details about the additional information one might find in |
1046 | // 'z'-augmented data. |
1047 | // |
1048 | // Thus: |
1049 | // |
1050 | // - If you are parsing standard DWARF CFI, as found in a .debug_frame |
1051 | // section, you should pass false for the EH_FRAME argument, or omit |
1052 | // it, and you need not worry about providing READER with the |
1053 | // additional base addresses. |
1054 | // |
1055 | // - If you want to parse Linux C++ exception handling data from a |
1056 | // .eh_frame section, you should pass EH_FRAME as true, and call |
1057 | // READER's Set*Base member functions before calling our Start method. |
1058 | // |
1059 | // - If you want to parse DWARF CFI that uses the 'z' augmentations |
1060 | // (although I don't think any toolchain ever emits such data), you |
1061 | // could pass false for EH_FRAME, but call READER's Set*Base members. |
1062 | // |
1063 | // The extensions the Linux C++ ABI makes to DWARF for exception |
1064 | // handling are described here, rather poorly: |
1065 | // http://refspecs.linux-foundation.org/LSB_4.0.0/LSB-Core-generic/LSB-Core-generic/dwarfext.html |
1066 | // http://refspecs.linux-foundation.org/LSB_4.0.0/LSB-Core-generic/LSB-Core-generic/ehframechpt.html |
1067 | // |
1068 | // The mechanics of C++ exception handling, personality routines, |
1069 | // and language-specific data areas are described here, rather nicely: |
1070 | // http://www.codesourcery.com/public/cxx-abi/abi-eh.html |
1071 | CallFrameInfo(const uint8_t* buffer, size_t buffer_length, |
1072 | ByteReader* reader, Handler* handler, Reporter* reporter, |
1073 | bool eh_frame = false) |
1074 | : buffer_(buffer), buffer_length_(buffer_length), |
1075 | reader_(reader), handler_(handler), reporter_(reporter), |
1076 | eh_frame_(eh_frame) { } |
1077 | |
1078 | ~CallFrameInfo() { } |
1079 | |
1080 | // Parse the entries in BUFFER, reporting what we find to HANDLER. |
1081 | // Return true if we reach the end of the section successfully, or |
1082 | // false if we encounter an error. |
1083 | bool Start(); |
1084 | |
1085 | // Return the textual name of KIND. For error reporting. |
1086 | static const char* KindName(EntryKind kind); |
1087 | |
1088 | private: |
1089 | |
1090 | struct CIE; |
1091 | |
1092 | // A CFI entry, either an FDE or a CIE. |
1093 | struct Entry { |
1094 | // The starting offset of the entry in the section, for error |
1095 | // reporting. |
1096 | size_t offset; |
1097 | |
1098 | // The start of this entry in the buffer. |
1099 | const uint8_t* start; |
1100 | |
1101 | // Which kind of entry this is. |
1102 | // |
1103 | // We want to be able to use this for error reporting even while we're |
1104 | // in the midst of parsing. Error reporting code may assume that kind, |
1105 | // offset, and start fields are valid, although kind may be kUnknown. |
1106 | EntryKind kind; |
1107 | |
1108 | // The end of this entry's common prologue (initial length and id), and |
1109 | // the start of this entry's kind-specific fields. |
1110 | const uint8_t* fields; |
1111 | |
1112 | // The start of this entry's instructions. |
1113 | const uint8_t* instructions; |
1114 | |
1115 | // The address past the entry's last byte in the buffer. (Note that |
1116 | // since offset points to the entry's initial length field, and the |
1117 | // length field is the number of bytes after that field, this is not |
1118 | // simply buffer_ + offset + length.) |
1119 | const uint8_t* end; |
1120 | |
1121 | // For both DWARF CFI and .eh_frame sections, this is the CIE id in a |
1122 | // CIE, and the offset of the associated CIE in an FDE. |
1123 | uint64_t id; |
1124 | |
1125 | // The CIE that applies to this entry, if we've parsed it. If this is a |
1126 | // CIE, then this field points to this structure. |
1127 | CIE* cie; |
1128 | }; |
1129 | |
1130 | // A common information entry (CIE). |
1131 | struct CIE: public Entry { |
1132 | uint8_t version; // CFI data version number |
1133 | string augmentation; // vendor format extension markers |
1134 | uint64_t code_alignment_factor; // scale for code address adjustments |
1135 | int data_alignment_factor; // scale for stack pointer adjustments |
1136 | unsigned return_address_register; // which register holds the return addr |
1137 | |
1138 | // True if this CIE includes Linux C++ ABI 'z' augmentation data. |
1139 | bool has_z_augmentation; |
1140 | |
1141 | // Parsed 'z' augmentation data. These are meaningful only if |
1142 | // has_z_augmentation is true. |
1143 | bool has_z_lsda; // The 'z' augmentation included 'L'. |
1144 | bool has_z_personality; // The 'z' augmentation included 'P'. |
1145 | bool has_z_signal_frame; // The 'z' augmentation included 'S'. |
1146 | |
1147 | // If has_z_lsda is true, this is the encoding to be used for language- |
1148 | // specific data area pointers in FDEs. |
1149 | DwarfPointerEncoding lsda_encoding; |
1150 | |
1151 | // If has_z_personality is true, this is the encoding used for the |
1152 | // personality routine pointer in the augmentation data. |
1153 | DwarfPointerEncoding personality_encoding; |
1154 | |
1155 | // If has_z_personality is true, this is the address of the personality |
1156 | // routine --- or, if personality_encoding & DW_EH_PE_indirect, the |
1157 | // address where the personality routine's address is stored. |
1158 | uint64_t personality_address; |
1159 | |
1160 | // This is the encoding used for addresses in the FDE header and |
1161 | // in DW_CFA_set_loc instructions. This is always valid, whether |
1162 | // or not we saw a 'z' augmentation string; its default value is |
1163 | // DW_EH_PE_absptr, which is what normal DWARF CFI uses. |
1164 | DwarfPointerEncoding pointer_encoding; |
1165 | |
1166 | // These were only introduced in DWARF4, so will not be set in older |
1167 | // versions. |
1168 | uint8_t address_size; |
1169 | uint8_t segment_size; |
1170 | }; |
1171 | |
1172 | // A frame description entry (FDE). |
1173 | struct FDE: public Entry { |
1174 | uint64_t address; // start address of described code |
1175 | uint64_t size; // size of described code, in bytes |
1176 | |
1177 | // If cie->has_z_lsda is true, then this is the language-specific data |
1178 | // area's address --- or its address's address, if cie->lsda_encoding |
1179 | // has the DW_EH_PE_indirect bit set. |
1180 | uint64_t lsda_address; |
1181 | }; |
1182 | |
1183 | // Internal use. |
1184 | class Rule; |
1185 | class UndefinedRule; |
1186 | class SameValueRule; |
1187 | class OffsetRule; |
1188 | class ValOffsetRule; |
1189 | class RegisterRule; |
1190 | class ExpressionRule; |
1191 | class ValExpressionRule; |
1192 | class RuleMap; |
1193 | class State; |
1194 | |
1195 | // Parse the initial length and id of a CFI entry, either a CIE, an FDE, |
1196 | // or a .eh_frame end-of-data mark. CURSOR points to the beginning of the |
1197 | // data to parse. On success, populate ENTRY as appropriate, and return |
1198 | // true. On failure, report the problem, and return false. Even if we |
1199 | // return false, set ENTRY->end to the first byte after the entry if we |
1200 | // were able to figure that out, or NULL if we weren't. |
1201 | bool ReadEntryPrologue(const uint8_t* cursor, Entry* entry); |
1202 | |
1203 | // Parse the fields of a CIE after the entry prologue, including any 'z' |
1204 | // augmentation data. Assume that the 'Entry' fields of CIE are |
1205 | // populated; use CIE->fields and CIE->end as the start and limit for |
1206 | // parsing. On success, populate the rest of *CIE, and return true; on |
1207 | // failure, report the problem and return false. |
1208 | bool ReadCIEFields(CIE* cie); |
1209 | |
1210 | // Parse the fields of an FDE after the entry prologue, including any 'z' |
1211 | // augmentation data. Assume that the 'Entry' fields of *FDE are |
1212 | // initialized; use FDE->fields and FDE->end as the start and limit for |
1213 | // parsing. Assume that FDE->cie is fully initialized. On success, |
1214 | // populate the rest of *FDE, and return true; on failure, report the |
1215 | // problem and return false. |
1216 | bool ReadFDEFields(FDE* fde); |
1217 | |
1218 | // Report that ENTRY is incomplete, and return false. This is just a |
1219 | // trivial wrapper for invoking reporter_->Incomplete; it provides a |
1220 | // little brevity. |
1221 | bool ReportIncomplete(Entry* entry); |
1222 | |
1223 | // Return true if ENCODING has the DW_EH_PE_indirect bit set. |
1224 | static bool IsIndirectEncoding(DwarfPointerEncoding encoding) { |
1225 | return encoding & DW_EH_PE_indirect; |
1226 | } |
1227 | |
1228 | // The contents of the .debug_frame (or .eh_frame) section we're parsing. |
1229 | const uint8_t* buffer_; |
1230 | size_t buffer_length_; |
1231 | |
1232 | // For reading multi-byte values with the appropriate endianness. |
1233 | ByteReader* reader_; |
1234 | |
1235 | // The handler to which we should report the data we find. |
1236 | Handler* handler_; |
1237 | |
1238 | // For reporting problems in the info we're parsing. |
1239 | Reporter* reporter_; |
1240 | |
1241 | // True if we are processing .eh_frame-format data. |
1242 | bool eh_frame_; |
1243 | }; |
1244 | |
1245 | // The handler class for CallFrameInfo. A CFI parser calls the |
1246 | // member functions of a handler object to report the data it finds. |
1247 | class CallFrameInfo::Handler { |
1248 | public: |
1249 | // The pseudo-register number for the canonical frame address. |
1250 | enum { kCFARegister = -1 }; |
1251 | |
1252 | Handler() { } |
1253 | virtual ~Handler() { } |
1254 | |
1255 | // The parser has found CFI for the machine code at ADDRESS, |
1256 | // extending for LENGTH bytes. OFFSET is the offset of the frame |
1257 | // description entry in the section, for use in error messages. |
1258 | // VERSION is the version number of the CFI format. AUGMENTATION is |
1259 | // a string describing any producer-specific extensions present in |
1260 | // the data. RETURN_ADDRESS is the number of the register that holds |
1261 | // the address to which the function should return. |
1262 | // |
1263 | // Entry should return true to process this CFI, or false to skip to |
1264 | // the next entry. |
1265 | // |
1266 | // The parser invokes Entry for each Frame Description Entry (FDE) |
1267 | // it finds. The parser doesn't report Common Information Entries |
1268 | // to the handler explicitly; instead, if the handler elects to |
1269 | // process a given FDE, the parser reiterates the appropriate CIE's |
1270 | // contents at the beginning of the FDE's rules. |
1271 | virtual bool Entry(size_t offset, uint64_t address, uint64_t length, |
1272 | uint8_t version, const string& augmentation, |
1273 | unsigned return_address) = 0; |
1274 | |
1275 | // When the Entry function returns true, the parser calls these |
1276 | // handler functions repeatedly to describe the rules for recovering |
1277 | // registers at each instruction in the given range of machine code. |
1278 | // Immediately after a call to Entry, the handler should assume that |
1279 | // the rule for each callee-saves register is "unchanged" --- that |
1280 | // is, that the register still has the value it had in the caller. |
1281 | // |
1282 | // If a *Rule function returns true, we continue processing this entry's |
1283 | // instructions. If a *Rule function returns false, we stop evaluating |
1284 | // instructions, and skip to the next entry. Either way, we call End |
1285 | // before going on to the next entry. |
1286 | // |
1287 | // In all of these functions, if the REG parameter is kCFARegister, then |
1288 | // the rule describes how to find the canonical frame address. |
1289 | // kCFARegister may be passed as a BASE_REGISTER argument, meaning that |
1290 | // the canonical frame address should be used as the base address for the |
1291 | // computation. All other REG values will be non-negative. |
1292 | |
1293 | // At ADDRESS, register REG's value is not recoverable. |
1294 | virtual bool UndefinedRule(uint64_t address, int reg) = 0; |
1295 | |
1296 | // At ADDRESS, register REG's value is the same as that it had in |
1297 | // the caller. |
1298 | virtual bool SameValueRule(uint64_t address, int reg) = 0; |
1299 | |
1300 | // At ADDRESS, register REG has been saved at offset OFFSET from |
1301 | // BASE_REGISTER. |
1302 | virtual bool OffsetRule(uint64_t address, int reg, |
1303 | int base_register, long offset) = 0; |
1304 | |
1305 | // At ADDRESS, the caller's value of register REG is the current |
1306 | // value of BASE_REGISTER plus OFFSET. (This rule doesn't provide an |
1307 | // address at which the register's value is saved.) |
1308 | virtual bool ValOffsetRule(uint64_t address, int reg, |
1309 | int base_register, long offset) = 0; |
1310 | |
1311 | // At ADDRESS, register REG has been saved in BASE_REGISTER. This differs |
1312 | // from ValOffsetRule(ADDRESS, REG, BASE_REGISTER, 0), in that |
1313 | // BASE_REGISTER is the "home" for REG's saved value: if you want to |
1314 | // assign to a variable whose home is REG in the calling frame, you |
1315 | // should put the value in BASE_REGISTER. |
1316 | virtual bool RegisterRule(uint64_t address, int reg, int base_register) = 0; |
1317 | |
1318 | // At ADDRESS, the DWARF expression EXPRESSION yields the address at |
1319 | // which REG was saved. |
1320 | virtual bool ExpressionRule(uint64_t address, int reg, |
1321 | const string& expression) = 0; |
1322 | |
1323 | // At ADDRESS, the DWARF expression EXPRESSION yields the caller's |
1324 | // value for REG. (This rule doesn't provide an address at which the |
1325 | // register's value is saved.) |
1326 | virtual bool ValExpressionRule(uint64_t address, int reg, |
1327 | const string& expression) = 0; |
1328 | |
1329 | // Indicate that the rules for the address range reported by the |
1330 | // last call to Entry are complete. End should return true if |
1331 | // everything is okay, or false if an error has occurred and parsing |
1332 | // should stop. |
1333 | virtual bool End() = 0; |
1334 | |
1335 | // Handler functions for Linux C++ exception handling data. These are |
1336 | // only called if the data includes 'z' augmentation strings. |
1337 | |
1338 | // The Linux C++ ABI uses an extension of the DWARF CFI format to |
1339 | // walk the stack to propagate exceptions from the throw to the |
1340 | // appropriate catch, and do the appropriate cleanups along the way. |
1341 | // CFI entries used for exception handling have two additional pieces |
1342 | // of data associated with them: |
1343 | // |
1344 | // - The "language-specific data area" describes which exception |
1345 | // types the function has 'catch' clauses for, and indicates how |
1346 | // to go about re-entering the function at the appropriate catch |
1347 | // clause. If the exception is not caught, it describes the |
1348 | // destructors that must run before the frame is popped. |
1349 | // |
1350 | // - The "personality routine" is responsible for interpreting the |
1351 | // language-specific data area's contents, and deciding whether |
1352 | // the exception should continue to propagate down the stack, |
1353 | // perhaps after doing some cleanup for this frame, or whether the |
1354 | // exception will be caught here. |
1355 | // |
1356 | // In principle, the language-specific data area is opaque to |
1357 | // everybody but the personality routine. In practice, these values |
1358 | // may be useful or interesting to readers with extra context, and |
1359 | // we have to at least skip them anyway, so we might as well report |
1360 | // them to the handler. |
1361 | |
1362 | // This entry's exception handling personality routine's address is |
1363 | // ADDRESS. If INDIRECT is true, then ADDRESS is the address at |
1364 | // which the routine's address is stored. The default definition for |
1365 | // this handler function simply returns true, allowing parsing of |
1366 | // the entry to continue. |
1367 | virtual bool PersonalityRoutine(uint64_t address, bool indirect) { |
1368 | return true; |
1369 | } |
1370 | |
1371 | // This entry's language-specific data area (LSDA) is located at |
1372 | // ADDRESS. If INDIRECT is true, then ADDRESS is the address at |
1373 | // which the area's address is stored. The default definition for |
1374 | // this handler function simply returns true, allowing parsing of |
1375 | // the entry to continue. |
1376 | virtual bool LanguageSpecificDataArea(uint64_t address, bool indirect) { |
1377 | return true; |
1378 | } |
1379 | |
1380 | // This entry describes a signal trampoline --- this frame is the |
1381 | // caller of a signal handler. The default definition for this |
1382 | // handler function simply returns true, allowing parsing of the |
1383 | // entry to continue. |
1384 | // |
1385 | // The best description of the rationale for and meaning of signal |
1386 | // trampoline CFI entries seems to be in the GCC bug database: |
1387 | // http://gcc.gnu.org/bugzilla/show_bug.cgi?id=26208 |
1388 | virtual bool SignalHandler() { return true; } |
1389 | }; |
1390 | |
1391 | // The CallFrameInfo class makes calls on an instance of this class to |
1392 | // report errors or warn about problems in the data it is parsing. The |
1393 | // default definitions of these methods print a message to stderr, but |
1394 | // you can make a derived class that overrides them. |
1395 | class CallFrameInfo::Reporter { |
1396 | public: |
1397 | // Create an error reporter which attributes troubles to the section |
1398 | // named SECTION in FILENAME. |
1399 | // |
1400 | // Normally SECTION would be .debug_frame, but the Mac puts CFI data |
1401 | // in a Mach-O section named __debug_frame. If we support |
1402 | // Linux-style exception handling data, we could be reading an |
1403 | // .eh_frame section. |
1404 | Reporter(const string& filename, |
1405 | const string& section = ".debug_frame" ) |
1406 | : filename_(filename), section_(section) { } |
1407 | virtual ~Reporter() { } |
1408 | |
1409 | // The CFI entry at OFFSET ends too early to be well-formed. KIND |
1410 | // indicates what kind of entry it is; KIND can be kUnknown if we |
1411 | // haven't parsed enough of the entry to tell yet. |
1412 | virtual void Incomplete(uint64_t offset, CallFrameInfo::EntryKind kind); |
1413 | |
1414 | // The .eh_frame data has a four-byte zero at OFFSET where the next |
1415 | // entry's length would be; this is a terminator. However, the buffer |
1416 | // length as given to the CallFrameInfo constructor says there should be |
1417 | // more data. |
1418 | virtual void EarlyEHTerminator(uint64_t offset); |
1419 | |
1420 | // The FDE at OFFSET refers to the CIE at CIE_OFFSET, but the |
1421 | // section is not that large. |
1422 | virtual void CIEPointerOutOfRange(uint64_t offset, uint64_t cie_offset); |
1423 | |
1424 | // The FDE at OFFSET refers to the CIE at CIE_OFFSET, but the entry |
1425 | // there is not a CIE. |
1426 | virtual void BadCIEId(uint64_t offset, uint64_t cie_offset); |
1427 | |
1428 | // The FDE at OFFSET refers to a CIE with an address size we don't know how |
1429 | // to handle. |
1430 | virtual void UnexpectedAddressSize(uint64_t offset, uint8_t address_size); |
1431 | |
1432 | // The FDE at OFFSET refers to a CIE with an segment descriptor size we |
1433 | // don't know how to handle. |
1434 | virtual void UnexpectedSegmentSize(uint64_t offset, uint8_t segment_size); |
1435 | |
1436 | // The FDE at OFFSET refers to a CIE with version number VERSION, |
1437 | // which we don't recognize. We cannot parse DWARF CFI if it uses |
1438 | // a version number we don't recognize. |
1439 | virtual void UnrecognizedVersion(uint64_t offset, int version); |
1440 | |
1441 | // The FDE at OFFSET refers to a CIE with augmentation AUGMENTATION, |
1442 | // which we don't recognize. We cannot parse DWARF CFI if it uses |
1443 | // augmentations we don't recognize. |
1444 | virtual void UnrecognizedAugmentation(uint64_t offset, |
1445 | const string& augmentation); |
1446 | |
1447 | // The pointer encoding ENCODING, specified by the CIE at OFFSET, is not |
1448 | // a valid encoding. |
1449 | virtual void InvalidPointerEncoding(uint64_t offset, uint8_t encoding); |
1450 | |
1451 | // The pointer encoding ENCODING, specified by the CIE at OFFSET, depends |
1452 | // on a base address which has not been supplied. |
1453 | virtual void UnusablePointerEncoding(uint64_t offset, uint8_t encoding); |
1454 | |
1455 | // The CIE at OFFSET contains a DW_CFA_restore instruction at |
1456 | // INSN_OFFSET, which may not appear in a CIE. |
1457 | virtual void RestoreInCIE(uint64_t offset, uint64_t insn_offset); |
1458 | |
1459 | // The entry at OFFSET, of kind KIND, has an unrecognized |
1460 | // instruction at INSN_OFFSET. |
1461 | virtual void BadInstruction(uint64_t offset, CallFrameInfo::EntryKind kind, |
1462 | uint64_t insn_offset); |
1463 | |
1464 | // The instruction at INSN_OFFSET in the entry at OFFSET, of kind |
1465 | // KIND, establishes a rule that cites the CFA, but we have not |
1466 | // established a CFA rule yet. |
1467 | virtual void NoCFARule(uint64_t offset, CallFrameInfo::EntryKind kind, |
1468 | uint64_t insn_offset); |
1469 | |
1470 | // The instruction at INSN_OFFSET in the entry at OFFSET, of kind |
1471 | // KIND, is a DW_CFA_restore_state instruction, but the stack of |
1472 | // saved states is empty. |
1473 | virtual void EmptyStateStack(uint64_t offset, CallFrameInfo::EntryKind kind, |
1474 | uint64_t insn_offset); |
1475 | |
1476 | // The DW_CFA_remember_state instruction at INSN_OFFSET in the entry |
1477 | // at OFFSET, of kind KIND, would restore a state that has no CFA |
1478 | // rule, whereas the current state does have a CFA rule. This is |
1479 | // bogus input, which the CallFrameInfo::Handler interface doesn't |
1480 | // (and shouldn't) have any way to report. |
1481 | virtual void ClearingCFARule(uint64_t offset, CallFrameInfo::EntryKind kind, |
1482 | uint64_t insn_offset); |
1483 | |
1484 | protected: |
1485 | // The name of the file whose CFI we're reading. |
1486 | string filename_; |
1487 | |
1488 | // The name of the CFI section in that file. |
1489 | string section_; |
1490 | }; |
1491 | |
1492 | } // namespace google_breakpad |
1493 | |
1494 | #endif // UTIL_DEBUGINFO_DWARF2READER_H__ |
1495 | |