| 1 | // -*- mode: C++ -*- |
| 2 | |
| 3 | // Copyright (c) 2010 Google Inc. All Rights Reserved. |
| 4 | // |
| 5 | // Redistribution and use in source and binary forms, with or without |
| 6 | // modification, are permitted provided that the following conditions are |
| 7 | // met: |
| 8 | // |
| 9 | // * Redistributions of source code must retain the above copyright |
| 10 | // notice, this list of conditions and the following disclaimer. |
| 11 | // * Redistributions in binary form must reproduce the above |
| 12 | // copyright notice, this list of conditions and the following disclaimer |
| 13 | // in the documentation and/or other materials provided with the |
| 14 | // distribution. |
| 15 | // * Neither the name of Google Inc. nor the names of its |
| 16 | // contributors may be used to endorse or promote products derived from |
| 17 | // this software without specific prior written permission. |
| 18 | // |
| 19 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| 20 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| 21 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
| 22 | // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
| 23 | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| 24 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
| 25 | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| 26 | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| 27 | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| 28 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| 29 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 30 | |
| 31 | // CFI reader author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com> |
| 32 | |
// This file contains definitions related to the DWARF2/3 reader and
// its handler interfaces.
// The DWARF2/3 specification can be found at
// http://dwarf.freestandards.org and should be considered required
// reading if you wish to modify the implementation.
// Only a cursory attempt is made to explain terminology that is
// used here, as it is much better explained in the standard documents.
| 40 | #ifndef COMMON_DWARF_DWARF2READER_H__ |
| 41 | #define COMMON_DWARF_DWARF2READER_H__ |
| 42 | |
| 43 | #include <assert.h> |
| 44 | #include <stdint.h> |
| 45 | |
| 46 | #include <list> |
| 47 | #include <map> |
| 48 | #include <string> |
| 49 | #include <utility> |
| 50 | #include <vector> |
| 51 | #include <memory> |
| 52 | |
| 53 | #include "common/dwarf/bytereader.h" |
| 54 | #include "common/dwarf/dwarf2enums.h" |
| 55 | #include "common/dwarf/types.h" |
| 56 | #include "common/using_std_string.h" |
| 57 | #include "common/dwarf/elf_reader.h" |
| 58 | |
| 59 | namespace google_breakpad { |
| 60 | struct LineStateMachine; |
| 61 | class Dwarf2Handler; |
| 62 | class LineInfoHandler; |
| 63 | class DwpReader; |
| 64 | |
// This maps from a string naming a section to a pair containing a pointer
// to the data for the section, and the size of the section.
typedef std::map<string, std::pair<const uint8_t*, uint64_t> > SectionMap;
| 68 | |
| 69 | // Abstract away the difference between elf and mach-o section names. |
| 70 | // Elf-names use ".section_name, mach-o uses "__section_name". Pass "name" in |
| 71 | // the elf form, ".section_name". |
| 72 | const SectionMap::const_iterator GetSectionByName(const SectionMap& |
| 73 | sections, const char* name); |
| 74 | |
| 75 | // Most of the time, this struct functions as a simple attribute and form pair. |
| 76 | // However, Dwarf5 DW_FORM_implicit_const means that a form may have its value |
| 77 | // in line in the abbrev table, and that value must be associated with the |
| 78 | // pair until the attr's value is needed. |
| 79 | struct AttrForm { |
| 80 | AttrForm(enum DwarfAttribute attr, enum DwarfForm form, uint64_t value) : |
| 81 | attr_(attr), form_(form), value_(value) { } |
| 82 | |
| 83 | enum DwarfAttribute attr_; |
| 84 | enum DwarfForm form_; |
| 85 | uint64_t value_; |
| 86 | }; |
| 87 | typedef std::list<AttrForm> AttributeList; |
| 88 | typedef AttributeList::iterator AttributeIterator; |
| 89 | typedef AttributeList::const_iterator ConstAttributeIterator; |
| 90 | |
// The header of a DWARF line number program, as used by LineInfo.
// NOTE(review): the struct name and its field names were missing from this
// copy of the file. The struct name and std_opcode_lengths are taken from
// their uses elsewhere in this file; the remaining field names follow the
// DWARF line number program header layout and must be confirmed against
// the implementation file before merging.
struct LineInfoHeader {
  uint64_t total_length;
  uint16_t version;
  uint64_t prologue_length;
  uint8_t min_insn_length;  // insn stands for instruction
  bool default_is_stmt;     // stmt stands for statement
  int8_t line_base;
  uint8_t line_range;
  uint8_t opcode_base;
  // Use a pointer so that signalsafe_addr2line is able to use this structure
  // without heap allocation problem.
  std::vector<unsigned char>* std_opcode_lengths;
};
| 104 | |
| 105 | class LineInfo { |
| 106 | public: |
| 107 | |
| 108 | // Initializes a .debug_line reader. Buffer and buffer length point |
| 109 | // to the beginning and length of the line information to read. |
| 110 | // Reader is a ByteReader class that has the endianness set |
| 111 | // properly. |
| 112 | LineInfo(const uint8_t* buffer, uint64_t buffer_length, |
| 113 | ByteReader* reader, const uint8_t* string_buffer, |
| 114 | size_t string_buffer_length, const uint8_t* line_string_buffer, |
| 115 | size_t line_string_buffer_length, LineInfoHandler* handler); |
| 116 | |
| 117 | virtual ~LineInfo() { |
| 118 | if (header_.std_opcode_lengths) { |
| 119 | delete header_.std_opcode_lengths; |
| 120 | } |
| 121 | } |
| 122 | |
| 123 | // Start processing line info, and calling callbacks in the handler. |
| 124 | // Consumes the line number information for a single compilation unit. |
| 125 | // Returns the number of bytes processed. |
| 126 | uint64_t Start(); |
| 127 | |
| 128 | // Process a single line info opcode at START using the state |
| 129 | // machine at LSM. Return true if we should define a line using the |
| 130 | // current state of the line state machine. Place the length of the |
| 131 | // opcode in LEN. |
| 132 | // If LSM_PASSES_PC is non-NULL, this function also checks if the lsm |
| 133 | // passes the address of PC. In other words, LSM_PASSES_PC will be |
| 134 | // set to true, if the following condition is met. |
| 135 | // |
| 136 | // lsm's old address < PC <= lsm's new address |
| 137 | static bool ProcessOneOpcode(ByteReader* reader, |
| 138 | LineInfoHandler* handler, |
| 139 | const struct LineInfoHeader& , |
| 140 | const uint8_t* start, |
| 141 | struct LineStateMachine* lsm, |
| 142 | size_t* len, |
| 143 | uintptr pc, |
| 144 | bool* lsm_passes_pc); |
| 145 | |
| 146 | private: |
| 147 | // Reads the DWARF2/3 header for this line info. |
| 148 | void (); |
| 149 | |
| 150 | // Reads the DWARF2/3 line information |
| 151 | void ReadLines(); |
| 152 | |
| 153 | // Read the DWARF5 types and forms for the file and directory tables. |
| 154 | void ReadTypesAndForms(const uint8_t** lineptr, uint32_t* content_types, |
| 155 | uint32_t* content_forms, uint32_t max_types, |
| 156 | uint32_t* format_count); |
| 157 | |
| 158 | // Read a row from the dwarf5 LineInfo file table. |
| 159 | void ReadFileRow(const uint8_t** lineptr, const uint32_t* content_types, |
| 160 | const uint32_t* content_forms, uint32_t row, |
| 161 | uint32_t format_count); |
| 162 | |
| 163 | // Read and return the data at *lineptr according to form. Advance |
| 164 | // *lineptr appropriately. |
| 165 | uint64_t ReadUnsignedData(uint32_t form, const uint8_t** lineptr); |
| 166 | |
| 167 | // Read and return the data at *lineptr according to form. Advance |
| 168 | // *lineptr appropriately. |
| 169 | const char* ReadStringForm(uint32_t form, const uint8_t** lineptr); |
| 170 | |
| 171 | // The associated handler to call processing functions in |
| 172 | LineInfoHandler* handler_; |
| 173 | |
| 174 | // The associated ByteReader that handles endianness issues for us |
| 175 | ByteReader* reader_; |
| 176 | |
| 177 | // A DWARF line info header. This is not the same size as in the actual file, |
| 178 | // as the one in the file may have a 32 bit or 64 bit lengths |
| 179 | |
| 180 | struct LineInfoHeader ; |
| 181 | |
| 182 | // buffer is the buffer for our line info, starting at exactly where |
| 183 | // the line info to read is. after_header is the place right after |
| 184 | // the end of the line information header. |
| 185 | const uint8_t* buffer_; |
| 186 | #ifndef NDEBUG |
| 187 | uint64_t buffer_length_; |
| 188 | #endif |
| 189 | // Convenience pointers into .debug_str and .debug_line_str. These exactly |
| 190 | // correspond to those in the compilation unit. |
| 191 | const uint8_t* string_buffer_; |
| 192 | #ifndef NDEBUG |
| 193 | uint64_t string_buffer_length_; |
| 194 | #endif |
| 195 | const uint8_t* line_string_buffer_; |
| 196 | #ifndef NDEBUG |
| 197 | uint64_t line_string_buffer_length_; |
| 198 | #endif |
| 199 | |
| 200 | const uint8_t* ; |
| 201 | }; |
| 202 | |
// This class is the main interface between the line info reader and
// the client. The virtual functions inside this get called for
// interesting events that happen during line info reading. The
// default implementation does nothing.
class LineInfoHandler {
 public:
  LineInfoHandler() { }

  virtual ~LineInfoHandler() { }

  // Called when we define a directory. NAME is the directory name,
  // DIR_NUM is the directory number
  virtual void DefineDir(const string& name, uint32_t dir_num) { }

  // Called when we define a filename. NAME is the filename, FILE_NUM
  // is the file number which is -1 if the file index is the next
  // index after the last numbered index (this happens when files are
  // dynamically defined by the line program), DIR_NUM is the
  // directory index for the directory name of this file, MOD_TIME is
  // the modification time of the file, and LENGTH is the length of
  // the file
  virtual void DefineFile(const string& name, int32_t file_num,
                          uint32_t dir_num, uint64_t mod_time,
                          uint64_t length) { }

  // Called when the line info reader has a new line, address pair
  // ready for us. ADDRESS is the address of the code, LENGTH is the
  // length of its machine code in bytes, FILE_NUM is the file number
  // containing the code, LINE_NUM is the line number in that file for
  // the code, and COLUMN_NUM is the column number the code starts at,
  // if we know it (0 otherwise).
  virtual void AddLine(uint64_t address, uint64_t length,
                       uint32_t file_num, uint32_t line_num,
                       uint32_t column_num) { }
};
| 238 | |
// Receives the address ranges found while reading a range list. The
// default implementation does nothing.
class RangeListHandler {
 public:
  RangeListHandler() { }

  virtual ~RangeListHandler() { }

  // Add a range. BEGIN and END are the two bounds reported for the range.
  virtual void AddRange(uint64_t begin, uint64_t end) { }

  // Finish processing the range list.
  virtual void Finish() { }
};
| 251 | |
| 252 | class RangeListReader { |
| 253 | public: |
| 254 | // Reading a range list requires quite a bit of information |
| 255 | // from the compilation unit. Package it conveniently. |
| 256 | struct CURangesInfo { |
| 257 | CURangesInfo() : |
| 258 | version_(0), base_address_(0), ranges_base_(0), |
| 259 | buffer_(nullptr), size_(0), addr_buffer_(nullptr), |
| 260 | addr_buffer_size_(0), addr_base_(0) { } |
| 261 | |
| 262 | uint16_t version_; |
| 263 | // Ranges base address. Ordinarily the CU's low_pc. |
| 264 | uint64_t base_address_; |
| 265 | // Offset into .debug_rnglists for this CU's rangelists. |
| 266 | uint64_t ranges_base_; |
| 267 | // Contents of either .debug_ranges or .debug_rnglists. |
| 268 | const uint8_t* buffer_; |
| 269 | uint64_t size_; |
| 270 | // Contents of .debug_addr. This cu's contribution starts at |
| 271 | // addr_base_ |
| 272 | const uint8_t* addr_buffer_; |
| 273 | uint64_t addr_buffer_size_; |
| 274 | uint64_t addr_base_; |
| 275 | }; |
| 276 | |
| 277 | RangeListReader(ByteReader* reader, CURangesInfo* cu_info, |
| 278 | RangeListHandler* handler) : |
| 279 | reader_(reader), cu_info_(cu_info), handler_(handler), |
| 280 | offset_array_(0) { } |
| 281 | |
| 282 | // Read ranges from cu_info as specified by form and data. |
| 283 | bool ReadRanges(enum DwarfForm form, uint64_t data); |
| 284 | |
| 285 | private: |
| 286 | // Read dwarf4 .debug_ranges at offset. |
| 287 | bool ReadDebugRanges(uint64_t offset); |
| 288 | // Read dwarf5 .debug_rngslist at offset. |
| 289 | bool ReadDebugRngList(uint64_t offset); |
| 290 | |
| 291 | // Convenience functions to handle the mechanics of reading entries in the |
| 292 | // ranges section. |
| 293 | uint64_t ReadULEB(uint64_t offset, uint64_t* value) { |
| 294 | size_t len; |
| 295 | *value = reader_->ReadUnsignedLEB128(cu_info_->buffer_ + offset, &len); |
| 296 | return len; |
| 297 | } |
| 298 | |
| 299 | uint64_t ReadAddress(uint64_t offset, uint64_t* value) { |
| 300 | *value = reader_->ReadAddress(cu_info_->buffer_ + offset); |
| 301 | return reader_->AddressSize(); |
| 302 | } |
| 303 | |
| 304 | // Read the address at this CU's addr_index in the .debug_addr section. |
| 305 | uint64_t GetAddressAtIndex(uint64_t addr_index) { |
| 306 | assert(cu_info_->addr_buffer_ != nullptr); |
| 307 | uint64_t offset = |
| 308 | cu_info_->addr_base_ + addr_index * reader_->AddressSize(); |
| 309 | assert(offset < cu_info_->addr_buffer_size_); |
| 310 | return reader_->ReadAddress(cu_info_->addr_buffer_ + offset); |
| 311 | } |
| 312 | |
| 313 | ByteReader* reader_; |
| 314 | CURangesInfo* cu_info_; |
| 315 | RangeListHandler* handler_; |
| 316 | uint64_t offset_array_; |
| 317 | }; |
| 318 | |
| 319 | // This class is the main interface between the reader and the |
| 320 | // client. The virtual functions inside this get called for |
| 321 | // interesting events that happen during DWARF2 reading. |
| 322 | // The default implementation skips everything. |
| 323 | class Dwarf2Handler { |
| 324 | public: |
| 325 | Dwarf2Handler() { } |
| 326 | |
| 327 | virtual ~Dwarf2Handler() { } |
| 328 | |
| 329 | // Start to process a compilation unit at OFFSET from the beginning of the |
| 330 | // .debug_info section. Return false if you would like to skip this |
| 331 | // compilation unit. |
| 332 | virtual bool StartCompilationUnit(uint64_t offset, uint8_t address_size, |
| 333 | uint8_t offset_size, uint64_t cu_length, |
| 334 | uint8_t dwarf_version) { return false; } |
| 335 | |
| 336 | // When processing a skeleton compilation unit, resulting from a split |
| 337 | // DWARF compilation, once the skeleton debug info has been read, |
| 338 | // the reader will call this function to ask the client if it needs |
| 339 | // the full debug info from the .dwo or .dwp file. Return true if |
| 340 | // you need it, or false to skip processing the split debug info. |
| 341 | virtual bool NeedSplitDebugInfo() { return true; } |
| 342 | |
| 343 | // Start to process a split compilation unit at OFFSET from the beginning of |
| 344 | // the debug_info section in the .dwp/.dwo file. Return false if you would |
| 345 | // like to skip this compilation unit. |
| 346 | virtual bool StartSplitCompilationUnit(uint64_t offset, |
| 347 | uint64_t cu_length) { return false; } |
| 348 | |
| 349 | // Start to process a DIE at OFFSET from the beginning of the .debug_info |
| 350 | // section. Return false if you would like to skip this DIE. |
| 351 | virtual bool StartDIE(uint64_t offset, enum DwarfTag tag) { return false; } |
| 352 | |
| 353 | // Called when we have an attribute with unsigned data to give to our |
| 354 | // handler. The attribute is for the DIE at OFFSET from the beginning of the |
| 355 | // .debug_info section. Its name is ATTR, its form is FORM, and its value is |
| 356 | // DATA. |
| 357 | virtual void ProcessAttributeUnsigned(uint64_t offset, |
| 358 | enum DwarfAttribute attr, |
| 359 | enum DwarfForm form, |
| 360 | uint64_t data) { } |
| 361 | |
| 362 | // Called when we have an attribute with signed data to give to our handler. |
| 363 | // The attribute is for the DIE at OFFSET from the beginning of the |
| 364 | // .debug_info section. Its name is ATTR, its form is FORM, and its value is |
| 365 | // DATA. |
| 366 | virtual void ProcessAttributeSigned(uint64_t offset, |
| 367 | enum DwarfAttribute attr, |
| 368 | enum DwarfForm form, |
| 369 | int64_t data) { } |
| 370 | |
| 371 | // Called when we have an attribute whose value is a reference to |
| 372 | // another DIE. The attribute belongs to the DIE at OFFSET from the |
| 373 | // beginning of the .debug_info section. Its name is ATTR, its form |
| 374 | // is FORM, and the offset of the DIE being referred to from the |
| 375 | // beginning of the .debug_info section is DATA. |
| 376 | virtual void ProcessAttributeReference(uint64_t offset, |
| 377 | enum DwarfAttribute attr, |
| 378 | enum DwarfForm form, |
| 379 | uint64_t data) { } |
| 380 | |
| 381 | // Called when we have an attribute with a buffer of data to give to our |
| 382 | // handler. The attribute is for the DIE at OFFSET from the beginning of the |
| 383 | // .debug_info section. Its name is ATTR, its form is FORM, DATA points to |
| 384 | // the buffer's contents, and its length in bytes is LENGTH. The buffer is |
| 385 | // owned by the caller, not the callee, and may not persist for very long. |
| 386 | // If you want the data to be available later, it needs to be copied. |
| 387 | virtual void ProcessAttributeBuffer(uint64_t offset, |
| 388 | enum DwarfAttribute attr, |
| 389 | enum DwarfForm form, |
| 390 | const uint8_t* data, |
| 391 | uint64_t len) { } |
| 392 | |
| 393 | // Called when we have an attribute with string data to give to our handler. |
| 394 | // The attribute is for the DIE at OFFSET from the beginning of the |
| 395 | // .debug_info section. Its name is ATTR, its form is FORM, and its value is |
| 396 | // DATA. |
| 397 | virtual void ProcessAttributeString(uint64_t offset, |
| 398 | enum DwarfAttribute attr, |
| 399 | enum DwarfForm form, |
| 400 | const string& data) { } |
| 401 | |
| 402 | // Called when we have an attribute whose value is the 64-bit signature |
| 403 | // of a type unit in the .debug_types section. OFFSET is the offset of |
| 404 | // the DIE whose attribute we're reporting. ATTR and FORM are the |
| 405 | // attribute's name and form. SIGNATURE is the type unit's signature. |
| 406 | virtual void ProcessAttributeSignature(uint64_t offset, |
| 407 | enum DwarfAttribute attr, |
| 408 | enum DwarfForm form, |
| 409 | uint64_t signature) { } |
| 410 | |
| 411 | // Called when finished processing the DIE at OFFSET. |
| 412 | // Because DWARF2/3 specifies a tree of DIEs, you may get starts |
| 413 | // before ends of the previous DIE, as we process children before |
| 414 | // ending the parent. |
| 415 | virtual void EndDIE(uint64_t offset) { } |
| 416 | |
| 417 | }; |
| 418 | |
// The base of DWARF2/3 debug info is a DIE (Debugging Information
// Entry).
| 421 | // DWARF groups DIE's into a tree and calls the root of this tree a |
| 422 | // "compilation unit". Most of the time, there is one compilation |
| 423 | // unit in the .debug_info section for each file that had debug info |
| 424 | // generated. |
| 425 | // Each DIE consists of |
| 426 | |
| 427 | // 1. a tag specifying a thing that is being described (ie |
| 428 | // DW_TAG_subprogram for functions, DW_TAG_variable for variables, etc |
| 429 | // 2. attributes (such as DW_AT_location for location in memory, |
| 430 | // DW_AT_name for name), and data for each attribute. |
| 431 | // 3. A flag saying whether the DIE has children or not |
| 432 | |
| 433 | // In order to gain some amount of compression, the format of |
| 434 | // each DIE (tag name, attributes and data forms for the attributes) |
| 435 | // are stored in a separate table called the "abbreviation table". |
| 436 | // This is done because a large number of DIEs have the exact same tag |
| 437 | // and list of attributes, but different data for those attributes. |
| 438 | // As a result, the .debug_info section is just a stream of data, and |
| 439 | // requires reading of the .debug_abbrev section to say what the data |
| 440 | // means. |
| 441 | |
| 442 | // As a warning to the user, it should be noted that the reason for |
| 443 | // using absolute offsets from the beginning of .debug_info is that |
| 444 | // DWARF2/3 supports referencing DIE's from other DIE's by their offset |
| 445 | // from either the current compilation unit start, *or* the beginning |
| 446 | // of the .debug_info section. This means it is possible to reference |
| 447 | // a DIE in one compilation unit from a DIE in another compilation |
| 448 | // unit. This style of reference is usually used to eliminate |
| 449 | // duplicated information that occurs across compilation |
| 450 | // units, such as base types, etc. GCC 3.4+ support this with |
| 451 | // -feliminate-dwarf2-dups. Other toolchains will sometimes do |
| 452 | // duplicate elimination in the linker. |
| 453 | |
| 454 | class CompilationUnit { |
| 455 | public: |
| 456 | |
| 457 | // Initialize a compilation unit. This requires a map of sections, |
| 458 | // the offset of this compilation unit in the .debug_info section, a |
| 459 | // ByteReader, and a Dwarf2Handler class to call callbacks in. |
| 460 | CompilationUnit(const string& path, const SectionMap& sections, |
| 461 | uint64_t offset, ByteReader* reader, Dwarf2Handler* handler); |
| 462 | virtual ~CompilationUnit() { |
| 463 | if (abbrevs_) delete abbrevs_; |
| 464 | } |
| 465 | |
| 466 | // Initialize a compilation unit from a .dwo or .dwp file. |
| 467 | // In this case, we need the .debug_addr section from the |
| 468 | // executable file that contains the corresponding skeleton |
| 469 | // compilation unit. We also inherit the Dwarf2Handler from |
| 470 | // the executable file, and call it as if we were still |
| 471 | // processing the original compilation unit. |
| 472 | void SetSplitDwarf(const uint8_t* addr_buffer, uint64_t addr_buffer_length, |
| 473 | uint64_t addr_base, uint64_t ranges_base, uint64_t dwo_id); |
| 474 | |
| 475 | // Begin reading a Dwarf2 compilation unit, and calling the |
| 476 | // callbacks in the Dwarf2Handler |
| 477 | |
| 478 | // Return the full length of the compilation unit, including |
| 479 | // headers. This plus the starting offset passed to the constructor |
| 480 | // is the offset of the end of the compilation unit --- and the |
| 481 | // start of the next compilation unit, if there is one. |
| 482 | uint64_t Start(); |
| 483 | |
| 484 | private: |
| 485 | |
| 486 | // This struct represents a single DWARF2/3 abbreviation |
| 487 | // The abbreviation tells how to read a DWARF2/3 DIE, and consist of a |
| 488 | // tag and a list of attributes, as well as the data form of each attribute. |
| 489 | struct Abbrev { |
| 490 | uint64_t number; |
| 491 | enum DwarfTag tag; |
| 492 | bool has_children; |
| 493 | AttributeList attributes; |
| 494 | }; |
| 495 | |
// A DWARF2/3 compilation unit header. This is not the same size as
// in the actual file, as the one in the file may have a 32 bit or
// 64 bit length.
// NOTE(review): the struct tag, the field names and the member name were
// missing from this copy of the file. They are restored here by analogy
// with the header-reading methods declared nearby (ReadAbbrevOffset,
// ReadAddressSize) and must be confirmed against the implementation file
// before merging.
struct CompilationUnitHeader {
  uint64_t length;
  uint16_t version;
  uint64_t abbrev_offset;
  uint8_t address_size;
} header_;
| 505 | |
// Reads the DWARF2/3 header for this compilation unit.
// NOTE(review): the method name and the parameter names below were missing
// from this copy of the file; ReadHeader and headerptr are restored from
// the surrounding comments. Confirm ReadHeader matches the definition in
// the implementation file.
void ReadHeader();

// Reads the DWARF2/3 abbreviations for this compilation unit
void ReadAbbrevs();

// Read the abbreviation offset for this compilation unit
size_t ReadAbbrevOffset(const uint8_t* headerptr);

// Read the address size for this compilation unit
size_t ReadAddressSize(const uint8_t* headerptr);

// Read the DWO id from a split or skeleton compilation unit header
size_t ReadDwoId(const uint8_t* headerptr);

// Read the type signature from a type or split type compilation unit header
size_t ReadTypeSignature(const uint8_t* headerptr);

// Read the type offset from a type or split type compilation unit header
size_t ReadTypeOffset(const uint8_t* headerptr);
| 526 | |
| 527 | // Processes a single DIE for this compilation unit and return a new |
| 528 | // pointer just past the end of it |
| 529 | const uint8_t* ProcessDIE(uint64_t dieoffset, |
| 530 | const uint8_t* start, |
| 531 | const Abbrev& abbrev); |
| 532 | |
| 533 | // Processes a single attribute and return a new pointer just past the |
| 534 | // end of it |
| 535 | const uint8_t* ProcessAttribute(uint64_t dieoffset, |
| 536 | const uint8_t* start, |
| 537 | enum DwarfAttribute attr, |
| 538 | enum DwarfForm form, |
| 539 | uint64_t implicit_const); |
| 540 | |
| 541 | // Special version of ProcessAttribute, for finding str_offsets_base and |
| 542 | // DW_AT_addr_base in DW_TAG_compile_unit, for DWARF v5. |
| 543 | const uint8_t* ProcessOffsetBaseAttribute(uint64_t dieoffset, |
| 544 | const uint8_t* start, |
| 545 | enum DwarfAttribute attr, |
| 546 | enum DwarfForm form, |
| 547 | uint64_t implicit_const); |
| 548 | |
| 549 | // Called when we have an attribute with unsigned data to give to |
| 550 | // our handler. The attribute is for the DIE at OFFSET from the |
| 551 | // beginning of compilation unit, has a name of ATTR, a form of |
| 552 | // FORM, and the actual data of the attribute is in DATA. |
| 553 | // If we see a DW_AT_GNU_dwo_id attribute, save the value so that |
| 554 | // we can find the debug info in a .dwo or .dwp file. |
| 555 | void ProcessAttributeUnsigned(uint64_t offset, |
| 556 | enum DwarfAttribute attr, |
| 557 | enum DwarfForm form, |
| 558 | uint64_t data) { |
| 559 | if (attr == DW_AT_GNU_dwo_id) { |
| 560 | dwo_id_ = data; |
| 561 | } |
| 562 | else if (attr == DW_AT_GNU_addr_base || attr == DW_AT_addr_base) { |
| 563 | addr_base_ = data; |
| 564 | } |
| 565 | else if (attr == DW_AT_str_offsets_base) { |
| 566 | str_offsets_base_ = data; |
| 567 | } |
| 568 | else if (attr == DW_AT_GNU_ranges_base || attr == DW_AT_rnglists_base) { |
| 569 | ranges_base_ = data; |
| 570 | } |
| 571 | // TODO(yunlian): When we add DW_AT_ranges_base from DWARF-5, |
| 572 | // that base will apply to DW_AT_ranges attributes in the |
| 573 | // skeleton CU as well as in the .dwo/.dwp files. |
| 574 | else if (attr == DW_AT_ranges && is_split_dwarf_) { |
| 575 | data += ranges_base_; |
| 576 | } |
| 577 | handler_->ProcessAttributeUnsigned(offset, attr, form, data); |
| 578 | } |
| 579 | |
| 580 | // Called when we have an attribute with signed data to give to |
| 581 | // our handler. The attribute is for the DIE at OFFSET from the |
| 582 | // beginning of compilation unit, has a name of ATTR, a form of |
| 583 | // FORM, and the actual data of the attribute is in DATA. |
| 584 | void ProcessAttributeSigned(uint64_t offset, |
| 585 | enum DwarfAttribute attr, |
| 586 | enum DwarfForm form, |
| 587 | int64_t data) { |
| 588 | handler_->ProcessAttributeSigned(offset, attr, form, data); |
| 589 | } |
| 590 | |
| 591 | // Called when we have an attribute with a buffer of data to give to |
| 592 | // our handler. The attribute is for the DIE at OFFSET from the |
| 593 | // beginning of compilation unit, has a name of ATTR, a form of |
| 594 | // FORM, and the actual data of the attribute is in DATA, and the |
| 595 | // length of the buffer is LENGTH. |
| 596 | void ProcessAttributeBuffer(uint64_t offset, |
| 597 | enum DwarfAttribute attr, |
| 598 | enum DwarfForm form, |
| 599 | const uint8_t* data, |
| 600 | uint64_t len) { |
| 601 | handler_->ProcessAttributeBuffer(offset, attr, form, data, len); |
| 602 | } |
| 603 | |
| 604 | // Handles the common parts of DW_FORM_GNU_str_index, DW_FORM_strx, |
| 605 | // DW_FORM_strx1, DW_FORM_strx2, DW_FORM_strx3, and DW_FORM_strx4. |
| 606 | // Retrieves the data and calls through to ProcessAttributeString. |
| 607 | void ProcessFormStringIndex(uint64_t offset, |
| 608 | enum DwarfAttribute attr, |
| 609 | enum DwarfForm form, |
| 610 | uint64_t str_index); |
| 611 | |
| 612 | // Called when we have an attribute with string data to give to |
| 613 | // our handler. The attribute is for the DIE at OFFSET from the |
| 614 | // beginning of compilation unit, has a name of ATTR, a form of |
| 615 | // FORM, and the actual data of the attribute is in DATA. |
| 616 | // If we see a DW_AT_GNU_dwo_name attribute, save the value so |
| 617 | // that we can find the debug info in a .dwo or .dwp file. |
| 618 | void ProcessAttributeString(uint64_t offset, |
| 619 | enum DwarfAttribute attr, |
| 620 | enum DwarfForm form, |
| 621 | const char* data) { |
| 622 | if (attr == DW_AT_GNU_dwo_name || attr == DW_AT_dwo_name) |
| 623 | dwo_name_ = data; |
| 624 | handler_->ProcessAttributeString(offset, attr, form, data); |
| 625 | } |
| 626 | |
| 627 | // Called to handle common portions of DW_FORM_addrx and variations, as well |
| 628 | // as DW_FORM_GNU_addr_index. |
| 629 | void ProcessAttributeAddrIndex(uint64_t offset, |
| 630 | enum DwarfAttribute attr, |
| 631 | enum DwarfForm form, |
| 632 | uint64_t addr_index) { |
| 633 | const uint8_t* addr_ptr = |
| 634 | addr_buffer_ + addr_base_ + addr_index * reader_->AddressSize(); |
| 635 | ProcessAttributeUnsigned( |
| 636 | offset, attr, form, reader_->ReadAddress(addr_ptr)); |
| 637 | } |
| 638 | |
| 639 | // Processes all DIEs for this compilation unit |
| 640 | void ProcessDIEs(); |
| 641 | |
| 642 | // Skips the die with attributes specified in ABBREV starting at |
| 643 | // START, and return the new place to position the stream to. |
| 644 | const uint8_t* SkipDIE(const uint8_t* start, const Abbrev& abbrev); |
| 645 | |
| 646 | // Skips the attribute starting at START, with FORM, and return the |
| 647 | // new place to position the stream to. |
| 648 | const uint8_t* SkipAttribute(const uint8_t* start, enum DwarfForm form); |
| 649 | |
| 650 | // Process the actual debug information in a split DWARF file. |
| 651 | void ProcessSplitDwarf(); |
| 652 | |
| 653 | // Read the debug sections from a .dwo file. |
| 654 | void ReadDebugSectionsFromDwo(ElfReader* elf_reader, |
| 655 | SectionMap* sections); |
| 656 | |
| 657 | // Path of the file containing the debug information. |
| 658 | const string path_; |
| 659 | |
| 660 | // Offset from section start is the offset of this compilation unit |
| 661 | // from the beginning of the .debug_info section. |
| 662 | uint64_t offset_from_section_start_; |
| 663 | |
// buffer is the buffer for our CU, starting at .debug_info + offset
// passed in from constructor.
// after_header points to right after the compilation unit header.
// NOTE(review): the name of the third member was missing from this copy of
// the file; after_header_ is restored from the comment above. Confirm it
// matches the uses in the implementation file.
const uint8_t* buffer_;
uint64_t buffer_length_;
const uint8_t* after_header_;
| 670 | |
| 671 | // The associated ByteReader that handles endianness issues for us |
| 672 | ByteReader* reader_; |
| 673 | |
| 674 | // The map of sections in our file to buffers containing their data |
| 675 | const SectionMap& sections_; |
| 676 | |
| 677 | // The associated handler to call processing functions in |
| 678 | Dwarf2Handler* handler_; |
| 679 | |
| 680 | // Set of DWARF2/3 abbreviations for this compilation unit. Indexed |
| 681 | // by abbreviation number, which means that abbrevs_[0] is not |
| 682 | // valid. |
| 683 | std::vector<Abbrev>* abbrevs_; |
| 684 | |
| 685 | // String section buffer and length, if we have a string section. |
| 686 | // This is here to avoid doing a section lookup for strings in |
| 687 | // ProcessAttribute, which is in the hot path for DWARF2 reading. |
| 688 | const uint8_t* string_buffer_; |
| 689 | uint64_t string_buffer_length_; |
| 690 | |
| 691 | // Similarly for .debug_line_string. |
| 692 | const uint8_t* line_string_buffer_; |
| 693 | uint64_t line_string_buffer_length_; |
| 694 | |
| 695 | // String offsets section buffer and length, if we have a string offsets |
| 696 | // section (.debug_str_offsets or .debug_str_offsets.dwo). |
| 697 | const uint8_t* str_offsets_buffer_; |
| 698 | uint64_t str_offsets_buffer_length_; |
| 699 | |
| 700 | // Address section buffer and length, if we have an address section |
| 701 | // (.debug_addr). |
| 702 | const uint8_t* addr_buffer_; |
| 703 | uint64_t addr_buffer_length_; |
| 704 | |
| 705 | // Flag indicating whether this compilation unit is part of a .dwo |
| 706 | // or .dwp file. If true, we are reading this unit because a |
| 707 | // skeleton compilation unit in an executable file had a |
| 708 | // DW_AT_GNU_dwo_name or DW_AT_GNU_dwo_id attribute. |
| 709 | // In a .dwo file, we expect the string offsets section to |
| 710 | // have a ".dwo" suffix, and we will use the ".debug_addr" section |
| 711 | // associated with the skeleton compilation unit. |
| 712 | bool is_split_dwarf_; |
| 713 | |
| 714 | // Flag indicating if it's a Type Unit (only applicable to DWARF v5). |
| 715 | bool is_type_unit_; |
| 716 | |
| 717 | // The value of the DW_AT_GNU_dwo_id attribute, if any. |
| 718 | uint64_t dwo_id_; |
| 719 | |
| 720 | // The value of the DW_AT_GNU_type_signature attribute, if any. |
| 721 | uint64_t type_signature_; |
| 722 | |
| 723 | // The value of the DW_AT_GNU_type_offset attribute, if any. |
| 724 | size_t type_offset_; |
| 725 | |
| 726 | // The value of the DW_AT_GNU_dwo_name attribute, if any. |
| 727 | const char* dwo_name_; |
| 728 | |
| 729 | // If this is a split DWARF CU, the value of the DW_AT_GNU_dwo_id attribute |
| 730 | // from the skeleton CU. |
| 731 | uint64_t skeleton_dwo_id_; |
| 732 | |
| 733 | // The value of the DW_AT_GNU_ranges_base or DW_AT_rnglists_base attribute, |
| 734 | // if any. |
| 735 | uint64_t ranges_base_; |
| 736 | |
| 737 | // The value of the DW_AT_GNU_addr_base attribute, if any. |
| 738 | uint64_t addr_base_; |
| 739 | |
| 740 | // The value of DW_AT_str_offsets_base attribute, if any. |
| 741 | uint64_t str_offsets_base_; |
| 742 | |
| 743 | // True if we have already looked for a .dwp file. |
| 744 | bool have_checked_for_dwp_; |
| 745 | |
| 746 | // Path to the .dwp file. |
| 747 | string dwp_path_; |
| 748 | |
| 749 | // ByteReader for the DWP file. |
| 750 | std::unique_ptr<ByteReader> dwp_byte_reader_; |
| 751 | |
| 752 | // DWP reader. |
| 753 | std::unique_ptr<DwpReader> dwp_reader_; |
| 754 | }; |
| 755 | |
| 756 | // A Reader for a .dwp file. Supports the fetching of DWARF debug |
| 757 | // info for a given dwo_id. |
| 758 | // |
| 759 | // There are two versions of .dwp files. In both versions, the |
| 760 | // .dwp file is an ELF file containing only debug sections. |
| 761 | // In Version 1, the file contains many copies of each debug |
| 762 | // section, one for each .dwo file that is packaged in the .dwp |
| 763 | // file, and the .debug_cu_index section maps from the dwo_id |
| 764 | // to a set of section indexes. In Version 2, the file contains |
| 765 | // one of each debug section, and the .debug_cu_index section |
| 766 | // maps from the dwo_id to a set of offsets and lengths that |
| 767 | // identify each .dwo file's contribution to the larger sections. |
| 768 | |
| 769 | class DwpReader { |
| 770 | public: |
| 771 | DwpReader(const ByteReader& byte_reader, ElfReader* elf_reader); |
| 772 | |
| 773 | ~DwpReader(); |
| 774 | |
| 775 | // Read the CU index and initialize data members. |
| 776 | void Initialize(); |
| 777 | |
| 778 | // Read the debug sections for the given dwo_id. |
| 779 | void ReadDebugSectionsForCU(uint64_t dwo_id, SectionMap* sections); |
| 780 | |
| 781 | private: |
| 782 | // Search a v1 hash table for "dwo_id". Returns the slot index |
| 783 | // where the dwo_id was found, or -1 if it was not found. |
| 784 | int LookupCU(uint64_t dwo_id); |
| 785 | |
| 786 | // Search a v2 hash table for "dwo_id". Returns the row index |
| 787 | // in the offsets and sizes tables, or 0 if it was not found. |
| 788 | uint32_t LookupCUv2(uint64_t dwo_id); |
| 789 | |
| 790 | // The ELF reader for the .dwp file. |
| 791 | ElfReader* elf_reader_; |
| 792 | |
| 793 | // The ByteReader for the .dwp file. |
| 794 | const ByteReader& byte_reader_; |
| 795 | |
| 796 | // Pointer to the .debug_cu_index section. |
| 797 | const char* cu_index_; |
| 798 | |
| 799 | // Size of the .debug_cu_index section. |
| 800 | size_t cu_index_size_; |
| 801 | |
| 802 | // Pointer to the .debug_str.dwo section. |
| 803 | const char* string_buffer_; |
| 804 | |
| 805 | // Size of the .debug_str.dwo section. |
| 806 | size_t string_buffer_size_; |
| 807 | |
| 808 | // Version of the .dwp file. We support versions 1 and 2 currently. |
| 809 | int version_; |
| 810 | |
| 811 | // Number of columns in the section tables (version 2). |
| 812 | unsigned int ncolumns_; |
| 813 | |
| 814 | // Number of units in the section tables (version 2). |
| 815 | unsigned int nunits_; |
| 816 | |
| 817 | // Number of slots in the hash table. |
| 818 | unsigned int nslots_; |
| 819 | |
| 820 | // Pointer to the beginning of the hash table. |
| 821 | const char* phash_; |
| 822 | |
| 823 | // Pointer to the beginning of the index table. |
| 824 | const char* pindex_; |
| 825 | |
| 826 | // Pointer to the beginning of the section index pool (version 1). |
| 827 | const char* shndx_pool_; |
| 828 | |
| 829 | // Pointer to the beginning of the section offset table (version 2). |
| 830 | const char* offset_table_; |
| 831 | |
| 832 | // Pointer to the beginning of the section size table (version 2). |
| 833 | const char* size_table_; |
| 834 | |
| 835 | // Contents of the sections of interest (version 2). |
| 836 | const char* abbrev_data_; |
| 837 | size_t abbrev_size_; |
| 838 | const char* info_data_; |
| 839 | size_t info_size_; |
| 840 | const char* str_offsets_data_; |
| 841 | size_t str_offsets_size_; |
| 842 | }; |
| 843 | |
| 844 | // This class is a reader for DWARF's Call Frame Information. CFI |
| 845 | // describes how to unwind stack frames --- even for functions that do |
| 846 | // not follow fixed conventions for saving registers, whose frame size |
| 847 | // varies as they execute, etc. |
| 848 | // |
| 849 | // CFI describes, at each machine instruction, how to compute the |
| 850 | // stack frame's base address, how to find the return address, and |
| 851 | // where to find the saved values of the caller's registers (if the |
| 852 | // callee has stashed them somewhere to free up the registers for its |
| 853 | // own use). |
| 854 | // |
| 855 | // For example, suppose we have a function whose machine code looks |
| 856 | // like this (imagine an assembly language that looks like C, for a |
| 857 | // machine with 32-bit registers, and a stack that grows towards lower |
| 858 | // addresses): |
| 859 | // |
| 860 | // func: ; entry point; return address at sp |
| 861 | // func+0: sp = sp - 16 ; allocate space for stack frame |
| 862 | // func+1: sp[12] = r0 ; save r0 at sp+12 |
| 863 | // ... ; other code, not frame-related |
| 864 | // func+10: sp -= 4; *sp = x ; push some x on the stack |
| 865 | // ... ; other code, not frame-related |
| 866 | // func+20: r0 = sp[16] ; restore saved r0 |
| 867 | // func+21: sp += 20 ; pop whole stack frame |
| 868 | // func+22: pc = *sp; sp += 4 ; pop return address and jump to it |
| 869 | // |
| 870 | // DWARF CFI is (a very compressed representation of) a table with a |
| 871 | // row for each machine instruction address and a column for each |
| 872 | // register showing how to restore it, if possible. |
| 873 | // |
| 874 | // A special column named "CFA", for "Canonical Frame Address", tells how |
| 875 | // to compute the base address of the frame; registers' entries may |
| 876 | // refer to the CFA in describing where the registers are saved. |
| 877 | // |
| 878 | // Another special column, named "RA", represents the return address. |
| 879 | // |
| 880 | // For example, here is a complete (uncompressed) table describing the |
| 881 | // function above: |
| 882 | // |
//     insn      cfa    r0      r1 ...  ra
//     =======================================
//     func+0:   sp                     cfa[0]
//     func+1:   sp+16                  cfa[0]
//     func+2:   sp+16  cfa[-4]         cfa[0]
//     func+11:  sp+20  cfa[-4]         cfa[0]
//     func+21:  sp+20                  cfa[0]
//     func+22:  sp                     cfa[0]
| 891 | // |
| 892 | // Some things to note here: |
| 893 | // |
| 894 | // - Each row describes the state of affairs *before* executing the |
| 895 | // instruction at the given address. Thus, the row for func+0 |
| 896 | // describes the state before we allocate the stack frame. In the |
| 897 | // next row, the formula for computing the CFA has changed, |
| 898 | // reflecting that allocation. |
| 899 | // |
| 900 | // - The other entries are written in terms of the CFA; this allows |
| 901 | // them to remain unchanged as the stack pointer gets bumped around. |
| 902 | // For example, the rule for recovering the return address (the "ra" |
| 903 | // column) remains unchanged throughout the function, even as the |
| 904 | // stack pointer takes on three different offsets from the return |
| 905 | // address. |
| 906 | // |
| 907 | // - Although we haven't shown it, most calling conventions designate |
| 908 | // "callee-saves" and "caller-saves" registers. The callee must |
| 909 | // preserve the values of callee-saves registers; if it uses them, |
| 910 | // it must save their original values somewhere, and restore them |
| 911 | // before it returns. In contrast, the callee is free to trash |
| 912 | // caller-saves registers; if the callee uses these, it will |
| 913 | // probably not bother to save them anywhere, and the CFI will |
| 914 | // probably mark their values as "unrecoverable". |
| 915 | // |
//   (However, since the caller cannot assume the callee was going to
//   save them, caller-saves registers are probably dead in the caller
//   anyway, so compilers usually don't generate CFI for caller-saves
//   registers.)
| 920 | // |
| 921 | // - Exactly where the CFA points is a matter of convention that |
| 922 | // depends on the architecture and ABI in use. In the example, the |
| 923 | // CFA is the value the stack pointer had upon entry to the |
| 924 | // function, pointing at the saved return address. But on the x86, |
| 925 | // the call frame information generated by GCC follows the |
| 926 | // convention that the CFA is the address *after* the saved return |
| 927 | // address. |
| 928 | // |
//   But by definition, the CFA remains constant throughout the
//   lifetime of the frame. This makes it a useful value for other
//   columns to refer to. It also gives debuggers a useful handle
//   for identifying a frame.
| 933 | // |
| 934 | // If you look at the table above, you'll notice that a given entry is |
| 935 | // often the same as the one immediately above it: most instructions |
| 936 | // change only one or two aspects of the stack frame, if they affect |
| 937 | // it at all. The DWARF format takes advantage of this fact, and |
| 938 | // reduces the size of the data by mentioning only the addresses and |
| 939 | // columns at which changes take place. So for the above, DWARF CFI |
| 940 | // data would only actually mention the following: |
| 941 | // |
//     insn      cfa    r0      r1 ...  ra
//     =======================================
//     func+0:   sp                     cfa[0]
//     func+1:   sp+16
//     func+2:          cfa[-4]
//     func+11:  sp+20
//     func+21:         r0
//     func+22:  sp
| 950 | // |
| 951 | // In fact, this is the way the parser reports CFI to the consumer: as |
| 952 | // a series of statements of the form, "At address X, column Y changed |
| 953 | // to Z," and related conventions for describing the initial state. |
| 954 | // |
| 955 | // Naturally, it would be impractical to have to scan the entire |
| 956 | // program's CFI, noting changes as we go, just to recover the |
| 957 | // unwinding rules in effect at one particular instruction. To avoid |
| 958 | // this, CFI data is grouped into "entries", each of which covers a |
| 959 | // specified range of addresses and begins with a complete statement |
| 960 | // of the rules for all recoverable registers at that starting |
| 961 | // address. Each entry typically covers a single function. |
| 962 | // |
| 963 | // Thus, to compute the contents of a given row of the table --- that |
| 964 | // is, rules for recovering the CFA, RA, and registers at a given |
| 965 | // instruction --- the consumer should find the entry that covers that |
| 966 | // instruction's address, start with the initial state supplied at the |
| 967 | // beginning of the entry, and work forward until it has processed all |
| 968 | // the changes up to and including those for the present instruction. |
| 969 | // |
| 970 | // There are seven kinds of rules that can appear in an entry of the |
| 971 | // table: |
| 972 | // |
| 973 | // - "undefined": The given register is not preserved by the callee; |
| 974 | // its value cannot be recovered. |
| 975 | // |
| 976 | // - "same value": This register has the same value it did in the callee. |
| 977 | // |
| 978 | // - offset(N): The register is saved at offset N from the CFA. |
| 979 | // |
| 980 | // - val_offset(N): The value the register had in the caller is the |
| 981 | // CFA plus offset N. (This is usually only useful for describing |
| 982 | // the stack pointer.) |
| 983 | // |
| 984 | // - register(R): The register's value was saved in another register R. |
| 985 | // |
| 986 | // - expression(E): Evaluating the DWARF expression E using the |
| 987 | // current frame's registers' values yields the address at which the |
| 988 | // register was saved. |
| 989 | // |
| 990 | // - val_expression(E): Evaluating the DWARF expression E using the |
| 991 | // current frame's registers' values yields the value the register |
| 992 | // had in the caller. |
| 993 | |
| 994 | class CallFrameInfo { |
| 995 | public: |
| 996 | // The different kinds of entries one finds in CFI. Used internally, |
| 997 | // and for error reporting. |
| 998 | enum EntryKind { kUnknown, kCIE, kFDE, kTerminator }; |
| 999 | |
| 1000 | // The handler class to which the parser hands the parsed call frame |
| 1001 | // information. Defined below. |
| 1002 | class Handler; |
| 1003 | |
| 1004 | // A reporter class, which CallFrameInfo uses to report errors |
| 1005 | // encountered while parsing call frame information. Defined below. |
| 1006 | class Reporter; |
| 1007 | |
| 1008 | // Create a DWARF CFI parser. BUFFER points to the contents of the |
| 1009 | // .debug_frame section to parse; BUFFER_LENGTH is its length in bytes. |
| 1010 | // REPORTER is an error reporter the parser should use to report |
| 1011 | // problems. READER is a ByteReader instance that has the endianness and |
| 1012 | // address size set properly. Report the data we find to HANDLER. |
| 1013 | // |
| 1014 | // This class can also parse Linux C++ exception handling data, as found |
| 1015 | // in '.eh_frame' sections. This data is a variant of DWARF CFI that is |
| 1016 | // placed in loadable segments so that it is present in the program's |
| 1017 | // address space, and is interpreted by the C++ runtime to search the |
| 1018 | // call stack for a handler interested in the exception being thrown, |
| 1019 | // actually pop the frames, and find cleanup code to run. |
| 1020 | // |
| 1021 | // There are two differences between the call frame information described |
| 1022 | // in the DWARF standard and the exception handling data Linux places in |
| 1023 | // the .eh_frame section: |
| 1024 | // |
| 1025 | // - Exception handling data uses uses a different format for call frame |
| 1026 | // information entry headers. The distinguished CIE id, the way FDEs |
| 1027 | // refer to their CIEs, and the way the end of the series of entries is |
| 1028 | // determined are all slightly different. |
| 1029 | // |
| 1030 | // If the constructor's EH_FRAME argument is true, then the |
| 1031 | // CallFrameInfo parses the entry headers as Linux C++ exception |
| 1032 | // handling data. If EH_FRAME is false or omitted, the CallFrameInfo |
| 1033 | // parses standard DWARF call frame information. |
| 1034 | // |
| 1035 | // - Linux C++ exception handling data uses CIE augmentation strings |
| 1036 | // beginning with 'z' to specify the presence of additional data after |
| 1037 | // the CIE and FDE headers and special encodings used for addresses in |
| 1038 | // frame description entries. |
| 1039 | // |
| 1040 | // CallFrameInfo can handle 'z' augmentations in either DWARF CFI or |
| 1041 | // exception handling data if you have supplied READER with the base |
| 1042 | // addresses needed to interpret the pointer encodings that 'z' |
| 1043 | // augmentations can specify. See the ByteReader interface for details |
| 1044 | // about the base addresses. See the CallFrameInfo::Handler interface |
| 1045 | // for details about the additional information one might find in |
| 1046 | // 'z'-augmented data. |
| 1047 | // |
| 1048 | // Thus: |
| 1049 | // |
| 1050 | // - If you are parsing standard DWARF CFI, as found in a .debug_frame |
| 1051 | // section, you should pass false for the EH_FRAME argument, or omit |
| 1052 | // it, and you need not worry about providing READER with the |
| 1053 | // additional base addresses. |
| 1054 | // |
| 1055 | // - If you want to parse Linux C++ exception handling data from a |
| 1056 | // .eh_frame section, you should pass EH_FRAME as true, and call |
| 1057 | // READER's Set*Base member functions before calling our Start method. |
| 1058 | // |
| 1059 | // - If you want to parse DWARF CFI that uses the 'z' augmentations |
| 1060 | // (although I don't think any toolchain ever emits such data), you |
| 1061 | // could pass false for EH_FRAME, but call READER's Set*Base members. |
| 1062 | // |
| 1063 | // The extensions the Linux C++ ABI makes to DWARF for exception |
| 1064 | // handling are described here, rather poorly: |
| 1065 | // http://refspecs.linux-foundation.org/LSB_4.0.0/LSB-Core-generic/LSB-Core-generic/dwarfext.html |
| 1066 | // http://refspecs.linux-foundation.org/LSB_4.0.0/LSB-Core-generic/LSB-Core-generic/ehframechpt.html |
| 1067 | // |
| 1068 | // The mechanics of C++ exception handling, personality routines, |
| 1069 | // and language-specific data areas are described here, rather nicely: |
| 1070 | // http://www.codesourcery.com/public/cxx-abi/abi-eh.html |
| 1071 | CallFrameInfo(const uint8_t* buffer, size_t buffer_length, |
| 1072 | ByteReader* reader, Handler* handler, Reporter* reporter, |
| 1073 | bool eh_frame = false) |
| 1074 | : buffer_(buffer), buffer_length_(buffer_length), |
| 1075 | reader_(reader), handler_(handler), reporter_(reporter), |
| 1076 | eh_frame_(eh_frame) { } |
| 1077 | |
| 1078 | ~CallFrameInfo() { } |
| 1079 | |
| 1080 | // Parse the entries in BUFFER, reporting what we find to HANDLER. |
| 1081 | // Return true if we reach the end of the section successfully, or |
| 1082 | // false if we encounter an error. |
| 1083 | bool Start(); |
| 1084 | |
| 1085 | // Return the textual name of KIND. For error reporting. |
| 1086 | static const char* KindName(EntryKind kind); |
| 1087 | |
| 1088 | private: |
| 1089 | |
| 1090 | struct CIE; |
| 1091 | |
| 1092 | // A CFI entry, either an FDE or a CIE. |
| 1093 | struct Entry { |
| 1094 | // The starting offset of the entry in the section, for error |
| 1095 | // reporting. |
| 1096 | size_t offset; |
| 1097 | |
| 1098 | // The start of this entry in the buffer. |
| 1099 | const uint8_t* start; |
| 1100 | |
| 1101 | // Which kind of entry this is. |
| 1102 | // |
| 1103 | // We want to be able to use this for error reporting even while we're |
| 1104 | // in the midst of parsing. Error reporting code may assume that kind, |
| 1105 | // offset, and start fields are valid, although kind may be kUnknown. |
| 1106 | EntryKind kind; |
| 1107 | |
| 1108 | // The end of this entry's common prologue (initial length and id), and |
| 1109 | // the start of this entry's kind-specific fields. |
| 1110 | const uint8_t* fields; |
| 1111 | |
| 1112 | // The start of this entry's instructions. |
| 1113 | const uint8_t* instructions; |
| 1114 | |
| 1115 | // The address past the entry's last byte in the buffer. (Note that |
| 1116 | // since offset points to the entry's initial length field, and the |
| 1117 | // length field is the number of bytes after that field, this is not |
| 1118 | // simply buffer_ + offset + length.) |
| 1119 | const uint8_t* end; |
| 1120 | |
| 1121 | // For both DWARF CFI and .eh_frame sections, this is the CIE id in a |
| 1122 | // CIE, and the offset of the associated CIE in an FDE. |
| 1123 | uint64_t id; |
| 1124 | |
| 1125 | // The CIE that applies to this entry, if we've parsed it. If this is a |
| 1126 | // CIE, then this field points to this structure. |
| 1127 | CIE* cie; |
| 1128 | }; |
| 1129 | |
| 1130 | // A common information entry (CIE). |
| 1131 | struct CIE: public Entry { |
| 1132 | uint8_t version; // CFI data version number |
| 1133 | string augmentation; // vendor format extension markers |
| 1134 | uint64_t code_alignment_factor; // scale for code address adjustments |
| 1135 | int data_alignment_factor; // scale for stack pointer adjustments |
| 1136 | unsigned return_address_register; // which register holds the return addr |
| 1137 | |
| 1138 | // True if this CIE includes Linux C++ ABI 'z' augmentation data. |
| 1139 | bool has_z_augmentation; |
| 1140 | |
| 1141 | // Parsed 'z' augmentation data. These are meaningful only if |
| 1142 | // has_z_augmentation is true. |
| 1143 | bool has_z_lsda; // The 'z' augmentation included 'L'. |
| 1144 | bool has_z_personality; // The 'z' augmentation included 'P'. |
| 1145 | bool has_z_signal_frame; // The 'z' augmentation included 'S'. |
| 1146 | |
| 1147 | // If has_z_lsda is true, this is the encoding to be used for language- |
| 1148 | // specific data area pointers in FDEs. |
| 1149 | DwarfPointerEncoding lsda_encoding; |
| 1150 | |
| 1151 | // If has_z_personality is true, this is the encoding used for the |
| 1152 | // personality routine pointer in the augmentation data. |
| 1153 | DwarfPointerEncoding personality_encoding; |
| 1154 | |
| 1155 | // If has_z_personality is true, this is the address of the personality |
| 1156 | // routine --- or, if personality_encoding & DW_EH_PE_indirect, the |
| 1157 | // address where the personality routine's address is stored. |
| 1158 | uint64_t personality_address; |
| 1159 | |
| 1160 | // This is the encoding used for addresses in the FDE header and |
| 1161 | // in DW_CFA_set_loc instructions. This is always valid, whether |
| 1162 | // or not we saw a 'z' augmentation string; its default value is |
| 1163 | // DW_EH_PE_absptr, which is what normal DWARF CFI uses. |
| 1164 | DwarfPointerEncoding pointer_encoding; |
| 1165 | |
| 1166 | // These were only introduced in DWARF4, so will not be set in older |
| 1167 | // versions. |
| 1168 | uint8_t address_size; |
| 1169 | uint8_t segment_size; |
| 1170 | }; |
| 1171 | |
| 1172 | // A frame description entry (FDE). |
| 1173 | struct FDE: public Entry { |
| 1174 | uint64_t address; // start address of described code |
| 1175 | uint64_t size; // size of described code, in bytes |
| 1176 | |
| 1177 | // If cie->has_z_lsda is true, then this is the language-specific data |
| 1178 | // area's address --- or its address's address, if cie->lsda_encoding |
| 1179 | // has the DW_EH_PE_indirect bit set. |
| 1180 | uint64_t lsda_address; |
| 1181 | }; |
| 1182 | |
| 1183 | // Internal use. |
| 1184 | class Rule; |
| 1185 | class UndefinedRule; |
| 1186 | class SameValueRule; |
| 1187 | class OffsetRule; |
| 1188 | class ValOffsetRule; |
| 1189 | class RegisterRule; |
| 1190 | class ExpressionRule; |
| 1191 | class ValExpressionRule; |
| 1192 | class RuleMap; |
| 1193 | class State; |
| 1194 | |
| 1195 | // Parse the initial length and id of a CFI entry, either a CIE, an FDE, |
| 1196 | // or a .eh_frame end-of-data mark. CURSOR points to the beginning of the |
| 1197 | // data to parse. On success, populate ENTRY as appropriate, and return |
| 1198 | // true. On failure, report the problem, and return false. Even if we |
| 1199 | // return false, set ENTRY->end to the first byte after the entry if we |
| 1200 | // were able to figure that out, or NULL if we weren't. |
| 1201 | bool ReadEntryPrologue(const uint8_t* cursor, Entry* entry); |
| 1202 | |
| 1203 | // Parse the fields of a CIE after the entry prologue, including any 'z' |
| 1204 | // augmentation data. Assume that the 'Entry' fields of CIE are |
| 1205 | // populated; use CIE->fields and CIE->end as the start and limit for |
| 1206 | // parsing. On success, populate the rest of *CIE, and return true; on |
| 1207 | // failure, report the problem and return false. |
| 1208 | bool ReadCIEFields(CIE* cie); |
| 1209 | |
| 1210 | // Parse the fields of an FDE after the entry prologue, including any 'z' |
| 1211 | // augmentation data. Assume that the 'Entry' fields of *FDE are |
| 1212 | // initialized; use FDE->fields and FDE->end as the start and limit for |
| 1213 | // parsing. Assume that FDE->cie is fully initialized. On success, |
| 1214 | // populate the rest of *FDE, and return true; on failure, report the |
| 1215 | // problem and return false. |
| 1216 | bool ReadFDEFields(FDE* fde); |
| 1217 | |
| 1218 | // Report that ENTRY is incomplete, and return false. This is just a |
| 1219 | // trivial wrapper for invoking reporter_->Incomplete; it provides a |
| 1220 | // little brevity. |
| 1221 | bool ReportIncomplete(Entry* entry); |
| 1222 | |
| 1223 | // Return true if ENCODING has the DW_EH_PE_indirect bit set. |
| 1224 | static bool IsIndirectEncoding(DwarfPointerEncoding encoding) { |
| 1225 | return encoding & DW_EH_PE_indirect; |
| 1226 | } |
| 1227 | |
| 1228 | // The contents of the DWARF .debug_info section we're parsing. |
| 1229 | const uint8_t* buffer_; |
| 1230 | size_t buffer_length_; |
| 1231 | |
| 1232 | // For reading multi-byte values with the appropriate endianness. |
| 1233 | ByteReader* reader_; |
| 1234 | |
| 1235 | // The handler to which we should report the data we find. |
| 1236 | Handler* handler_; |
| 1237 | |
| 1238 | // For reporting problems in the info we're parsing. |
| 1239 | Reporter* reporter_; |
| 1240 | |
| 1241 | // True if we are processing .eh_frame-format data. |
| 1242 | bool eh_frame_; |
| 1243 | }; |
| 1244 | |
// The handler class for CallFrameInfo. The CFI parser calls the
// member functions of a handler object to report the data it finds.
| 1247 | class CallFrameInfo::Handler { |
| 1248 | public: |
| 1249 | // The pseudo-register number for the canonical frame address. |
| 1250 | enum { kCFARegister = -1 }; |
| 1251 | |
| 1252 | Handler() { } |
| 1253 | virtual ~Handler() { } |
| 1254 | |
| 1255 | // The parser has found CFI for the machine code at ADDRESS, |
| 1256 | // extending for LENGTH bytes. OFFSET is the offset of the frame |
| 1257 | // description entry in the section, for use in error messages. |
| 1258 | // VERSION is the version number of the CFI format. AUGMENTATION is |
| 1259 | // a string describing any producer-specific extensions present in |
| 1260 | // the data. RETURN_ADDRESS is the number of the register that holds |
| 1261 | // the address to which the function should return. |
| 1262 | // |
| 1263 | // Entry should return true to process this CFI, or false to skip to |
| 1264 | // the next entry. |
| 1265 | // |
| 1266 | // The parser invokes Entry for each Frame Description Entry (FDE) |
| 1267 | // it finds. The parser doesn't report Common Information Entries |
| 1268 | // to the handler explicitly; instead, if the handler elects to |
| 1269 | // process a given FDE, the parser reiterates the appropriate CIE's |
| 1270 | // contents at the beginning of the FDE's rules. |
| 1271 | virtual bool Entry(size_t offset, uint64_t address, uint64_t length, |
| 1272 | uint8_t version, const string& augmentation, |
| 1273 | unsigned return_address) = 0; |
| 1274 | |
| 1275 | // When the Entry function returns true, the parser calls these |
| 1276 | // handler functions repeatedly to describe the rules for recovering |
| 1277 | // registers at each instruction in the given range of machine code. |
| 1278 | // Immediately after a call to Entry, the handler should assume that |
| 1279 | // the rule for each callee-saves register is "unchanged" --- that |
| 1280 | // is, that the register still has the value it had in the caller. |
| 1281 | // |
| 1282 | // If a *Rule function returns true, we continue processing this entry's |
| 1283 | // instructions. If a *Rule function returns false, we stop evaluating |
| 1284 | // instructions, and skip to the next entry. Either way, we call End |
| 1285 | // before going on to the next entry. |
| 1286 | // |
| 1287 | // In all of these functions, if the REG parameter is kCFARegister, then |
| 1288 | // the rule describes how to find the canonical frame address. |
| 1289 | // kCFARegister may be passed as a BASE_REGISTER argument, meaning that |
| 1290 | // the canonical frame address should be used as the base address for the |
| 1291 | // computation. All other REG values will be positive. |
| 1292 | |
| 1293 | // At ADDRESS, register REG's value is not recoverable. |
| 1294 | virtual bool UndefinedRule(uint64_t address, int reg) = 0; |
| 1295 | |
| 1296 | // At ADDRESS, register REG's value is the same as that it had in |
| 1297 | // the caller. |
| 1298 | virtual bool SameValueRule(uint64_t address, int reg) = 0; |
| 1299 | |
| 1300 | // At ADDRESS, register REG has been saved at offset OFFSET from |
| 1301 | // BASE_REGISTER. |
| 1302 | virtual bool OffsetRule(uint64_t address, int reg, |
| 1303 | int base_register, long offset) = 0; |
| 1304 | |
| 1305 | // At ADDRESS, the caller's value of register REG is the current |
| 1306 | // value of BASE_REGISTER plus OFFSET. (This rule doesn't provide an |
| 1307 | // address at which the register's value is saved.) |
| 1308 | virtual bool ValOffsetRule(uint64_t address, int reg, |
| 1309 | int base_register, long offset) = 0; |
| 1310 | |
| 1311 | // At ADDRESS, register REG has been saved in BASE_REGISTER. This differs |
| 1312 | // from ValOffsetRule(ADDRESS, REG, BASE_REGISTER, 0), in that |
| 1313 | // BASE_REGISTER is the "home" for REG's saved value: if you want to |
| 1314 | // assign to a variable whose home is REG in the calling frame, you |
| 1315 | // should put the value in BASE_REGISTER. |
| 1316 | virtual bool RegisterRule(uint64_t address, int reg, int base_register) = 0; |
| 1317 | |
| 1318 | // At ADDRESS, the DWARF expression EXPRESSION yields the address at |
| 1319 | // which REG was saved. |
| 1320 | virtual bool ExpressionRule(uint64_t address, int reg, |
| 1321 | const string& expression) = 0; |
| 1322 | |
| 1323 | // At ADDRESS, the DWARF expression EXPRESSION yields the caller's |
| 1324 | // value for REG. (This rule doesn't provide an address at which the |
| 1325 | // register's value is saved.) |
| 1326 | virtual bool ValExpressionRule(uint64_t address, int reg, |
| 1327 | const string& expression) = 0; |
| 1328 | |
| 1329 | // Indicate that the rules for the address range reported by the |
| 1330 | // last call to Entry are complete. End should return true if |
| 1331 | // everything is okay, or false if an error has occurred and parsing |
| 1332 | // should stop. |
| 1333 | virtual bool End() = 0; |
| 1334 | |
| 1335 | // Handler functions for Linux C++ exception handling data. These are |
| 1336 | // only called if the data includes 'z' augmentation strings. |
| 1337 | |
| 1338 | // The Linux C++ ABI uses an extension of the DWARF CFI format to |
| 1339 | // walk the stack to propagate exceptions from the throw to the |
| 1340 | // appropriate catch, and do the appropriate cleanups along the way. |
| 1341 | // CFI entries used for exception handling have two additional pieces |
| 1342 | // of data associated with them: |
| 1343 | // |
| 1344 | // - The "language-specific data area" describes which exception |
| 1345 | // types the function has 'catch' clauses for, and indicates how |
| 1346 | // to go about re-entering the function at the appropriate catch |
| 1347 | // clause. If the exception is not caught, it describes the |
| 1348 | // destructors that must run before the frame is popped. |
| 1349 | // |
| 1350 | // - The "personality routine" is responsible for interpreting the |
| 1351 | // language-specific data area's contents, and deciding whether |
| 1352 | // the exception should continue to propagate down the stack, |
| 1353 | // perhaps after doing some cleanup for this frame, or whether the |
| 1354 | // exception will be caught here. |
| 1355 | // |
| 1356 | // In principle, the language-specific data area is opaque to |
| 1357 | // everybody but the personality routine. In practice, these values |
| 1358 | // may be useful or interesting to readers with extra context, and |
| 1359 | // we have to at least skip them anyway, so we might as well report |
| 1360 | // them to the handler. |
| 1361 | |
| 1362 | // This entry's exception handling personality routine's address is |
| 1363 | // ADDRESS. If INDIRECT is true, then ADDRESS is instead the address at |
| 1364 | // which the routine's address is stored. The default definition for |
| 1365 | // this handler function ignores both arguments and simply returns true, |
| 1366 | // allowing parsing of the entry to continue. |
| 1367 | virtual bool PersonalityRoutine(uint64_t address, bool indirect) { |
| 1368 | return true; |
| 1369 | } |
| 1370 | |
| 1371 | // This entry's language-specific data area (LSDA) is located at |
| 1372 | // ADDRESS. If INDIRECT is true, then ADDRESS is instead the address at |
| 1373 | // which the area's address is stored. The default definition for |
| 1374 | // this handler function ignores both arguments and simply returns true, |
| 1375 | // allowing parsing of the entry to continue. |
| 1376 | virtual bool LanguageSpecificDataArea(uint64_t address, bool indirect) { |
| 1377 | return true; |
| 1378 | } |
| 1379 | |
| 1380 | // This entry describes a signal trampoline --- this frame is the |
| 1381 | // caller of a signal handler. This function takes no arguments; its |
| 1382 | // default definition simply returns true, allowing parsing of the |
| 1383 | // entry to continue. |
| 1384 | // |
| 1385 | // The best description of the rationale for and meaning of signal |
| 1386 | // trampoline CFI entries seems to be in the GCC bug database: |
| 1387 | // http://gcc.gnu.org/bugzilla/show_bug.cgi?id=26208 |
| 1388 | virtual bool SignalHandler() { return true; } |
| 1389 | }; |
| 1390 | |
| 1391 | // The CallFrameInfo class makes calls on an instance of this class to |
| 1392 | // report errors or warn about problems in the data it is parsing. The |
| 1393 | // default definitions of these methods print a message to stderr, but |
| 1394 | // every one of them is virtual, so a derived class can override them. |
| 1395 | class CallFrameInfo::Reporter { |
| 1396 | public: |
| 1397 | // Create an error reporter which attributes troubles to the section |
| 1398 | // named SECTION in FILENAME. SECTION defaults to ".debug_frame". |
| 1399 | // |
| 1400 | // Normally SECTION would be .debug_frame, but the Mac puts CFI data |
| 1401 | // in a Mach-O section named __debug_frame. If we support |
| 1402 | // Linux-style exception handling data, we could be reading an |
| 1403 | // .eh_frame section. |
| 1404 | Reporter(const string& filename, |
| 1405 | const string& section = ".debug_frame" ) |
| 1406 | : filename_(filename), section_(section) { } |
| 1407 | virtual ~Reporter() { } |
| 1408 | |
| 1409 | // The CFI entry at OFFSET ends too early to be well-formed. KIND |
| 1410 | // indicates what kind of entry it is; KIND can be kUnknown if we |
| 1411 | // haven't parsed enough of the entry to tell yet. |
| 1412 | virtual void Incomplete(uint64_t offset, CallFrameInfo::EntryKind kind); |
| 1413 | |
| 1414 | // The .eh_frame data has a four-byte zero at OFFSET where the next |
| 1415 | // entry's length would be; this is a terminator. However, the buffer |
| 1416 | // length as given to the CallFrameInfo constructor says there should be |
| 1417 | // more data. |
| 1418 | virtual void EarlyEHTerminator(uint64_t offset); |
| 1419 | |
| 1420 | // The FDE at OFFSET refers to the CIE at CIE_OFFSET, but the |
| 1421 | // section is not that large. |
| 1422 | virtual void CIEPointerOutOfRange(uint64_t offset, uint64_t cie_offset); |
| 1423 | |
| 1424 | // The FDE at OFFSET refers to the CIE at CIE_OFFSET, but the entry |
| 1425 | // there is not a CIE. |
| 1426 | virtual void BadCIEId(uint64_t offset, uint64_t cie_offset); |
| 1427 | |
| 1428 | // The FDE at OFFSET refers to a CIE whose address size, ADDRESS_SIZE, is |
| 1429 | // one we don't know how to handle. |
| 1430 | virtual void UnexpectedAddressSize(uint64_t offset, uint8_t address_size); |
| 1431 | |
| 1432 | // The FDE at OFFSET refers to a CIE whose segment descriptor size, |
| 1433 | // SEGMENT_SIZE, is one we don't know how to handle. |
| 1434 | virtual void UnexpectedSegmentSize(uint64_t offset, uint8_t segment_size); |
| 1435 | |
| 1436 | // The FDE at OFFSET refers to a CIE with version number VERSION, |
| 1437 | // which we don't recognize. We cannot parse DWARF CFI if it uses |
| 1438 | // a version number we don't recognize. |
| 1439 | virtual void UnrecognizedVersion(uint64_t offset, int version); |
| 1440 | |
| 1441 | // The FDE at OFFSET refers to a CIE with augmentation AUGMENTATION, |
| 1442 | // which we don't recognize. We cannot parse DWARF CFI if it uses |
| 1443 | // augmentations we don't recognize. |
| 1444 | virtual void UnrecognizedAugmentation(uint64_t offset, |
| 1445 | const string& augmentation); |
| 1446 | |
| 1447 | // The pointer encoding ENCODING, specified by the CIE at OFFSET, is not |
| 1448 | // a valid encoding. |
| 1449 | virtual void InvalidPointerEncoding(uint64_t offset, uint8_t encoding); |
| 1450 | |
| 1451 | // The pointer encoding ENCODING, specified by the CIE at OFFSET, depends |
| 1452 | // on a base address which has not been supplied. |
| 1453 | virtual void UnusablePointerEncoding(uint64_t offset, uint8_t encoding); |
| 1454 | |
| 1455 | // The CIE at OFFSET contains a DW_CFA_restore instruction at |
| 1456 | // INSN_OFFSET, which may not appear in a CIE. |
| 1457 | virtual void RestoreInCIE(uint64_t offset, uint64_t insn_offset); |
| 1458 | |
| 1459 | // The entry at OFFSET, of kind KIND, has an unrecognized |
| 1460 | // instruction at INSN_OFFSET. |
| 1461 | virtual void BadInstruction(uint64_t offset, CallFrameInfo::EntryKind kind, |
| 1462 | uint64_t insn_offset); |
| 1463 | |
| 1464 | // The instruction at INSN_OFFSET in the entry at OFFSET, of kind |
| 1465 | // KIND, establishes a rule that cites the CFA, but we have not |
| 1466 | // established a CFA rule yet. |
| 1467 | virtual void NoCFARule(uint64_t offset, CallFrameInfo::EntryKind kind, |
| 1468 | uint64_t insn_offset); |
| 1469 | |
| 1470 | // The instruction at INSN_OFFSET in the entry at OFFSET, of kind |
| 1471 | // KIND, is a DW_CFA_restore_state instruction, but the stack of |
| 1472 | // saved states is empty. |
| 1473 | virtual void EmptyStateStack(uint64_t offset, CallFrameInfo::EntryKind kind, |
| 1474 | uint64_t insn_offset); |
| 1475 | |
| 1476 | // The DW_CFA_restore_state instruction at INSN_OFFSET in the entry |
| 1477 | // at OFFSET, of kind KIND, would restore a state that has no CFA |
| 1478 | // rule, whereas the current state does have a CFA rule. This is |
| 1479 | // bogus input, which the CallFrameInfo::Handler interface doesn't |
| 1480 | // (and shouldn't) have any way to report. |
| 1481 | virtual void ClearingCFARule(uint64_t offset, CallFrameInfo::EntryKind kind, |
| 1482 | uint64_t insn_offset); |
| 1483 | |
| 1484 | protected: |
| 1485 | // The name of the file whose CFI we're reading; set from the constructor's FILENAME. |
| 1486 | string filename_; |
| 1487 | |
| 1488 | // The name of the CFI section in that file; set from the constructor's SECTION. |
| 1489 | string section_; |
| 1490 | }; |
| 1491 | |
| 1492 | } // namespace google_breakpad |
| 1493 | |
| 1494 | #endif // UTIL_DEBUGINFO_DWARF2READER_H__ |
| 1495 | |