| 1 | #pragma once |
| 2 | |
| 3 | #if defined(__ELF__) && !defined(__FreeBSD__) |
| 4 | |
| 5 | /* |
| 6 | * Copyright 2012-present Facebook, Inc. |
| 7 | * |
| 8 | * Licensed under the Apache License, Version 2.0 (the "License"); |
| 9 | * you may not use this file except in compliance with the License. |
| 10 | * You may obtain a copy of the License at |
| 11 | * |
| 12 | * http://www.apache.org/licenses/LICENSE-2.0 |
| 13 | * |
| 14 | * Unless required by applicable law or agreed to in writing, software |
| 15 | * distributed under the License is distributed on an "AS IS" BASIS, |
| 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 17 | * See the License for the specific language governing permissions and |
| 18 | * limitations under the License. |
| 19 | */ |
| 20 | |
| 21 | /** This file was edited for ClickHouse. |
| 22 | */ |
| 23 | |
#include <cstddef>
#include <cstdint>
#include <string>
#include <string_view>
#include <variant>
| 27 | |
| 28 | |
namespace DB
{

class Elf;

/**
 * DWARF record parser.
 *
 * We only implement enough DWARF functionality to convert from PC address
 * to file and line number information.
 *
 * This means (although they're not part of the public API of this class), we
 * can parse Debug Information Entries (DIEs), abbreviations, attributes (of
 * all forms), and we can interpret bytecode for the line number VM.
 *
 * We can interpret DWARF records of version 2, 3, or 4, although we don't
 * actually support many of the version 4 features (such as VLIW, multiple
 * operations per instruction).
 *
 * Note that the DWARF record parser does not allocate heap memory at all.
 * This is on purpose: you can use the parser from memory-constrained
 * situations (such as an exception handler for std::bad_alloc). If it weren't
 * for this requirement, some things would be much simpler: the Path class
 * would be unnecessary and would be replaced with a std::string; the list of
 * file names in the line number VM would be kept as a vector of strings
 * instead of re-executing the program to look for DW_LNE_define_file
 * instructions, etc.
 */
class Dwarf final
{
    // Note that Dwarf uses (and returns) std::string_view a lot.
    // These std::string_view objects point into sections of the ELF file, and
    // so they stay valid only for as long as the passed-in Elf is live.
public:
    /** Create a DWARF parser around an ELF file. */
    explicit Dwarf(const Elf & elf);

    /**
     * Represent a file path as a collection of three parts (base directory,
     * subdirectory, and file).
     *
     * The parts are non-owning views, so a Path never allocates by itself;
     * only toString() produces an owning std::string.
     */
    class Path
    {
    public:
        /** Construct an empty path: all three parts are empty views. */
        Path() = default;

        Path(std::string_view baseDir, std::string_view subDir, std::string_view file);

        std::string_view baseDir() const { return baseDir_; }
        std::string_view subDir() const { return subDir_; }
        std::string_view file() const { return file_; }

        size_t size() const;

        /**
         * Copy the Path to a buffer of size bufSize.
         *
         * toBuffer behaves like snprintf: It will always null-terminate the
         * buffer (so it will copy at most bufSize-1 bytes), and it will return
         * the number of bytes that would have been written if there had been
         * enough room, so, if toBuffer returns a value >= bufSize, the output
         * was truncated.
         */
        size_t toBuffer(char * buf, size_t bufSize) const;

        /** Write the path into dest (defined out of line). */
        void toString(std::string & dest) const;

        /** Convenience overload: returns the path as a newly built std::string. */
        std::string toString() const
        {
            std::string result;
            toString(result);
            return result;
        }

        // TODO(tudorb): Implement operator==, operator!=; not as easy as it
        // seems as the same path can be represented in multiple ways
    private:
        std::string_view baseDir_;
        std::string_view subDir_;
        std::string_view file_;
    };

    /** How much effort findAddress() may spend on resolving location info. */
    enum class LocationInfoMode
    {
        // Don't resolve location info.
        DISABLED,
        // Perform CU lookup using .debug_aranges (might be incomplete).
        FAST,
        // Scan all CU in .debug_info (slow!) on .debug_aranges lookup failure.
        FULL,
    };

    /** Output of findAddress(). Each Path is paired with a validity flag. */
    struct LocationInfo
    {
        // Flag paired with mainFile (presumably set once mainFile is filled in).
        bool hasMainFile = false;
        Path mainFile;

        // Flag paired with file and line (presumably set once both are filled in).
        bool hasFileAndLine = false;
        Path file;
        uint64_t line = 0;
    };

    /** Find the file and line number information corresponding to address.
      * The address must be physical - offset in object file without offset in virtual memory where the object is loaded.
      */
    bool findAddress(uintptr_t address, LocationInfo & info, LocationInfoMode mode) const;

private:
    // NOTE(review): judging by the signature, this looks up address in the
    // given .debug_aranges data and stores the matching .debug_info offset
    // in offset, returning whether it was found - confirm against the
    // implementation.
    static bool findDebugInfoOffset(uintptr_t address, std::string_view aranges, uint64_t & offset);

    // NOTE(review): presumably loads the section views declared at the end of
    // this class - confirm against the implementation.
    void init();

    // NOTE(review): presumably resolves address within the compilation unit
    // that infoEntry points at and fills info - confirm against the
    // implementation.
    bool findLocation(uintptr_t address, std::string_view & infoEntry, LocationInfo & info) const;

    // Non-owning; the Elf must outlive this parser (see the note at the top of the class).
    const Elf * elf_ = nullptr;

    // DWARF section made up of chunks, each prefixed with a length header.
    // The length indicates whether the chunk is DWARF-32 or DWARF-64, which
    // guides interpretation of "section offset" records.
    // (yes, DWARF-32 and DWARF-64 sections may coexist in the same file)
    class Section
    {
    public:
        /** Construct an empty section: no data, 32-bit by default. */
        Section() = default;

        explicit Section(std::string_view d);

        // Return next chunk, if any; the 4- or 12-byte length was already
        // parsed and isn't part of the chunk.
        bool next(std::string_view & chunk);

        // Is the current chunk 64 bit?
        bool is64Bit() const { return is64Bit_; }

    private:
        // Yes, 32- and 64- bit sections may coexist. Yikes!
        bool is64Bit_ = false;
        std::string_view data_;
    };

    // Abbreviation for a Debugging Information Entry.
    struct DIEAbbreviation
    {
        uint64_t code = 0;
        uint64_t tag = 0;
        bool hasChildren = false;

        // One attribute specification: a <name, form> pair.
        struct Attribute
        {
            uint64_t name = 0;
            uint64_t form = 0;
        };

        // View of the attribute specifications; decode one pair at a time
        // with readAttribute().
        std::string_view attributes;
    };

    // Interpreter for the line number bytecode VM
    class LineNumberVM
    {
    public:
        LineNumberVM(std::string_view data, std::string_view compilationDirectory);

        bool findAddress(uintptr_t address, Path & file, uint64_t & line);

    private:
        void init();
        void reset();

        // Execute until we commit one new row to the line number matrix
        bool next(std::string_view & program);

        // Kept as an unscoped enum on purpose: the enumerators are part of
        // the interface seen by the out-of-line member definitions.
        enum StepResult
        {
            CONTINUE, // Continue feeding opcodes
            COMMIT, // Commit new <address, file, line> tuple
            END, // End of sequence
        };
        // Execute one opcode
        StepResult step(std::string_view & program);

        struct FileName
        {
            std::string_view relativeName;
            // 0 = current compilation directory
            // otherwise, 1-based index in the list of include directories
            uint64_t directoryIndex = 0;
        };
        // Read one FileName object, remove_prefix sp
        static bool readFileName(std::string_view & sp, FileName & fn);

        // Get file name at given index; may be in the initial table
        // (fileNames_) or defined using DW_LNE_define_file (and we reexecute
        // enough of the program to find it, if so)
        FileName getFileName(uint64_t index) const;

        // Get include directory at given index
        std::string_view getIncludeDirectory(uint64_t index) const;

        // Execute opcodes until finding a DW_LNE_define_file and return true;
        // return file at the end.
        bool nextDefineFile(std::string_view & program, FileName & fn) const;

        // Initialization
        bool is64Bit_ = false;
        std::string_view data_;
        std::string_view compilationDirectory_;

        // Header
        uint16_t version_ = 0;
        uint8_t minLength_ = 0;
        bool defaultIsStmt_ = false;
        int8_t lineBase_ = 0;
        uint8_t lineRange_ = 0;
        uint8_t opcodeBase_ = 0;
        // Non-owning pointer (presumably into data_ - confirm against init()).
        const uint8_t * standardOpcodeLengths_ = nullptr;

        std::string_view includeDirectories_;
        size_t includeDirectoryCount_ = 0;

        std::string_view fileNames_;
        size_t fileNameCount_ = 0;

        // State machine registers
        uint64_t address_ = 0;
        uint64_t file_ = 0;
        uint64_t line_ = 0;
        uint64_t column_ = 0;
        bool isStmt_ = false;
        bool basicBlock_ = false;
        bool endSequence_ = false;
        bool prologueEnd_ = false;
        bool epilogueBegin_ = false;
        uint64_t isa_ = 0;
        uint64_t discriminator_ = 0;
    };

    // Read an abbreviation from a std::string_view, return true if at end; remove_prefix sp
    // NOTE(review): verify the meaning of the return value against the
    // implementation - "true if at end" may be inverted.
    static bool readAbbreviation(std::string_view & sp, DIEAbbreviation & abbr);

    // Get abbreviation corresponding to a code, in the chunk starting at
    // offset in the .debug_abbrev section
    DIEAbbreviation getAbbreviation(uint64_t code, uint64_t offset) const;

    // Read one attribute <name, form> pair, remove_prefix sp; returns <0, 0> at end.
    static DIEAbbreviation::Attribute readAttribute(std::string_view & sp);

    // Read one attribute value, remove_prefix sp.
    // The value is either a number or a view of the underlying data.
    using AttributeValue = std::variant<uint64_t, std::string_view>;
    AttributeValue readAttributeValue(std::string_view & sp, uint64_t form, bool is64Bit) const;

    // Get an ELF section by name, return true if found
    bool getSection(const char * name, std::string_view * section) const;

    // Get a string from the .debug_str section
    std::string_view getStringFromStringSection(uint64_t offset) const;

    std::string_view info_; // .debug_info
    std::string_view abbrev_; // .debug_abbrev
    std::string_view aranges_; // .debug_aranges
    std::string_view line_; // .debug_line
    std::string_view strings_; // .debug_str
};

}
| 290 | |
| 291 | #endif |
| 292 | |