1 | #pragma once |
2 | |
3 | #if defined(__ELF__) && !defined(__FreeBSD__) |
4 | |
5 | /* |
6 | * Copyright 2012-present Facebook, Inc. |
7 | * |
8 | * Licensed under the Apache License, Version 2.0 (the "License"); |
9 | * you may not use this file except in compliance with the License. |
10 | * You may obtain a copy of the License at |
11 | * |
12 | * http://www.apache.org/licenses/LICENSE-2.0 |
13 | * |
14 | * Unless required by applicable law or agreed to in writing, software |
15 | * distributed under the License is distributed on an "AS IS" BASIS, |
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
17 | * See the License for the specific language governing permissions and |
18 | * limitations under the License. |
19 | */ |
20 | |
21 | /** This file was edited for ClickHouse. |
22 | */ |
23 | |
#include <cstddef>
#include <cstdint>
#include <string>
#include <string_view>
#include <variant>
27 | |
28 | |
29 | namespace DB |
30 | { |
31 | |
32 | class Elf; |
33 | |
34 | /** |
35 | * DWARF record parser. |
36 | * |
37 | * We only implement enough DWARF functionality to convert from PC address |
38 | * to file and line number information. |
39 | * |
40 | * This means (although they're not part of the public API of this class), we |
41 | * can parse Debug Information Entries (DIEs), abbreviations, attributes (of |
42 | * all forms), and we can interpret bytecode for the line number VM. |
43 | * |
44 | * We can interpret DWARF records of version 2, 3, or 4, although we don't |
45 | * actually support many of the version 4 features (such as VLIW, multiple |
46 | * operations per instruction) |
47 | * |
48 | * Note that the DWARF record parser does not allocate heap memory at all. |
 * This is on purpose: you can use the parser in
 * memory-constrained situations (such as an exception handler for
 * std::bad_alloc). If it weren't for this requirement, some things would
52 | * be much simpler: the Path class would be unnecessary and would be replaced |
53 | * with a std::string; the list of file names in the line number VM would be |
54 | * kept as a vector of strings instead of re-executing the program to look for |
55 | * DW_LNE_define_file instructions, etc. |
56 | */ |
57 | class Dwarf final |
58 | { |
// Note that Dwarf uses (and returns) std::string_view a lot.
// These std::string_view objects point into sections of the ELF file, and so
// remain valid only for as long as the passed-in Elf is live.
62 | public: |
63 | /** Create a DWARF parser around an ELF file. */ |
64 | explicit Dwarf(const Elf & elf); |
65 | |
/**
 * A file path represented as a collection of three parts (base directory,
 * subdirectory, and file). Each part is held as a non-owning
 * std::string_view.
 */
class Path
{
public:
    /// Empty path: all three parts are empty views.
    Path() = default;

    Path(std::string_view baseDir, std::string_view subDir, std::string_view file);

    /// Accessors for the three stored parts.
    std::string_view baseDir() const { return baseDir_; }
    std::string_view subDir() const { return subDir_; }
    std::string_view file() const { return file_; }

    // NOTE(review): presumably the length of the path's string form --
    // confirm against the implementation.
    size_t size() const;

    /**
     * Copy the Path into a buffer of bufSize bytes.
     *
     * The contract mirrors snprintf: the buffer is always null-terminated
     * (so at most bufSize-1 bytes are copied), and the return value is the
     * number of bytes that would have been written given enough room.
     * A return value >= bufSize therefore means the output was truncated.
     */
    size_t toBuffer(char * buf, size_t bufSize) const;

    /// Convert the path to a string, storing the result through dest.
    void toString(std::string & dest) const;

    /// Convenience overload returning the string by value.
    std::string toString() const
    {
        std::string result;
        toString(result);
        return result;
    }

    // TODO(tudorb): Implement operator==, operator!=; not as easy as it
    // seems as the same path can be represented in multiple ways
private:
    std::string_view baseDir_;
    std::string_view subDir_;
    std::string_view file_;
};
109 | |
/// How much effort to spend on resolving location info.
enum class LocationInfoMode
{
    /// Location info is not resolved at all.
    DISABLED,
    /// The CU is looked up via .debug_aranges, which might be incomplete.
    FAST,
    /// When the .debug_aranges lookup fails, scan every CU in .debug_info (slow!).
    FULL,
};
119 | |
120 | struct LocationInfo |
121 | { |
122 | bool hasMainFile = false; |
123 | Path mainFile; |
124 | |
125 | bool hasFileAndLine = false; |
126 | Path file; |
127 | uint64_t line = 0; |
128 | }; |
129 | |
130 | /** Find the file and line number information corresponding to address. |
131 | * The address must be physical - offset in object file without offset in virtual memory where the object is loaded. |
132 | */ |
133 | bool findAddress(uintptr_t address, LocationInfo & info, LocationInfoMode mode) const; |
134 | |
135 | private: |
136 | static bool findDebugInfoOffset(uintptr_t address, std::string_view aranges, uint64_t & offset); |
137 | |
138 | void init(); |
139 | bool findLocation(uintptr_t address, std::string_view & infoEntry, LocationInfo & info) const; |
140 | |
141 | const Elf * elf_; |
142 | |
// A DWARF section consists of chunks, and every chunk starts with a length
// header. That length tells whether the chunk is DWARF-32 or DWARF-64, which
// in turn guides how "section offset" records are interpreted.
// (yes, DWARF-32 and DWARF-64 chunks may coexist in the same file)
class Section
{
public:
    Section() = default;

    explicit Section(std::string_view d);

    // Fetch the next chunk, if any. The 4- or 12-byte length has already
    // been parsed and is not part of the chunk.
    bool next(std::string_view & chunk);

    // Tells whether the current chunk is 64 bit.
    bool is64Bit() const { return is64Bit_; }

private:
    // Yes, 32- and 64- bit sections may coexist. Yikes!
    bool is64Bit_ = false;
    std::string_view data_;
};
166 | |
// Abbreviation for a Debugging Information Entry.
//
// All scalar members carry default member initializers so that a
// default-initialized object (e.g. `DIEAbbreviation abbr;`) never holds
// indeterminate values. The struct is still an aggregate.
struct DIEAbbreviation
{
    uint64_t code = 0;
    uint64_t tag = 0;
    bool hasChildren = false;

    // One <name, form> pair describing an attribute.
    struct Attribute
    {
        uint64_t name = 0;
        uint64_t form = 0;
    };

    // NOTE(review): presumably the still-encoded sequence of <name, form>
    // pairs belonging to this abbreviation -- confirm against readAttribute.
    std::string_view attributes;
};
182 | |
183 | // Interpreter for the line number bytecode VM |
184 | class LineNumberVM |
185 | { |
186 | public: |
187 | LineNumberVM(std::string_view data, std::string_view compilationDirectory); |
188 | |
189 | bool findAddress(uintptr_t address, Path & file, uint64_t & line); |
190 | |
191 | private: |
192 | void init(); |
193 | void reset(); |
194 | |
195 | // Execute until we commit one new row to the line number matrix |
196 | bool next(std::string_view & program); |
197 | enum StepResult |
198 | { |
199 | CONTINUE, // Continue feeding opcodes |
200 | COMMIT, // Commit new <address, file, line> tuple |
201 | END, // End of sequence |
202 | }; |
203 | // Execute one opcode |
204 | StepResult step(std::string_view & program); |
205 | |
206 | struct FileName |
207 | { |
208 | std::string_view relativeName; |
209 | // 0 = current compilation directory |
210 | // otherwise, 1-based index in the list of include directories |
211 | uint64_t directoryIndex; |
212 | }; |
213 | // Read one FileName object, remove_prefix sp |
214 | static bool readFileName(std::string_view & sp, FileName & fn); |
215 | |
216 | // Get file name at given index; may be in the initial table |
217 | // (fileNames_) or defined using DW_LNE_define_file (and we reexecute |
218 | // enough of the program to find it, if so) |
219 | FileName getFileName(uint64_t index) const; |
220 | |
221 | // Get include directory at given index |
222 | std::string_view getIncludeDirectory(uint64_t index) const; |
223 | |
224 | // Execute opcodes until finding a DW_LNE_define_file and return true; |
225 | // return file at the end. |
226 | bool nextDefineFile(std::string_view & program, FileName & fn) const; |
227 | |
228 | // Initialization |
229 | bool is64Bit_; |
230 | std::string_view data_; |
231 | std::string_view compilationDirectory_; |
232 | |
233 | // Header |
234 | uint16_t version_; |
235 | uint8_t minLength_; |
236 | bool defaultIsStmt_; |
237 | int8_t lineBase_; |
238 | uint8_t lineRange_; |
239 | uint8_t opcodeBase_; |
240 | const uint8_t * standardOpcodeLengths_; |
241 | |
242 | std::string_view includeDirectories_; |
243 | size_t includeDirectoryCount_; |
244 | |
245 | std::string_view fileNames_; |
246 | size_t fileNameCount_; |
247 | |
248 | // State machine registers |
249 | uint64_t address_; |
250 | uint64_t file_; |
251 | uint64_t line_; |
252 | uint64_t column_; |
253 | bool isStmt_; |
254 | bool basicBlock_; |
255 | bool endSequence_; |
256 | bool prologueEnd_; |
257 | bool epilogueBegin_; |
258 | uint64_t isa_; |
259 | uint64_t discriminator_; |
260 | }; |
261 | |
262 | // Read an abbreviation from a std::string_view, return true if at end; remove_prefix sp |
263 | static bool readAbbreviation(std::string_view & sp, DIEAbbreviation & abbr); |
264 | |
265 | // Get abbreviation corresponding to a code, in the chunk starting at |
266 | // offset in the .debug_abbrev section |
267 | DIEAbbreviation getAbbreviation(uint64_t code, uint64_t offset) const; |
268 | |
269 | // Read one attribute <name, form> pair, remove_prefix sp; returns <0, 0> at end. |
270 | static DIEAbbreviation::Attribute readAttribute(std::string_view & sp); |
271 | |
272 | // Read one attribute value, remove_prefix sp |
273 | typedef std::variant<uint64_t, std::string_view> AttributeValue; |
274 | AttributeValue readAttributeValue(std::string_view & sp, uint64_t form, bool is64Bit) const; |
275 | |
276 | // Get an ELF section by name, return true if found |
277 | bool getSection(const char * name, std::string_view * section) const; |
278 | |
279 | // Get a string from the .debug_str section |
280 | std::string_view getStringFromStringSection(uint64_t offset) const; |
281 | |
282 | std::string_view info_; // .debug_info |
283 | std::string_view abbrev_; // .debug_abbrev |
284 | std::string_view aranges_; // .debug_aranges |
285 | std::string_view line_; // .debug_line |
286 | std::string_view strings_; // .debug_str |
287 | }; |
288 | |
289 | } |
290 | |
291 | #endif |
292 | |