1 | /* |
2 | * Copyright 2012-present Facebook, Inc. |
3 | * |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | * you may not use this file except in compliance with the License. |
6 | * You may obtain a copy of the License at |
7 | * |
8 | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | * |
10 | * Unless required by applicable law or agreed to in writing, software |
11 | * distributed under the License is distributed on an "AS IS" BASIS, |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | * See the License for the specific language governing permissions and |
14 | * limitations under the License. |
15 | */ |
16 | |
17 | // DWARF record parser |
18 | |
19 | #pragma once |
20 | |
21 | #include <boost/variant.hpp> |
22 | |
23 | #include <folly/Range.h> |
24 | #include <folly/experimental/symbolizer/Elf.h> |
25 | |
26 | namespace folly { |
27 | namespace symbolizer { |
28 | |
29 | /** |
30 | * DWARF record parser. |
31 | * |
32 | * We only implement enough DWARF functionality to convert from PC address |
33 | * to file and line number information. |
34 | * |
35 | * This means (although they're not part of the public API of this class), we |
36 | * can parse Debug Information Entries (DIEs), abbreviations, attributes (of |
37 | * all forms), and we can interpret bytecode for the line number VM. |
38 | * |
 * We can interpret DWARF records of version 2, 3, or 4, although we don't
 * actually support many of the version 4 features (such as VLIW, multiple
 * operations per instruction).
42 | * |
 * Note that the DWARF record parser does not allocate heap memory at all.
 * This is on purpose: you can use the parser from
 * memory-constrained situations (such as an exception handler for
 * std::bad_alloc). If it weren't for this requirement, some things would
 * be much simpler: the Path class would be unnecessary and would be replaced
 * with a std::string; the list of file names in the line number VM would be
 * kept as a vector of strings instead of re-executing the program to look for
 * DW_LNE_define_file instructions, etc.
51 | */ |
52 | class Dwarf { |
53 | // Note that Dwarf uses (and returns) StringPiece a lot. |
54 | // The StringPieces point within sections in the ELF file, and so will |
55 | // be live for as long as the passed-in ElfFile is live. |
56 | public: |
57 | /** Create a DWARF parser around an ELF file. */ |
58 | explicit Dwarf(const ElfFile* elf); |
59 | |
60 | /** |
61 | * Represent a file path a s collection of three parts (base directory, |
62 | * subdirectory, and file). |
63 | */ |
64 | class Path { |
65 | public: |
66 | Path() {} |
67 | |
68 | Path( |
69 | folly::StringPiece baseDir, |
70 | folly::StringPiece subDir, |
71 | folly::StringPiece file); |
72 | |
73 | folly::StringPiece baseDir() const { |
74 | return baseDir_; |
75 | } |
76 | folly::StringPiece subDir() const { |
77 | return subDir_; |
78 | } |
79 | folly::StringPiece file() const { |
80 | return file_; |
81 | } |
82 | |
83 | size_t size() const; |
84 | |
85 | /** |
86 | * Copy the Path to a buffer of size bufSize. |
87 | * |
88 | * toBuffer behaves like snprintf: It will always null-terminate the |
89 | * buffer (so it will copy at most bufSize-1 bytes), and it will return |
90 | * the number of bytes that would have been written if there had been |
91 | * enough room, so, if toBuffer returns a value >= bufSize, the output |
92 | * was truncated. |
93 | */ |
94 | size_t toBuffer(char* buf, size_t bufSize) const; |
95 | |
96 | void toString(std::string& dest) const; |
97 | std::string toString() const { |
98 | std::string s; |
99 | toString(s); |
100 | return s; |
101 | } |
102 | |
103 | // TODO(tudorb): Implement operator==, operator!=; not as easy as it |
104 | // seems as the same path can be represented in multiple ways |
105 | private: |
106 | folly::StringPiece baseDir_; |
107 | folly::StringPiece subDir_; |
108 | folly::StringPiece file_; |
109 | }; |
110 | |
111 | enum class LocationInfoMode { |
112 | // Don't resolve location info. |
113 | DISABLED, |
114 | // Perform CU lookup using .debug_aranges (might be incomplete). |
115 | FAST, |
116 | // Scan all CU in .debug_info (slow!) on .debug_aranges lookup failure. |
117 | FULL, |
118 | }; |
119 | |
120 | struct LocationInfo { |
121 | bool hasMainFile = false; |
122 | Path mainFile; |
123 | |
124 | bool hasFileAndLine = false; |
125 | Path file; |
126 | uint64_t line = 0; |
127 | }; |
128 | |
129 | /** |
130 | * Find the file and line number information corresponding to address. |
131 | */ |
132 | bool findAddress(uintptr_t address, LocationInfo& info, LocationInfoMode mode) |
133 | const; |
134 | |
135 | private: |
136 | static bool |
137 | findDebugInfoOffset(uintptr_t address, StringPiece aranges, uint64_t& offset); |
138 | |
139 | void init(); |
140 | bool findLocation( |
141 | uintptr_t address, |
142 | StringPiece& infoEntry, |
143 | LocationInfo& info) const; |
144 | |
145 | const ElfFile* elf_; |
146 | |
147 | // DWARF section made up of chunks, each prefixed with a length header. |
148 | // The length indicates whether the chunk is DWARF-32 or DWARF-64, which |
149 | // guides interpretation of "section offset" records. |
150 | // (yes, DWARF-32 and DWARF-64 sections may coexist in the same file) |
151 | class Section { |
152 | public: |
153 | Section() : is64Bit_(false) {} |
154 | |
155 | explicit Section(folly::StringPiece d); |
156 | |
157 | // Return next chunk, if any; the 4- or 12-byte length was already |
158 | // parsed and isn't part of the chunk. |
159 | bool next(folly::StringPiece& chunk); |
160 | |
161 | // Is the current chunk 64 bit? |
162 | bool is64Bit() const { |
163 | return is64Bit_; |
164 | } |
165 | |
166 | private: |
167 | // Yes, 32- and 64- bit sections may coexist. Yikes! |
168 | bool is64Bit_; |
169 | folly::StringPiece data_; |
170 | }; |
171 | |
172 | // Abbreviation for a Debugging Information Entry. |
173 | struct DIEAbbreviation { |
174 | uint64_t code; |
175 | uint64_t tag; |
176 | bool hasChildren; |
177 | |
178 | struct Attribute { |
179 | uint64_t name; |
180 | uint64_t form; |
181 | }; |
182 | |
183 | folly::StringPiece attributes; |
184 | }; |
185 | |
186 | // Interpreter for the line number bytecode VM |
187 | class LineNumberVM { |
188 | public: |
189 | LineNumberVM( |
190 | folly::StringPiece data, |
191 | folly::StringPiece compilationDirectory); |
192 | |
193 | bool findAddress(uintptr_t address, Path& file, uint64_t& line); |
194 | |
195 | private: |
196 | void init(); |
197 | void reset(); |
198 | |
199 | // Execute until we commit one new row to the line number matrix |
200 | bool next(folly::StringPiece& program); |
201 | enum StepResult { |
202 | CONTINUE, // Continue feeding opcodes |
203 | COMMIT, // Commit new <address, file, line> tuple |
204 | END, // End of sequence |
205 | }; |
206 | // Execute one opcode |
207 | StepResult step(folly::StringPiece& program); |
208 | |
209 | struct FileName { |
210 | folly::StringPiece relativeName; |
211 | // 0 = current compilation directory |
212 | // otherwise, 1-based index in the list of include directories |
213 | uint64_t directoryIndex; |
214 | }; |
215 | // Read one FileName object, advance sp |
216 | static bool readFileName(folly::StringPiece& sp, FileName& fn); |
217 | |
218 | // Get file name at given index; may be in the initial table |
219 | // (fileNames_) or defined using DW_LNE_define_file (and we reexecute |
220 | // enough of the program to find it, if so) |
221 | FileName getFileName(uint64_t index) const; |
222 | |
223 | // Get include directory at given index |
224 | folly::StringPiece getIncludeDirectory(uint64_t index) const; |
225 | |
226 | // Execute opcodes until finding a DW_LNE_define_file and return true; |
227 | // return file at the end. |
228 | bool nextDefineFile(folly::StringPiece& program, FileName& fn) const; |
229 | |
230 | // Initialization |
231 | bool is64Bit_; |
232 | folly::StringPiece data_; |
233 | folly::StringPiece compilationDirectory_; |
234 | |
235 | // Header |
236 | uint16_t version_; |
237 | uint8_t minLength_; |
238 | bool defaultIsStmt_; |
239 | int8_t lineBase_; |
240 | uint8_t lineRange_; |
241 | uint8_t opcodeBase_; |
242 | const uint8_t* standardOpcodeLengths_; |
243 | |
244 | folly::StringPiece includeDirectories_; |
245 | size_t includeDirectoryCount_; |
246 | |
247 | folly::StringPiece fileNames_; |
248 | size_t fileNameCount_; |
249 | |
250 | // State machine registers |
251 | uint64_t address_; |
252 | uint64_t file_; |
253 | uint64_t line_; |
254 | uint64_t column_; |
255 | bool isStmt_; |
256 | bool basicBlock_; |
257 | bool endSequence_; |
258 | bool prologueEnd_; |
259 | bool epilogueBegin_; |
260 | uint64_t isa_; |
261 | uint64_t discriminator_; |
262 | }; |
263 | |
264 | // Read an abbreviation from a StringPiece, return true if at end; advance sp |
265 | static bool readAbbreviation(folly::StringPiece& sp, DIEAbbreviation& abbr); |
266 | |
267 | // Get abbreviation corresponding to a code, in the chunk starting at |
268 | // offset in the .debug_abbrev section |
269 | DIEAbbreviation getAbbreviation(uint64_t code, uint64_t offset) const; |
270 | |
271 | // Read one attribute <name, form> pair, advance sp; returns <0, 0> at end. |
272 | static DIEAbbreviation::Attribute readAttribute(folly::StringPiece& sp); |
273 | |
274 | // Read one attribute value, advance sp |
275 | typedef boost::variant<uint64_t, folly::StringPiece> AttributeValue; |
276 | AttributeValue |
277 | readAttributeValue(folly::StringPiece& sp, uint64_t form, bool is64Bit) const; |
278 | |
279 | // Get an ELF section by name, return true if found |
280 | bool getSection(const char* name, folly::StringPiece* section) const; |
281 | |
282 | // Get a string from the .debug_str section |
283 | folly::StringPiece getStringFromStringSection(uint64_t offset) const; |
284 | |
285 | folly::StringPiece info_; // .debug_info |
286 | folly::StringPiece abbrev_; // .debug_abbrev |
287 | folly::StringPiece aranges_; // .debug_aranges |
288 | folly::StringPiece line_; // .debug_line |
289 | folly::StringPiece strings_; // .debug_str |
290 | }; |
291 | |
292 | inline std::ostream& operator<<(std::ostream& out, const Dwarf::Path& path) { |
293 | return out << path.toString(); |
294 | } |
295 | |
296 | } // namespace symbolizer |
297 | } // namespace folly |
298 | |