1/*
2 * Copyright 2012-present Facebook, Inc.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17// DWARF record parser
18
19#pragma once
20
21#include <boost/variant.hpp>
22
23#include <folly/Range.h>
24#include <folly/experimental/symbolizer/Elf.h>
25
26namespace folly {
27namespace symbolizer {
28
29/**
30 * DWARF record parser.
31 *
32 * We only implement enough DWARF functionality to convert from PC address
33 * to file and line number information.
34 *
35 * This means (although they're not part of the public API of this class), we
36 * can parse Debug Information Entries (DIEs), abbreviations, attributes (of
37 * all forms), and we can interpret bytecode for the line number VM.
38 *
39 * We can interpret DWARF records of version 2, 3, or 4, although we don't
40 * actually support many of the version 4 features (such as VLIW, multiple
41 * operations per instruction)
42 *
43 * Note that the DWARF record parser does not allocate heap memory at all.
44 * This is on purpose: you can use the parser from
45 * memory-constrained situations (such as an exception handler for
46 * std::out_of_memory) If it weren't for this requirement, some things would
47 * be much simpler: the Path class would be unnecessary and would be replaced
48 * with a std::string; the list of file names in the line number VM would be
49 * kept as a vector of strings instead of re-executing the program to look for
50 * DW_LNE_define_file instructions, etc.
51 */
52class Dwarf {
53 // Note that Dwarf uses (and returns) StringPiece a lot.
54 // The StringPieces point within sections in the ELF file, and so will
55 // be live for as long as the passed-in ElfFile is live.
56 public:
57 /** Create a DWARF parser around an ELF file. */
58 explicit Dwarf(const ElfFile* elf);
59
60 /**
61 * Represent a file path a s collection of three parts (base directory,
62 * subdirectory, and file).
63 */
64 class Path {
65 public:
66 Path() {}
67
68 Path(
69 folly::StringPiece baseDir,
70 folly::StringPiece subDir,
71 folly::StringPiece file);
72
73 folly::StringPiece baseDir() const {
74 return baseDir_;
75 }
76 folly::StringPiece subDir() const {
77 return subDir_;
78 }
79 folly::StringPiece file() const {
80 return file_;
81 }
82
83 size_t size() const;
84
85 /**
86 * Copy the Path to a buffer of size bufSize.
87 *
88 * toBuffer behaves like snprintf: It will always null-terminate the
89 * buffer (so it will copy at most bufSize-1 bytes), and it will return
90 * the number of bytes that would have been written if there had been
91 * enough room, so, if toBuffer returns a value >= bufSize, the output
92 * was truncated.
93 */
94 size_t toBuffer(char* buf, size_t bufSize) const;
95
96 void toString(std::string& dest) const;
97 std::string toString() const {
98 std::string s;
99 toString(s);
100 return s;
101 }
102
103 // TODO(tudorb): Implement operator==, operator!=; not as easy as it
104 // seems as the same path can be represented in multiple ways
105 private:
106 folly::StringPiece baseDir_;
107 folly::StringPiece subDir_;
108 folly::StringPiece file_;
109 };
110
111 enum class LocationInfoMode {
112 // Don't resolve location info.
113 DISABLED,
114 // Perform CU lookup using .debug_aranges (might be incomplete).
115 FAST,
116 // Scan all CU in .debug_info (slow!) on .debug_aranges lookup failure.
117 FULL,
118 };
119
120 struct LocationInfo {
121 bool hasMainFile = false;
122 Path mainFile;
123
124 bool hasFileAndLine = false;
125 Path file;
126 uint64_t line = 0;
127 };
128
129 /**
130 * Find the file and line number information corresponding to address.
131 */
132 bool findAddress(uintptr_t address, LocationInfo& info, LocationInfoMode mode)
133 const;
134
135 private:
136 static bool
137 findDebugInfoOffset(uintptr_t address, StringPiece aranges, uint64_t& offset);
138
139 void init();
140 bool findLocation(
141 uintptr_t address,
142 StringPiece& infoEntry,
143 LocationInfo& info) const;
144
145 const ElfFile* elf_;
146
147 // DWARF section made up of chunks, each prefixed with a length header.
148 // The length indicates whether the chunk is DWARF-32 or DWARF-64, which
149 // guides interpretation of "section offset" records.
150 // (yes, DWARF-32 and DWARF-64 sections may coexist in the same file)
151 class Section {
152 public:
153 Section() : is64Bit_(false) {}
154
155 explicit Section(folly::StringPiece d);
156
157 // Return next chunk, if any; the 4- or 12-byte length was already
158 // parsed and isn't part of the chunk.
159 bool next(folly::StringPiece& chunk);
160
161 // Is the current chunk 64 bit?
162 bool is64Bit() const {
163 return is64Bit_;
164 }
165
166 private:
167 // Yes, 32- and 64- bit sections may coexist. Yikes!
168 bool is64Bit_;
169 folly::StringPiece data_;
170 };
171
172 // Abbreviation for a Debugging Information Entry.
173 struct DIEAbbreviation {
174 uint64_t code;
175 uint64_t tag;
176 bool hasChildren;
177
178 struct Attribute {
179 uint64_t name;
180 uint64_t form;
181 };
182
183 folly::StringPiece attributes;
184 };
185
186 // Interpreter for the line number bytecode VM
187 class LineNumberVM {
188 public:
189 LineNumberVM(
190 folly::StringPiece data,
191 folly::StringPiece compilationDirectory);
192
193 bool findAddress(uintptr_t address, Path& file, uint64_t& line);
194
195 private:
196 void init();
197 void reset();
198
199 // Execute until we commit one new row to the line number matrix
200 bool next(folly::StringPiece& program);
201 enum StepResult {
202 CONTINUE, // Continue feeding opcodes
203 COMMIT, // Commit new <address, file, line> tuple
204 END, // End of sequence
205 };
206 // Execute one opcode
207 StepResult step(folly::StringPiece& program);
208
209 struct FileName {
210 folly::StringPiece relativeName;
211 // 0 = current compilation directory
212 // otherwise, 1-based index in the list of include directories
213 uint64_t directoryIndex;
214 };
215 // Read one FileName object, advance sp
216 static bool readFileName(folly::StringPiece& sp, FileName& fn);
217
218 // Get file name at given index; may be in the initial table
219 // (fileNames_) or defined using DW_LNE_define_file (and we reexecute
220 // enough of the program to find it, if so)
221 FileName getFileName(uint64_t index) const;
222
223 // Get include directory at given index
224 folly::StringPiece getIncludeDirectory(uint64_t index) const;
225
226 // Execute opcodes until finding a DW_LNE_define_file and return true;
227 // return file at the end.
228 bool nextDefineFile(folly::StringPiece& program, FileName& fn) const;
229
230 // Initialization
231 bool is64Bit_;
232 folly::StringPiece data_;
233 folly::StringPiece compilationDirectory_;
234
235 // Header
236 uint16_t version_;
237 uint8_t minLength_;
238 bool defaultIsStmt_;
239 int8_t lineBase_;
240 uint8_t lineRange_;
241 uint8_t opcodeBase_;
242 const uint8_t* standardOpcodeLengths_;
243
244 folly::StringPiece includeDirectories_;
245 size_t includeDirectoryCount_;
246
247 folly::StringPiece fileNames_;
248 size_t fileNameCount_;
249
250 // State machine registers
251 uint64_t address_;
252 uint64_t file_;
253 uint64_t line_;
254 uint64_t column_;
255 bool isStmt_;
256 bool basicBlock_;
257 bool endSequence_;
258 bool prologueEnd_;
259 bool epilogueBegin_;
260 uint64_t isa_;
261 uint64_t discriminator_;
262 };
263
264 // Read an abbreviation from a StringPiece, return true if at end; advance sp
265 static bool readAbbreviation(folly::StringPiece& sp, DIEAbbreviation& abbr);
266
267 // Get abbreviation corresponding to a code, in the chunk starting at
268 // offset in the .debug_abbrev section
269 DIEAbbreviation getAbbreviation(uint64_t code, uint64_t offset) const;
270
271 // Read one attribute <name, form> pair, advance sp; returns <0, 0> at end.
272 static DIEAbbreviation::Attribute readAttribute(folly::StringPiece& sp);
273
274 // Read one attribute value, advance sp
275 typedef boost::variant<uint64_t, folly::StringPiece> AttributeValue;
276 AttributeValue
277 readAttributeValue(folly::StringPiece& sp, uint64_t form, bool is64Bit) const;
278
279 // Get an ELF section by name, return true if found
280 bool getSection(const char* name, folly::StringPiece* section) const;
281
282 // Get a string from the .debug_str section
283 folly::StringPiece getStringFromStringSection(uint64_t offset) const;
284
285 folly::StringPiece info_; // .debug_info
286 folly::StringPiece abbrev_; // .debug_abbrev
287 folly::StringPiece aranges_; // .debug_aranges
288 folly::StringPiece line_; // .debug_line
289 folly::StringPiece strings_; // .debug_str
290};
291
292inline std::ostream& operator<<(std::ostream& out, const Dwarf::Path& path) {
293 return out << path.toString();
294}
295
296} // namespace symbolizer
297} // namespace folly
298