1#pragma once
2
3#if defined(__ELF__) && !defined(__FreeBSD__)
4
5/*
6 * Copyright 2012-present Facebook, Inc.
7 *
8 * Licensed under the Apache License, Version 2.0 (the "License");
9 * you may not use this file except in compliance with the License.
10 * You may obtain a copy of the License at
11 *
12 * http://www.apache.org/licenses/LICENSE-2.0
13 *
14 * Unless required by applicable law or agreed to in writing, software
15 * distributed under the License is distributed on an "AS IS" BASIS,
16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 * See the License for the specific language governing permissions and
18 * limitations under the License.
19 */
20
21/** This file was edited for ClickHouse.
22 */
23
#include <cstddef>
#include <cstdint>
#include <string>
#include <string_view>
#include <variant>
27
28
namespace DB
{

class Elf;

/**
 * DWARF record parser.
 *
 * We only implement enough DWARF functionality to convert from PC address
 * to file and line number information.
 *
 * This means (although they're not part of the public API of this class), we
 * can parse Debug Information Entries (DIEs), abbreviations, attributes (of
 * all forms), and we can interpret bytecode for the line number VM.
 *
 * We can interpret DWARF records of version 2, 3, or 4, although we don't
 * actually support many of the version 4 features (such as VLIW, multiple
 * operations per instruction).
 *
 * Note that the DWARF record parser does not allocate heap memory at all.
 * This is on purpose: you can use the parser from memory-constrained
 * situations (such as an exception handler for std::bad_alloc). If it
 * weren't for this requirement, some things would be much simpler: the Path
 * class would be unnecessary and would be replaced with a std::string; the
 * list of file names in the line number VM would be kept as a vector of
 * strings instead of re-executing the program to look for
 * DW_LNE_define_file instructions, etc.
 */
class Dwarf final
{
    // Note that Dwarf uses (and returns) std::string_view a lot.
    // The std::string_view objects point within sections in the ELF file,
    // and so will be live for as long as the passed-in Elf is live.
public:
    /** Create a DWARF parser around an ELF file. */
    explicit Dwarf(const Elf & elf);

    /**
     * Represent a file path as a collection of three parts (base directory,
     * subdirectory, and file).
     *
     * The parts are non-owning views; a Path never copies the characters it
     * refers to.
     */
    class Path
    {
    public:
        /// Empty path: all three parts are empty views. Defaulted (instead of
        /// a user-provided empty body) so that the constructor is implicitly
        /// noexcept.
        Path() = default;

        Path(std::string_view baseDir, std::string_view subDir, std::string_view file);

        std::string_view baseDir() const { return baseDir_; }
        std::string_view subDir() const { return subDir_; }
        std::string_view file() const { return file_; }

        /// Defined out of line. Presumably the length of the rendered path -
        /// confirm against the definition.
        size_t size() const;

        /**
         * Copy the Path to a buffer of size bufSize.
         *
         * toBuffer behaves like snprintf: It will always null-terminate the
         * buffer (so it will copy at most bufSize-1 bytes), and it will return
         * the number of bytes that would have been written if there had been
         * enough room, so, if toBuffer returns a value >= bufSize, the output
         * was truncated.
         */
        size_t toBuffer(char * buf, size_t bufSize) const;

        /// Write the textual form of the path into dest (defined out of line).
        void toString(std::string & dest) const;

        /// Convenience overload: runs toString(std::string &) on a fresh,
        /// empty string and returns it.
        std::string toString() const
        {
            std::string result;
            toString(result);
            return result;
        }

        // TODO(tudorb): Implement operator==, operator!=; not as easy as it
        // seems as the same path can be represented in multiple ways
    private:
        std::string_view baseDir_;
        std::string_view subDir_;
        std::string_view file_;
    };

    /// How much effort findAddress may spend on locating the compilation unit.
    enum class LocationInfoMode
    {
        // Don't resolve location info.
        DISABLED,
        // Perform CU lookup using .debug_aranges (might be incomplete).
        FAST,
        // Scan all CU in .debug_info (slow!) on .debug_aranges lookup failure.
        FULL,
    };

    /// Result record filled in by findAddress. The `has*` flags tell which of
    /// the remaining fields carry meaningful data.
    struct LocationInfo
    {
        bool hasMainFile = false;
        Path mainFile;

        bool hasFileAndLine = false;
        Path file;
        uint64_t line = 0;
    };

    /** Find the file and line number information corresponding to address.
      * The address must be physical - offset in object file without offset in virtual memory where the object is loaded.
      */
    bool findAddress(uintptr_t address, LocationInfo & info, LocationInfoMode mode) const;

private:
    static bool findDebugInfoOffset(uintptr_t address, std::string_view aranges, uint64_t & offset);

    void init();
    bool findLocation(uintptr_t address, std::string_view & infoEntry, LocationInfo & info) const;

    const Elf * elf_;

    // DWARF section made up of chunks, each prefixed with a length header.
    // The length indicates whether the chunk is DWARF-32 or DWARF-64, which
    // guides interpretation of "section offset" records.
    // (yes, DWARF-32 and DWARF-64 sections may coexist in the same file)
    class Section
    {
    public:
        /// Empty section; is64Bit() reports false.
        Section() = default;

        explicit Section(std::string_view d);

        // Return next chunk, if any; the 4- or 12-byte length was already
        // parsed and isn't part of the chunk.
        bool next(std::string_view & chunk);

        // Is the current chunk 64 bit?
        bool is64Bit() const { return is64Bit_; }

    private:
        // Yes, 32- and 64- bit sections may coexist. Yikes!
        bool is64Bit_ = false;
        std::string_view data_;
    };

    // Abbreviation for a Debugging Information Entry.
    // All fields have defaults so that a default-constructed abbreviation
    // never exposes indeterminate values.
    struct DIEAbbreviation
    {
        uint64_t code = 0;
        uint64_t tag = 0;
        bool hasChildren = false;

        // One <name, form> pair; <0, 0> marks the end of the list
        // (see readAttribute).
        struct Attribute
        {
            uint64_t name = 0;
            uint64_t form = 0;
        };

        std::string_view attributes;
    };

    // Interpreter for the line number bytecode VM
    class LineNumberVM
    {
    public:
        LineNumberVM(std::string_view data, std::string_view compilationDirectory);

        bool findAddress(uintptr_t address, Path & file, uint64_t & line);

    private:
        void init();
        void reset();

        // Execute until we commit one new row to the line number matrix
        bool next(std::string_view & program);
        enum StepResult
        {
            CONTINUE, // Continue feeding opcodes
            COMMIT, // Commit new <address, file, line> tuple
            END, // End of sequence
        };
        // Execute one opcode
        StepResult step(std::string_view & program);

        struct FileName
        {
            std::string_view relativeName;
            // 0 = current compilation directory
            // otherwise, 1-based index in the list of include directories
            uint64_t directoryIndex = 0;
        };
        // Read one FileName object, remove_prefix sp
        static bool readFileName(std::string_view & sp, FileName & fn);

        // Get file name at given index; may be in the initial table
        // (fileNames_) or defined using DW_LNE_define_file (and we reexecute
        // enough of the program to find it, if so)
        FileName getFileName(uint64_t index) const;

        // Get include directory at given index
        std::string_view getIncludeDirectory(uint64_t index) const;

        // Execute opcodes until finding a DW_LNE_define_file and return true;
        // return file at the end.
        bool nextDefineFile(std::string_view & program, FileName & fn) const;

        // The defaults below only guarantee determinate values; the
        // out-of-line constructor / init() / reset() are expected to assign
        // the real ones (their definitions are not part of this header).

        // Initialization
        bool is64Bit_ = false;
        std::string_view data_;
        std::string_view compilationDirectory_;

        // Header
        uint16_t version_ = 0;
        uint8_t minLength_ = 0;
        bool defaultIsStmt_ = false;
        int8_t lineBase_ = 0;
        uint8_t lineRange_ = 0;
        uint8_t opcodeBase_ = 0;
        const uint8_t * standardOpcodeLengths_ = nullptr;

        std::string_view includeDirectories_;
        size_t includeDirectoryCount_ = 0;

        std::string_view fileNames_;
        size_t fileNameCount_ = 0;

        // State machine registers
        uint64_t address_ = 0;
        uint64_t file_ = 0;
        uint64_t line_ = 0;
        uint64_t column_ = 0;
        bool isStmt_ = false;
        bool basicBlock_ = false;
        bool endSequence_ = false;
        bool prologueEnd_ = false;
        bool epilogueBegin_ = false;
        uint64_t isa_ = 0;
        uint64_t discriminator_ = 0;
    };

    // Read an abbreviation from a std::string_view, return true if at end; remove_prefix sp
    static bool readAbbreviation(std::string_view & sp, DIEAbbreviation & abbr);

    // Get abbreviation corresponding to a code, in the chunk starting at
    // offset in the .debug_abbrev section
    DIEAbbreviation getAbbreviation(uint64_t code, uint64_t offset) const;

    // Read one attribute <name, form> pair, remove_prefix sp; returns <0, 0> at end.
    static DIEAbbreviation::Attribute readAttribute(std::string_view & sp);

    // Read one attribute value, remove_prefix sp.
    // The value is either an integer or a view into section data.
    using AttributeValue = std::variant<uint64_t, std::string_view>;
    AttributeValue readAttributeValue(std::string_view & sp, uint64_t form, bool is64Bit) const;

    // Get an ELF section by name, return true if found
    bool getSection(const char * name, std::string_view * section) const;

    // Get a string from the .debug_str section
    std::string_view getStringFromStringSection(uint64_t offset) const;

    std::string_view info_; // .debug_info
    std::string_view abbrev_; // .debug_abbrev
    std::string_view aranges_; // .debug_aranges
    std::string_view line_; // .debug_line
    std::string_view strings_; // .debug_str
};

}
290
291#endif
292