1 | #if defined(__ELF__) && !defined(__FreeBSD__) |
2 | |
3 | /* |
4 | * Copyright 2012-present Facebook, Inc. |
5 | * |
6 | * Licensed under the Apache License, Version 2.0 (the "License"); |
7 | * you may not use this file except in compliance with the License. |
8 | * You may obtain a copy of the License at |
9 | * |
10 | * http://www.apache.org/licenses/LICENSE-2.0 |
11 | * |
12 | * Unless required by applicable law or agreed to in writing, software |
13 | * distributed under the License is distributed on an "AS IS" BASIS, |
14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
15 | * See the License for the specific language governing permissions and |
16 | * limitations under the License. |
17 | */ |
18 | |
19 | /** This file was edited for ClickHouse. |
20 | */ |
21 | |
22 | #include <optional> |
23 | |
24 | #include <string.h> |
25 | |
26 | #include <Common/Elf.h> |
27 | #include <Common/Dwarf.h> |
28 | #include <Common/Exception.h> |
29 | |
30 | |
// Subset of DWARF constants used by the parser in this file.

// Value of the "has children" byte in an abbreviation that means "no children"
// (compared against in Dwarf::readAbbreviation).
#define DW_CHILDREN_no 0

// Attribute form codes, dispatched on in Dwarf::readAttributeValue.
#define DW_FORM_addr 1
#define DW_FORM_block1 0x0a
#define DW_FORM_block2 3
#define DW_FORM_block4 4
#define DW_FORM_block 9
#define DW_FORM_exprloc 0x18
#define DW_FORM_data1 0x0b
#define DW_FORM_ref1 0x11
#define DW_FORM_data2 0x05
#define DW_FORM_ref2 0x12
#define DW_FORM_data4 0x06
#define DW_FORM_ref4 0x13
#define DW_FORM_data8 0x07
#define DW_FORM_ref8 0x14
#define DW_FORM_sdata 0x0d
#define DW_FORM_udata 0x0f
#define DW_FORM_ref_udata 0x15
#define DW_FORM_flag 0x0c
#define DW_FORM_flag_present 0x19
#define DW_FORM_sec_offset 0x17
#define DW_FORM_ref_addr 0x10
#define DW_FORM_string 0x08
#define DW_FORM_strp 0x0e
#define DW_FORM_indirect 0x16

// DIE tag and attribute names inspected in Dwarf::findLocation.
#define DW_TAG_compile_unit 0x11
#define DW_AT_stmt_list 0x10
#define DW_AT_comp_dir 0x1b
#define DW_AT_name 0x03

// Line number program: extended opcode handled by LineNumberVM::nextDefineFile.
#define DW_LNE_define_file 0x03

// Line number program: standard opcodes, dispatched on in LineNumberVM::step.
#define DW_LNS_copy 0x01
#define DW_LNS_advance_pc 0x02
#define DW_LNS_advance_line 0x03
#define DW_LNS_set_file 0x04
#define DW_LNS_set_column 0x05
#define DW_LNS_negate_stmt 0x06
#define DW_LNS_set_basic_block 0x07
#define DW_LNS_const_add_pc 0x08
#define DW_LNS_fixed_advance_pc 0x09
#define DW_LNS_set_prologue_end 0x0a
#define DW_LNS_set_epilogue_begin 0x0b
#define DW_LNS_set_isa 0x0c

// Line number program: remaining extended opcodes handled in LineNumberVM::step.
#define DW_LNE_end_sequence 0x01
#define DW_LNE_set_address 0x02
#define DW_LNE_set_discriminator 0x04
76 | |
77 | |
78 | namespace DB |
79 | { |
80 | |
namespace ErrorCodes
{
    // Error code attached to every exception thrown by SAFE_CHECK in this file.
    // Only declared here; the definition lives in another translation unit.
    extern const int CANNOT_PARSE_DWARF;
}
85 | |
86 | |
87 | Dwarf::Dwarf(const Elf & elf) : elf_(&elf) |
88 | { |
89 | init(); |
90 | } |
91 | |
92 | Dwarf::Section::Section(std::string_view d) : is64Bit_(false), data_(d) |
93 | { |
94 | } |
95 | |
96 | |
// Throws Exception(message, ErrorCodes::CANNOT_PARSE_DWARF) when `cond` is false.
// The do { ... } while (false) wrapper makes the macro usable as a single statement.
#define SAFE_CHECK(cond, message) do { if (!(cond)) throw Exception(message, ErrorCodes::CANNOT_PARSE_DWARF); } while (false)
98 | |
99 | |
100 | namespace |
101 | { |
// All following read* functions read from a std::string_view, advancing the
// std::string_view, and throwing (via SAFE_CHECK) if there's not enough room.
104 | |
105 | // Read (bitwise) one object of type T |
106 | template <typename T> |
107 | std::enable_if_t<std::is_pod_v<T>, T> read(std::string_view & sp) |
108 | { |
109 | SAFE_CHECK(sp.size() >= sizeof(T), "underflow" ); |
110 | T x; |
111 | memcpy(&x, sp.data(), sizeof(T)); |
112 | sp.remove_prefix(sizeof(T)); |
113 | return x; |
114 | } |
115 | |
116 | // Read ULEB (unsigned) varint value; algorithm from the DWARF spec |
117 | uint64_t readULEB(std::string_view & sp, uint8_t & shift, uint8_t & val) |
118 | { |
119 | uint64_t r = 0; |
120 | shift = 0; |
121 | do |
122 | { |
123 | val = read<uint8_t>(sp); |
124 | r |= (uint64_t(val & 0x7f) << shift); |
125 | shift += 7; |
126 | } while (val & 0x80); |
127 | return r; |
128 | } |
129 | |
130 | uint64_t readULEB(std::string_view & sp) |
131 | { |
132 | uint8_t shift; |
133 | uint8_t val; |
134 | return readULEB(sp, shift, val); |
135 | } |
136 | |
137 | // Read SLEB (signed) varint value; algorithm from the DWARF spec |
138 | int64_t readSLEB(std::string_view & sp) |
139 | { |
140 | uint8_t shift; |
141 | uint8_t val; |
142 | uint64_t r = readULEB(sp, shift, val); |
143 | |
144 | if (shift < 64 && (val & 0x40)) |
145 | { |
146 | r |= -(1ULL << shift); // sign extend |
147 | } |
148 | |
149 | return r; |
150 | } |
151 | |
152 | // Read a value of "section offset" type, which may be 4 or 8 bytes |
153 | uint64_t readOffset(std::string_view & sp, bool is64Bit) |
154 | { |
155 | return is64Bit ? read<uint64_t>(sp) : read<uint32_t>(sp); |
156 | } |
157 | |
158 | // Read "len" bytes |
159 | std::string_view readBytes(std::string_view & sp, uint64_t len) |
160 | { |
161 | SAFE_CHECK(len >= sp.size(), "invalid string length" ); |
162 | std::string_view ret(sp.data(), len); |
163 | sp.remove_prefix(len); |
164 | return ret; |
165 | } |
166 | |
167 | // Read a null-terminated string |
168 | std::string_view readNullTerminated(std::string_view & sp) |
169 | { |
170 | const char * p = static_cast<const char *>(memchr(sp.data(), 0, sp.size())); |
171 | SAFE_CHECK(p, "invalid null-terminated string" ); |
172 | std::string_view ret(sp.data(), p - sp.data()); |
173 | sp = std::string_view(p + 1, sp.size()); |
174 | return ret; |
175 | } |
176 | |
177 | // Skip over padding until sp.data() - start is a multiple of alignment |
178 | void skipPadding(std::string_view & sp, const char * start, size_t alignment) |
179 | { |
180 | size_t remainder = (sp.data() - start) % alignment; |
181 | if (remainder) |
182 | { |
183 | SAFE_CHECK(alignment - remainder <= sp.size(), "invalid padding" ); |
184 | sp.remove_prefix(alignment - remainder); |
185 | } |
186 | } |
187 | |
188 | } |
189 | |
190 | |
191 | Dwarf::Path::Path(std::string_view baseDir, std::string_view subDir, std::string_view file) |
192 | : baseDir_(baseDir), subDir_(subDir), file_(file) |
193 | { |
194 | using std::swap; |
195 | |
196 | // Normalize |
197 | if (file_.empty()) |
198 | { |
199 | baseDir_ = {}; |
200 | subDir_ = {}; |
201 | return; |
202 | } |
203 | |
204 | if (file_[0] == '/') |
205 | { |
206 | // file_ is absolute |
207 | baseDir_ = {}; |
208 | subDir_ = {}; |
209 | } |
210 | |
211 | if (!subDir_.empty() && subDir_[0] == '/') |
212 | { |
213 | baseDir_ = {}; // subDir_ is absolute |
214 | } |
215 | |
216 | // Make sure it's never the case that baseDir_ is empty, but subDir_ isn't. |
217 | if (baseDir_.empty()) |
218 | { |
219 | swap(baseDir_, subDir_); |
220 | } |
221 | } |
222 | |
223 | size_t Dwarf::Path::size() const |
224 | { |
225 | size_t size = 0; |
226 | bool needsSlash = false; |
227 | |
228 | if (!baseDir_.empty()) |
229 | { |
230 | size += baseDir_.size(); |
231 | needsSlash = baseDir_.back() != '/'; |
232 | } |
233 | |
234 | if (!subDir_.empty()) |
235 | { |
236 | size += needsSlash; |
237 | size += subDir_.size(); |
238 | needsSlash = subDir_.back() != '/'; |
239 | } |
240 | |
241 | if (!file_.empty()) |
242 | { |
243 | size += needsSlash; |
244 | size += file_.size(); |
245 | } |
246 | |
247 | return size; |
248 | } |
249 | |
250 | size_t Dwarf::Path::toBuffer(char * buf, size_t bufSize) const |
251 | { |
252 | size_t totalSize = 0; |
253 | bool needsSlash = false; |
254 | |
255 | auto append = [&](std::string_view sp) |
256 | { |
257 | if (bufSize >= 2) |
258 | { |
259 | size_t toCopy = std::min(sp.size(), bufSize - 1); |
260 | memcpy(buf, sp.data(), toCopy); |
261 | buf += toCopy; |
262 | bufSize -= toCopy; |
263 | } |
264 | totalSize += sp.size(); |
265 | }; |
266 | |
267 | if (!baseDir_.empty()) |
268 | { |
269 | append(baseDir_); |
270 | needsSlash = baseDir_.back() != '/'; |
271 | } |
272 | if (!subDir_.empty()) |
273 | { |
274 | if (needsSlash) |
275 | { |
276 | append("/" ); |
277 | } |
278 | append(subDir_); |
279 | needsSlash = subDir_.back() != '/'; |
280 | } |
281 | if (!file_.empty()) |
282 | { |
283 | if (needsSlash) |
284 | { |
285 | append("/" ); |
286 | } |
287 | append(file_); |
288 | } |
289 | if (bufSize) |
290 | { |
291 | *buf = '\0'; |
292 | } |
293 | |
294 | SAFE_CHECK(totalSize == size(), "Size mismatch" ); |
295 | return totalSize; |
296 | } |
297 | |
298 | void Dwarf::Path::toString(std::string & dest) const |
299 | { |
300 | size_t initialSize = dest.size(); |
301 | dest.reserve(initialSize + size()); |
302 | if (!baseDir_.empty()) |
303 | { |
304 | dest.append(baseDir_.begin(), baseDir_.end()); |
305 | } |
306 | if (!subDir_.empty()) |
307 | { |
308 | if (!dest.empty() && dest.back() != '/') |
309 | { |
310 | dest.push_back('/'); |
311 | } |
312 | dest.append(subDir_.begin(), subDir_.end()); |
313 | } |
314 | if (!file_.empty()) |
315 | { |
316 | if (!dest.empty() && dest.back() != '/') |
317 | { |
318 | dest.push_back('/'); |
319 | } |
320 | dest.append(file_.begin(), file_.end()); |
321 | } |
322 | SAFE_CHECK(dest.size() == initialSize + size(), "Size mismatch" ); |
323 | } |
324 | |
325 | // Next chunk in section |
326 | bool Dwarf::Section::next(std::string_view & chunk) |
327 | { |
328 | chunk = data_; |
329 | if (chunk.empty()) |
330 | return false; |
331 | |
332 | // Initial length is a uint32_t value for a 32-bit section, and |
333 | // a 96-bit value (0xffffffff followed by the 64-bit length) for a 64-bit |
334 | // section. |
335 | auto initialLength = read<uint32_t>(chunk); |
336 | is64Bit_ = (initialLength == uint32_t(-1)); |
337 | auto length = is64Bit_ ? read<uint64_t>(chunk) : initialLength; |
338 | SAFE_CHECK(length <= chunk.size(), "invalid DWARF section" ); |
339 | chunk = std::string_view(chunk.data(), length); |
340 | data_ = std::string_view(chunk.end(), data_.end() - chunk.end()); |
341 | return true; |
342 | } |
343 | |
344 | bool Dwarf::getSection(const char * name, std::string_view * section) const |
345 | { |
346 | std::optional<Elf::Section> elf_section = elf_->findSectionByName(name); |
347 | if (!elf_section) |
348 | return false; |
349 | |
350 | #ifdef SHF_COMPRESSED |
351 | if (elf_section->header.sh_flags & SHF_COMPRESSED) |
352 | return false; |
353 | #endif |
354 | |
355 | *section = { elf_section->begin(), elf_section->size()}; |
356 | return true; |
357 | } |
358 | |
359 | void Dwarf::init() |
360 | { |
361 | // Make sure that all .debug_* sections exist |
362 | if (!getSection(".debug_info" , &info_) |
363 | || !getSection(".debug_abbrev" , &abbrev_) |
364 | || !getSection(".debug_line" , &line_) |
365 | || !getSection(".debug_str" , &strings_)) |
366 | { |
367 | elf_ = nullptr; |
368 | return; |
369 | } |
370 | |
371 | // Optional: fast address range lookup. If missing .debug_info can |
372 | // be used - but it's much slower (linear scan). |
373 | getSection(".debug_aranges" , &aranges_); |
374 | } |
375 | |
376 | bool Dwarf::readAbbreviation(std::string_view & section, DIEAbbreviation & abbr) |
377 | { |
378 | // abbreviation code |
379 | abbr.code = readULEB(section); |
380 | if (abbr.code == 0) |
381 | return false; |
382 | |
383 | // abbreviation tag |
384 | abbr.tag = readULEB(section); |
385 | |
386 | // does this entry have children? |
387 | abbr.hasChildren = (read<uint8_t>(section) != DW_CHILDREN_no); |
388 | |
389 | // attributes |
390 | const char * attributeBegin = section.data(); |
391 | for (;;) |
392 | { |
393 | SAFE_CHECK(!section.empty(), "invalid attribute section" ); |
394 | auto attr = readAttribute(section); |
395 | if (attr.name == 0 && attr.form == 0) |
396 | break; |
397 | } |
398 | |
399 | abbr.attributes = std::string_view(attributeBegin, section.data() - attributeBegin); |
400 | return true; |
401 | } |
402 | |
403 | Dwarf::DIEAbbreviation::Attribute Dwarf::readAttribute(std::string_view & sp) |
404 | { |
405 | return {readULEB(sp), readULEB(sp)}; |
406 | } |
407 | |
408 | Dwarf::DIEAbbreviation Dwarf::getAbbreviation(uint64_t code, uint64_t offset) const |
409 | { |
410 | // Linear search in the .debug_abbrev section, starting at offset |
411 | std::string_view section = abbrev_; |
412 | section.remove_prefix(offset); |
413 | |
414 | Dwarf::DIEAbbreviation abbr; |
415 | while (readAbbreviation(section, abbr)) |
416 | if (abbr.code == code) |
417 | return abbr; |
418 | |
419 | SAFE_CHECK(false, "could not find abbreviation code" ); |
420 | } |
421 | |
422 | Dwarf::AttributeValue Dwarf::readAttributeValue(std::string_view & sp, uint64_t form, bool is64Bit) const |
423 | { |
424 | switch (form) |
425 | { |
426 | case DW_FORM_addr: |
427 | return uint64_t(read<uintptr_t>(sp)); |
428 | case DW_FORM_block1: |
429 | return readBytes(sp, read<uint8_t>(sp)); |
430 | case DW_FORM_block2: |
431 | return readBytes(sp, read<uint16_t>(sp)); |
432 | case DW_FORM_block4: |
433 | return readBytes(sp, read<uint32_t>(sp)); |
434 | case DW_FORM_block: [[fallthrough]]; |
435 | case DW_FORM_exprloc: |
436 | return readBytes(sp, readULEB(sp)); |
437 | case DW_FORM_data1: [[fallthrough]]; |
438 | case DW_FORM_ref1: |
439 | return uint64_t(read<uint8_t>(sp)); |
440 | case DW_FORM_data2: [[fallthrough]]; |
441 | case DW_FORM_ref2: |
442 | return uint64_t(read<uint16_t>(sp)); |
443 | case DW_FORM_data4: [[fallthrough]]; |
444 | case DW_FORM_ref4: |
445 | return uint64_t(read<uint32_t>(sp)); |
446 | case DW_FORM_data8: [[fallthrough]]; |
447 | case DW_FORM_ref8: |
448 | return read<uint64_t>(sp); |
449 | case DW_FORM_sdata: |
450 | return uint64_t(readSLEB(sp)); |
451 | case DW_FORM_udata: [[fallthrough]]; |
452 | case DW_FORM_ref_udata: |
453 | return readULEB(sp); |
454 | case DW_FORM_flag: |
455 | return uint64_t(read<uint8_t>(sp)); |
456 | case DW_FORM_flag_present: |
457 | return uint64_t(1); |
458 | case DW_FORM_sec_offset: [[fallthrough]]; |
459 | case DW_FORM_ref_addr: |
460 | return readOffset(sp, is64Bit); |
461 | case DW_FORM_string: |
462 | return readNullTerminated(sp); |
463 | case DW_FORM_strp: |
464 | return getStringFromStringSection(readOffset(sp, is64Bit)); |
465 | case DW_FORM_indirect: // form is explicitly specified |
466 | return readAttributeValue(sp, readULEB(sp), is64Bit); |
467 | default: |
468 | SAFE_CHECK(false, "invalid attribute form" ); |
469 | } |
470 | } |
471 | |
472 | std::string_view Dwarf::getStringFromStringSection(uint64_t offset) const |
473 | { |
474 | SAFE_CHECK(offset < strings_.size(), "invalid strp offset" ); |
475 | std::string_view sp(strings_); |
476 | sp.remove_prefix(offset); |
477 | return readNullTerminated(sp); |
478 | } |
479 | |
480 | /** |
481 | * Find @address in .debug_aranges and return the offset in |
482 | * .debug_info for compilation unit to which this address belongs. |
483 | */ |
484 | bool Dwarf::findDebugInfoOffset(uintptr_t address, std::string_view aranges, uint64_t & offset) |
485 | { |
486 | Section arangesSection(aranges); |
487 | std::string_view chunk; |
488 | while (arangesSection.next(chunk)) |
489 | { |
490 | auto version = read<uint16_t>(chunk); |
491 | SAFE_CHECK(version == 2, "invalid aranges version" ); |
492 | |
493 | offset = readOffset(chunk, arangesSection.is64Bit()); |
494 | auto addressSize = read<uint8_t>(chunk); |
495 | SAFE_CHECK(addressSize == sizeof(uintptr_t), "invalid address size" ); |
496 | auto segmentSize = read<uint8_t>(chunk); |
497 | SAFE_CHECK(segmentSize == 0, "segmented architecture not supported" ); |
498 | |
499 | // Padded to a multiple of 2 addresses. |
500 | // Strangely enough, this is the only place in the DWARF spec that requires |
501 | // padding. |
502 | skipPadding(chunk, aranges.data(), 2 * sizeof(uintptr_t)); |
503 | for (;;) |
504 | { |
505 | auto start = read<uintptr_t>(chunk); |
506 | auto length = read<uintptr_t>(chunk); |
507 | |
508 | if (start == 0 && length == 0) |
509 | break; |
510 | |
511 | // Is our address in this range? |
512 | if (address >= start && address < start + length) |
513 | return true; |
514 | } |
515 | } |
516 | return false; |
517 | } |
518 | |
519 | /** |
520 | * Find the @locationInfo for @address in the compilation unit represented |
521 | * by the @sp .debug_info entry. |
522 | * Returns whether the address was found. |
523 | * Advances @sp to the next entry in .debug_info. |
524 | */ |
525 | bool Dwarf::findLocation(uintptr_t address, std::string_view & infoEntry, LocationInfo & locationInfo) const |
526 | { |
527 | // For each compilation unit compiled with a DWARF producer, a |
528 | // contribution is made to the .debug_info section of the object |
529 | // file. Each such contribution consists of a compilation unit |
530 | // header (see Section 7.5.1.1) followed by a single |
531 | // DW_TAG_compile_unit or DW_TAG_partial_unit debugging information |
532 | // entry, together with its children. |
533 | |
534 | // 7.5.1.1 Compilation Unit Header |
535 | // 1. unit_length (4B or 12B): read by Section::next |
536 | // 2. version (2B) |
537 | // 3. debug_abbrev_offset (4B or 8B): offset into the .debug_abbrev section |
538 | // 4. address_size (1B) |
539 | |
540 | Section debugInfoSection(infoEntry); |
541 | std::string_view chunk; |
542 | SAFE_CHECK(debugInfoSection.next(chunk), "invalid debug info" ); |
543 | |
544 | auto version = read<uint16_t>(chunk); |
545 | SAFE_CHECK(version >= 2 && version <= 4, "invalid info version" ); |
546 | uint64_t abbrevOffset = readOffset(chunk, debugInfoSection.is64Bit()); |
547 | auto addressSize = read<uint8_t>(chunk); |
548 | SAFE_CHECK(addressSize == sizeof(uintptr_t), "invalid address size" ); |
549 | |
550 | // We survived so far. The first (and only) DIE should be DW_TAG_compile_unit |
551 | // NOTE: - binutils <= 2.25 does not issue DW_TAG_partial_unit. |
552 | // - dwarf compression tools like `dwz` may generate it. |
553 | // TODO(tudorb): Handle DW_TAG_partial_unit? |
554 | auto code = readULEB(chunk); |
555 | SAFE_CHECK(code != 0, "invalid code" ); |
556 | auto abbr = getAbbreviation(code, abbrevOffset); |
557 | SAFE_CHECK(abbr.tag == DW_TAG_compile_unit, "expecting compile unit entry" ); |
558 | // Skip children entries, remove_prefix to the next compilation unit entry. |
559 | infoEntry.remove_prefix(chunk.end() - infoEntry.begin()); |
560 | |
561 | // Read attributes, extracting the few we care about |
562 | bool foundLineOffset = false; |
563 | uint64_t lineOffset = 0; |
564 | std::string_view compilationDirectory; |
565 | std::string_view mainFileName; |
566 | |
567 | DIEAbbreviation::Attribute attr; |
568 | std::string_view attributes = abbr.attributes; |
569 | for (;;) |
570 | { |
571 | attr = readAttribute(attributes); |
572 | if (attr.name == 0 && attr.form == 0) |
573 | { |
574 | break; |
575 | } |
576 | auto val = readAttributeValue(chunk, attr.form, debugInfoSection.is64Bit()); |
577 | switch (attr.name) |
578 | { |
579 | case DW_AT_stmt_list: |
580 | // Offset in .debug_line for the line number VM program for this |
581 | // compilation unit |
582 | lineOffset = std::get<uint64_t>(val); |
583 | foundLineOffset = true; |
584 | break; |
585 | case DW_AT_comp_dir: |
586 | // Compilation directory |
587 | compilationDirectory = std::get<std::string_view>(val); |
588 | break; |
589 | case DW_AT_name: |
590 | // File name of main file being compiled |
591 | mainFileName = std::get<std::string_view>(val); |
592 | break; |
593 | } |
594 | } |
595 | |
596 | if (!mainFileName.empty()) |
597 | { |
598 | locationInfo.hasMainFile = true; |
599 | locationInfo.mainFile = Path(compilationDirectory, "" , mainFileName); |
600 | } |
601 | |
602 | if (!foundLineOffset) |
603 | { |
604 | return false; |
605 | } |
606 | |
607 | std::string_view lineSection(line_); |
608 | lineSection.remove_prefix(lineOffset); |
609 | LineNumberVM lineVM(lineSection, compilationDirectory); |
610 | |
611 | // Execute line number VM program to find file and line |
612 | locationInfo.hasFileAndLine = lineVM.findAddress(address, locationInfo.file, locationInfo.line); |
613 | return locationInfo.hasFileAndLine; |
614 | } |
615 | |
616 | bool Dwarf::findAddress(uintptr_t address, LocationInfo & locationInfo, LocationInfoMode mode) const |
617 | { |
618 | locationInfo = LocationInfo(); |
619 | |
620 | if (mode == LocationInfoMode::DISABLED) |
621 | { |
622 | return false; |
623 | } |
624 | |
625 | if (!elf_) |
626 | { // No file. |
627 | return false; |
628 | } |
629 | |
630 | if (!aranges_.empty()) |
631 | { |
632 | // Fast path: find the right .debug_info entry by looking up the |
633 | // address in .debug_aranges. |
634 | uint64_t offset = 0; |
635 | if (findDebugInfoOffset(address, aranges_, offset)) |
636 | { |
637 | // Read compilation unit header from .debug_info |
638 | std::string_view infoEntry(info_); |
639 | infoEntry.remove_prefix(offset); |
640 | findLocation(address, infoEntry, locationInfo); |
641 | return locationInfo.hasFileAndLine; |
642 | } |
643 | else if (mode == LocationInfoMode::FAST) |
644 | { |
645 | // NOTE: Clang (when using -gdwarf-aranges) doesn't generate entries |
646 | // in .debug_aranges for some functions, but always generates |
647 | // .debug_info entries. Scanning .debug_info is slow, so fall back to |
648 | // it only if such behavior is requested via LocationInfoMode. |
649 | return false; |
650 | } |
651 | else |
652 | { |
653 | SAFE_CHECK(mode == LocationInfoMode::FULL, "unexpected mode" ); |
654 | // Fall back to the linear scan. |
655 | } |
656 | } |
657 | |
658 | // Slow path (linear scan): Iterate over all .debug_info entries |
659 | // and look for the address in each compilation unit. |
660 | std::string_view infoEntry(info_); |
661 | while (!infoEntry.empty() && !locationInfo.hasFileAndLine) |
662 | findLocation(address, infoEntry, locationInfo); |
663 | |
664 | return locationInfo.hasFileAndLine; |
665 | } |
666 | |
667 | Dwarf::LineNumberVM::LineNumberVM(std::string_view data, std::string_view compilationDirectory) |
668 | : compilationDirectory_(compilationDirectory) |
669 | { |
670 | Section section(data); |
671 | SAFE_CHECK(section.next(data_), "invalid line number VM" ); |
672 | is64Bit_ = section.is64Bit(); |
673 | init(); |
674 | reset(); |
675 | } |
676 | |
677 | void Dwarf::LineNumberVM::reset() |
678 | { |
679 | address_ = 0; |
680 | file_ = 1; |
681 | line_ = 1; |
682 | column_ = 0; |
683 | isStmt_ = defaultIsStmt_; |
684 | basicBlock_ = false; |
685 | endSequence_ = false; |
686 | prologueEnd_ = false; |
687 | epilogueBegin_ = false; |
688 | isa_ = 0; |
689 | discriminator_ = 0; |
690 | } |
691 | |
692 | void Dwarf::LineNumberVM::init() |
693 | { |
694 | version_ = read<uint16_t>(data_); |
695 | SAFE_CHECK(version_ >= 2 && version_ <= 4, "invalid version in line number VM" ); |
696 | uint64_t = readOffset(data_, is64Bit_); |
697 | SAFE_CHECK(headerLength <= data_.size(), "invalid line number VM header length" ); |
698 | std::string_view (data_.data(), headerLength); |
699 | data_ = std::string_view(header.end(), data_.end() - header.end()); |
700 | |
701 | minLength_ = read<uint8_t>(header); |
702 | if (version_ == 4) |
703 | { // Version 2 and 3 records don't have this |
704 | uint8_t maxOpsPerInstruction = read<uint8_t>(header); |
705 | SAFE_CHECK(maxOpsPerInstruction == 1, "VLIW not supported" ); |
706 | } |
707 | defaultIsStmt_ = read<uint8_t>(header); |
708 | lineBase_ = read<int8_t>(header); // yes, signed |
709 | lineRange_ = read<uint8_t>(header); |
710 | opcodeBase_ = read<uint8_t>(header); |
711 | SAFE_CHECK(opcodeBase_ != 0, "invalid opcode base" ); |
712 | standardOpcodeLengths_ = reinterpret_cast<const uint8_t *>(header.data()); //-V506 |
713 | header.remove_prefix(opcodeBase_ - 1); |
714 | |
715 | // We don't want to use heap, so we don't keep an unbounded amount of state. |
716 | // We'll just skip over include directories and file names here, and |
717 | // we'll loop again when we actually need to retrieve one. |
718 | std::string_view sp; |
719 | const char * tmp = header.data(); |
720 | includeDirectoryCount_ = 0; |
721 | while (!(sp = readNullTerminated(header)).empty()) |
722 | { |
723 | ++includeDirectoryCount_; |
724 | } |
725 | includeDirectories_ = std::string_view(tmp, header.data() - tmp); |
726 | |
727 | tmp = header.data(); |
728 | FileName fn; |
729 | fileNameCount_ = 0; |
730 | while (readFileName(header, fn)) |
731 | { |
732 | ++fileNameCount_; |
733 | } |
734 | fileNames_ = std::string_view(tmp, header.data() - tmp); |
735 | } |
736 | |
737 | bool Dwarf::LineNumberVM::next(std::string_view & program) |
738 | { |
739 | Dwarf::LineNumberVM::StepResult ret; |
740 | do |
741 | { |
742 | ret = step(program); |
743 | } while (ret == CONTINUE); |
744 | |
745 | return (ret == COMMIT); |
746 | } |
747 | |
748 | Dwarf::LineNumberVM::FileName Dwarf::LineNumberVM::getFileName(uint64_t index) const |
749 | { |
750 | SAFE_CHECK(index != 0, "invalid file index 0" ); |
751 | |
752 | FileName fn; |
753 | if (index <= fileNameCount_) |
754 | { |
755 | std::string_view fileNames = fileNames_; |
756 | for (; index; --index) |
757 | { |
758 | if (!readFileName(fileNames, fn)) |
759 | { |
760 | abort(); |
761 | } |
762 | } |
763 | return fn; |
764 | } |
765 | |
766 | index -= fileNameCount_; |
767 | |
768 | std::string_view program = data_; |
769 | for (; index; --index) |
770 | { |
771 | SAFE_CHECK(nextDefineFile(program, fn), "invalid file index" ); |
772 | } |
773 | |
774 | return fn; |
775 | } |
776 | |
777 | std::string_view Dwarf::LineNumberVM::getIncludeDirectory(uint64_t index) const |
778 | { |
779 | if (index == 0) |
780 | { |
781 | return std::string_view(); |
782 | } |
783 | |
784 | SAFE_CHECK(index <= includeDirectoryCount_, "invalid include directory" ); |
785 | |
786 | std::string_view includeDirectories = includeDirectories_; |
787 | std::string_view dir; |
788 | for (; index; --index) |
789 | { |
790 | dir = readNullTerminated(includeDirectories); |
791 | if (dir.empty()) |
792 | { |
793 | abort(); // BUG |
794 | } |
795 | } |
796 | |
797 | return dir; |
798 | } |
799 | |
800 | bool Dwarf::LineNumberVM::readFileName(std::string_view & program, FileName & fn) |
801 | { |
802 | fn.relativeName = readNullTerminated(program); |
803 | if (fn.relativeName.empty()) |
804 | { |
805 | return false; |
806 | } |
807 | fn.directoryIndex = readULEB(program); |
808 | // Skip over file size and last modified time |
809 | readULEB(program); |
810 | readULEB(program); |
811 | return true; |
812 | } |
813 | |
814 | bool Dwarf::LineNumberVM::nextDefineFile(std::string_view & program, FileName & fn) const |
815 | { |
816 | while (!program.empty()) |
817 | { |
818 | auto opcode = read<uint8_t>(program); |
819 | |
820 | if (opcode >= opcodeBase_) |
821 | { // special opcode |
822 | continue; |
823 | } |
824 | |
825 | if (opcode != 0) |
826 | { // standard opcode |
827 | // Skip, slurp the appropriate number of LEB arguments |
828 | uint8_t argCount = standardOpcodeLengths_[opcode - 1]; |
829 | while (argCount--) |
830 | { |
831 | readULEB(program); |
832 | } |
833 | continue; |
834 | } |
835 | |
836 | // Extended opcode |
837 | auto length = readULEB(program); |
838 | // the opcode itself should be included in the length, so length >= 1 |
839 | SAFE_CHECK(length != 0, "invalid extended opcode length" ); |
840 | read<uint8_t>(program); // extended opcode |
841 | --length; |
842 | |
843 | if (opcode == DW_LNE_define_file) |
844 | { |
845 | SAFE_CHECK(readFileName(program, fn), "invalid empty file in DW_LNE_define_file" ); |
846 | return true; |
847 | } |
848 | |
849 | program.remove_prefix(length); |
850 | continue; |
851 | } |
852 | |
853 | return false; |
854 | } |
855 | |
856 | Dwarf::LineNumberVM::StepResult Dwarf::LineNumberVM::step(std::string_view & program) |
857 | { |
858 | auto opcode = read<uint8_t>(program); |
859 | |
860 | if (opcode >= opcodeBase_) |
861 | { // special opcode |
862 | uint8_t adjustedOpcode = opcode - opcodeBase_; |
863 | uint8_t opAdvance = adjustedOpcode / lineRange_; |
864 | |
865 | address_ += minLength_ * opAdvance; |
866 | line_ += lineBase_ + adjustedOpcode % lineRange_; |
867 | |
868 | basicBlock_ = false; |
869 | prologueEnd_ = false; |
870 | epilogueBegin_ = false; |
871 | discriminator_ = 0; |
872 | return COMMIT; |
873 | } |
874 | |
875 | if (opcode != 0) |
876 | { // standard opcode |
877 | // Only interpret opcodes that are recognized by the version we're parsing; |
878 | // the others are vendor extensions and we should ignore them. |
879 | switch (opcode) |
880 | { |
881 | case DW_LNS_copy: |
882 | basicBlock_ = false; |
883 | prologueEnd_ = false; |
884 | epilogueBegin_ = false; |
885 | discriminator_ = 0; |
886 | return COMMIT; |
887 | case DW_LNS_advance_pc: |
888 | address_ += minLength_ * readULEB(program); |
889 | return CONTINUE; |
890 | case DW_LNS_advance_line: |
891 | line_ += readSLEB(program); |
892 | return CONTINUE; |
893 | case DW_LNS_set_file: |
894 | file_ = readULEB(program); |
895 | return CONTINUE; |
896 | case DW_LNS_set_column: |
897 | column_ = readULEB(program); |
898 | return CONTINUE; |
899 | case DW_LNS_negate_stmt: |
900 | isStmt_ = !isStmt_; |
901 | return CONTINUE; |
902 | case DW_LNS_set_basic_block: |
903 | basicBlock_ = true; |
904 | return CONTINUE; |
905 | case DW_LNS_const_add_pc: |
906 | address_ += minLength_ * ((255 - opcodeBase_) / lineRange_); |
907 | return CONTINUE; |
908 | case DW_LNS_fixed_advance_pc: |
909 | address_ += read<uint16_t>(program); |
910 | return CONTINUE; |
911 | case DW_LNS_set_prologue_end: |
912 | if (version_ == 2) |
913 | { |
914 | break; // not supported in version 2 |
915 | } |
916 | prologueEnd_ = true; |
917 | return CONTINUE; |
918 | case DW_LNS_set_epilogue_begin: |
919 | if (version_ == 2) |
920 | { |
921 | break; // not supported in version 2 |
922 | } |
923 | epilogueBegin_ = true; |
924 | return CONTINUE; |
925 | case DW_LNS_set_isa: |
926 | if (version_ == 2) |
927 | { |
928 | break; // not supported in version 2 |
929 | } |
930 | isa_ = readULEB(program); |
931 | return CONTINUE; |
932 | } |
933 | |
934 | // Unrecognized standard opcode, slurp the appropriate number of LEB |
935 | // arguments. |
936 | uint8_t argCount = standardOpcodeLengths_[opcode - 1]; |
937 | while (argCount--) |
938 | { |
939 | readULEB(program); |
940 | } |
941 | return CONTINUE; |
942 | } |
943 | |
944 | // Extended opcode |
945 | auto length = readULEB(program); |
946 | // the opcode itself should be included in the length, so length >= 1 |
947 | SAFE_CHECK(length != 0, "invalid extended opcode length" ); |
948 | auto extendedOpcode = read<uint8_t>(program); |
949 | --length; |
950 | |
951 | switch (extendedOpcode) |
952 | { |
953 | case DW_LNE_end_sequence: |
954 | return END; |
955 | case DW_LNE_set_address: |
956 | address_ = read<uintptr_t>(program); |
957 | return CONTINUE; |
958 | case DW_LNE_define_file: |
959 | // We can't process DW_LNE_define_file here, as it would require us to |
960 | // use unbounded amounts of state (ie. use the heap). We'll do a second |
961 | // pass (using nextDefineFile()) if necessary. |
962 | break; |
963 | case DW_LNE_set_discriminator: |
964 | discriminator_ = readULEB(program); |
965 | return CONTINUE; |
966 | } |
967 | |
968 | // Unrecognized extended opcode |
969 | program.remove_prefix(length); |
970 | return CONTINUE; |
971 | } |
972 | |
973 | bool Dwarf::LineNumberVM::findAddress(uintptr_t target, Path & file, uint64_t & line) |
974 | { |
975 | std::string_view program = data_; |
976 | |
977 | // Within each sequence of instructions, the address may only increase. |
978 | // Unfortunately, within the same compilation unit, sequences may appear |
979 | // in any order. So any sequence is a candidate if it starts at an address |
980 | // <= the target address, and we know we've found the target address if |
981 | // a candidate crosses the target address. |
982 | enum State |
983 | { |
984 | START, |
985 | LOW_SEQ, // candidate |
986 | HIGH_SEQ |
987 | }; |
988 | State state = START; |
989 | reset(); |
990 | |
991 | uint64_t prevFile = 0; |
992 | uint64_t prevLine = 0; |
993 | while (!program.empty()) |
994 | { |
995 | bool seqEnd = !next(program); |
996 | |
997 | if (state == START) |
998 | { |
999 | if (!seqEnd) |
1000 | { |
1001 | state = address_ <= target ? LOW_SEQ : HIGH_SEQ; |
1002 | } |
1003 | } |
1004 | |
1005 | if (state == LOW_SEQ) |
1006 | { |
1007 | if (address_ > target) |
1008 | { |
1009 | // Found it! Note that ">" is indeed correct (not ">="), as each |
1010 | // sequence is guaranteed to have one entry past-the-end (emitted by |
1011 | // DW_LNE_end_sequence) |
1012 | if (prevFile == 0) |
1013 | { |
1014 | return false; |
1015 | } |
1016 | auto fn = getFileName(prevFile); |
1017 | file = Path(compilationDirectory_, getIncludeDirectory(fn.directoryIndex), fn.relativeName); |
1018 | line = prevLine; |
1019 | return true; |
1020 | } |
1021 | prevFile = file_; |
1022 | prevLine = line_; |
1023 | } |
1024 | |
1025 | if (seqEnd) |
1026 | { |
1027 | state = START; |
1028 | reset(); |
1029 | } |
1030 | } |
1031 | |
1032 | return false; |
1033 | } |
1034 | |
1035 | } |
1036 | |
1037 | #endif |
1038 | |