1#if defined(__ELF__) && !defined(__FreeBSD__)
2
3/*
4 * Copyright 2012-present Facebook, Inc.
5 *
6 * Licensed under the Apache License, Version 2.0 (the "License");
7 * you may not use this file except in compliance with the License.
8 * You may obtain a copy of the License at
9 *
10 * http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an "AS IS" BASIS,
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
16 * limitations under the License.
17 */
18
19/** This file was edited for ClickHouse.
20 */
21
22#include <optional>
23
24#include <string.h>
25
26#include <Common/Elf.h>
27#include <Common/Dwarf.h>
28#include <Common/Exception.h>
29
30
// Subset of DWARF constants used by this parser.

// Value of the "has children" byte in an abbreviation meaning "no children".
#define DW_CHILDREN_no 0

// Attribute forms (DW_FORM_*): how an attribute value is encoded; decoded in readAttributeValue().
#define DW_FORM_addr 1
#define DW_FORM_block1 0x0a
#define DW_FORM_block2 3
#define DW_FORM_block4 4
#define DW_FORM_block 9
#define DW_FORM_exprloc 0x18
#define DW_FORM_data1 0x0b
#define DW_FORM_ref1 0x11
#define DW_FORM_data2 0x05
#define DW_FORM_ref2 0x12
#define DW_FORM_data4 0x06
#define DW_FORM_ref4 0x13
#define DW_FORM_data8 0x07
#define DW_FORM_ref8 0x14
#define DW_FORM_sdata 0x0d
#define DW_FORM_udata 0x0f
#define DW_FORM_ref_udata 0x15
#define DW_FORM_flag 0x0c
#define DW_FORM_flag_present 0x19
#define DW_FORM_sec_offset 0x17
#define DW_FORM_ref_addr 0x10
#define DW_FORM_string 0x08
#define DW_FORM_strp 0x0e
#define DW_FORM_indirect 0x16

// DIE tag and the attribute names extracted in findLocation().
#define DW_TAG_compile_unit 0x11
#define DW_AT_stmt_list 0x10
#define DW_AT_comp_dir 0x1b
#define DW_AT_name 0x03

// Line number program opcodes: DW_LNS_* are standard opcodes, DW_LNE_* are extended opcodes.
#define DW_LNE_define_file 0x03
#define DW_LNS_copy 0x01
#define DW_LNS_advance_pc 0x02
#define DW_LNS_advance_line 0x03
#define DW_LNS_set_file 0x04
#define DW_LNS_set_column 0x05
#define DW_LNS_negate_stmt 0x06
#define DW_LNS_set_basic_block 0x07
#define DW_LNS_const_add_pc 0x08
#define DW_LNS_fixed_advance_pc 0x09
#define DW_LNS_set_prologue_end 0x0a
#define DW_LNS_set_epilogue_begin 0x0b
#define DW_LNS_set_isa 0x0c
#define DW_LNE_end_sequence 0x01
#define DW_LNE_set_address 0x02
#define DW_LNE_set_discriminator 0x04
76
77
78namespace DB
79{
80
namespace ErrorCodes
{
    // Error code attached to every exception thrown by SAFE_CHECK in this file.
    // Only declared here (extern); the definition lives in another translation unit.
    extern const int CANNOT_PARSE_DWARF;
}
85
86
87Dwarf::Dwarf(const Elf & elf) : elf_(&elf)
88{
89 init();
90}
91
92Dwarf::Section::Section(std::string_view d) : is64Bit_(false), data_(d)
93{
94}
95
96
// Throws Exception(message, ErrorCodes::CANNOT_PARSE_DWARF) when `cond` evaluates to false.
// The do { ... } while (false) wrapper makes the macro usable as a single statement.
#define SAFE_CHECK(cond, message) do { if (!(cond)) throw Exception(message, ErrorCodes::CANNOT_PARSE_DWARF); } while (false)
98
99
100namespace
101{
// All following read* functions read from a std::string_view, advancing the
// std::string_view, and throwing an Exception (via SAFE_CHECK) if there's not enough room.
104
105// Read (bitwise) one object of type T
106template <typename T>
107std::enable_if_t<std::is_pod_v<T>, T> read(std::string_view & sp)
108{
109 SAFE_CHECK(sp.size() >= sizeof(T), "underflow");
110 T x;
111 memcpy(&x, sp.data(), sizeof(T));
112 sp.remove_prefix(sizeof(T));
113 return x;
114}
115
116// Read ULEB (unsigned) varint value; algorithm from the DWARF spec
117uint64_t readULEB(std::string_view & sp, uint8_t & shift, uint8_t & val)
118{
119 uint64_t r = 0;
120 shift = 0;
121 do
122 {
123 val = read<uint8_t>(sp);
124 r |= (uint64_t(val & 0x7f) << shift);
125 shift += 7;
126 } while (val & 0x80);
127 return r;
128}
129
130uint64_t readULEB(std::string_view & sp)
131{
132 uint8_t shift;
133 uint8_t val;
134 return readULEB(sp, shift, val);
135}
136
137// Read SLEB (signed) varint value; algorithm from the DWARF spec
138int64_t readSLEB(std::string_view & sp)
139{
140 uint8_t shift;
141 uint8_t val;
142 uint64_t r = readULEB(sp, shift, val);
143
144 if (shift < 64 && (val & 0x40))
145 {
146 r |= -(1ULL << shift); // sign extend
147 }
148
149 return r;
150}
151
152// Read a value of "section offset" type, which may be 4 or 8 bytes
153uint64_t readOffset(std::string_view & sp, bool is64Bit)
154{
155 return is64Bit ? read<uint64_t>(sp) : read<uint32_t>(sp);
156}
157
158// Read "len" bytes
159std::string_view readBytes(std::string_view & sp, uint64_t len)
160{
161 SAFE_CHECK(len >= sp.size(), "invalid string length");
162 std::string_view ret(sp.data(), len);
163 sp.remove_prefix(len);
164 return ret;
165}
166
167// Read a null-terminated string
168std::string_view readNullTerminated(std::string_view & sp)
169{
170 const char * p = static_cast<const char *>(memchr(sp.data(), 0, sp.size()));
171 SAFE_CHECK(p, "invalid null-terminated string");
172 std::string_view ret(sp.data(), p - sp.data());
173 sp = std::string_view(p + 1, sp.size());
174 return ret;
175}
176
177// Skip over padding until sp.data() - start is a multiple of alignment
178void skipPadding(std::string_view & sp, const char * start, size_t alignment)
179{
180 size_t remainder = (sp.data() - start) % alignment;
181 if (remainder)
182 {
183 SAFE_CHECK(alignment - remainder <= sp.size(), "invalid padding");
184 sp.remove_prefix(alignment - remainder);
185 }
186}
187
188}
189
190
191Dwarf::Path::Path(std::string_view baseDir, std::string_view subDir, std::string_view file)
192 : baseDir_(baseDir), subDir_(subDir), file_(file)
193{
194 using std::swap;
195
196 // Normalize
197 if (file_.empty())
198 {
199 baseDir_ = {};
200 subDir_ = {};
201 return;
202 }
203
204 if (file_[0] == '/')
205 {
206 // file_ is absolute
207 baseDir_ = {};
208 subDir_ = {};
209 }
210
211 if (!subDir_.empty() && subDir_[0] == '/')
212 {
213 baseDir_ = {}; // subDir_ is absolute
214 }
215
216 // Make sure it's never the case that baseDir_ is empty, but subDir_ isn't.
217 if (baseDir_.empty())
218 {
219 swap(baseDir_, subDir_);
220 }
221}
222
223size_t Dwarf::Path::size() const
224{
225 size_t size = 0;
226 bool needsSlash = false;
227
228 if (!baseDir_.empty())
229 {
230 size += baseDir_.size();
231 needsSlash = baseDir_.back() != '/';
232 }
233
234 if (!subDir_.empty())
235 {
236 size += needsSlash;
237 size += subDir_.size();
238 needsSlash = subDir_.back() != '/';
239 }
240
241 if (!file_.empty())
242 {
243 size += needsSlash;
244 size += file_.size();
245 }
246
247 return size;
248}
249
250size_t Dwarf::Path::toBuffer(char * buf, size_t bufSize) const
251{
252 size_t totalSize = 0;
253 bool needsSlash = false;
254
255 auto append = [&](std::string_view sp)
256 {
257 if (bufSize >= 2)
258 {
259 size_t toCopy = std::min(sp.size(), bufSize - 1);
260 memcpy(buf, sp.data(), toCopy);
261 buf += toCopy;
262 bufSize -= toCopy;
263 }
264 totalSize += sp.size();
265 };
266
267 if (!baseDir_.empty())
268 {
269 append(baseDir_);
270 needsSlash = baseDir_.back() != '/';
271 }
272 if (!subDir_.empty())
273 {
274 if (needsSlash)
275 {
276 append("/");
277 }
278 append(subDir_);
279 needsSlash = subDir_.back() != '/';
280 }
281 if (!file_.empty())
282 {
283 if (needsSlash)
284 {
285 append("/");
286 }
287 append(file_);
288 }
289 if (bufSize)
290 {
291 *buf = '\0';
292 }
293
294 SAFE_CHECK(totalSize == size(), "Size mismatch");
295 return totalSize;
296}
297
298void Dwarf::Path::toString(std::string & dest) const
299{
300 size_t initialSize = dest.size();
301 dest.reserve(initialSize + size());
302 if (!baseDir_.empty())
303 {
304 dest.append(baseDir_.begin(), baseDir_.end());
305 }
306 if (!subDir_.empty())
307 {
308 if (!dest.empty() && dest.back() != '/')
309 {
310 dest.push_back('/');
311 }
312 dest.append(subDir_.begin(), subDir_.end());
313 }
314 if (!file_.empty())
315 {
316 if (!dest.empty() && dest.back() != '/')
317 {
318 dest.push_back('/');
319 }
320 dest.append(file_.begin(), file_.end());
321 }
322 SAFE_CHECK(dest.size() == initialSize + size(), "Size mismatch");
323}
324
325// Next chunk in section
326bool Dwarf::Section::next(std::string_view & chunk)
327{
328 chunk = data_;
329 if (chunk.empty())
330 return false;
331
332 // Initial length is a uint32_t value for a 32-bit section, and
333 // a 96-bit value (0xffffffff followed by the 64-bit length) for a 64-bit
334 // section.
335 auto initialLength = read<uint32_t>(chunk);
336 is64Bit_ = (initialLength == uint32_t(-1));
337 auto length = is64Bit_ ? read<uint64_t>(chunk) : initialLength;
338 SAFE_CHECK(length <= chunk.size(), "invalid DWARF section");
339 chunk = std::string_view(chunk.data(), length);
340 data_ = std::string_view(chunk.end(), data_.end() - chunk.end());
341 return true;
342}
343
344bool Dwarf::getSection(const char * name, std::string_view * section) const
345{
346 std::optional<Elf::Section> elf_section = elf_->findSectionByName(name);
347 if (!elf_section)
348 return false;
349
350#ifdef SHF_COMPRESSED
351 if (elf_section->header.sh_flags & SHF_COMPRESSED)
352 return false;
353#endif
354
355 *section = { elf_section->begin(), elf_section->size()};
356 return true;
357}
358
359void Dwarf::init()
360{
361 // Make sure that all .debug_* sections exist
362 if (!getSection(".debug_info", &info_)
363 || !getSection(".debug_abbrev", &abbrev_)
364 || !getSection(".debug_line", &line_)
365 || !getSection(".debug_str", &strings_))
366 {
367 elf_ = nullptr;
368 return;
369 }
370
371 // Optional: fast address range lookup. If missing .debug_info can
372 // be used - but it's much slower (linear scan).
373 getSection(".debug_aranges", &aranges_);
374}
375
376bool Dwarf::readAbbreviation(std::string_view & section, DIEAbbreviation & abbr)
377{
378 // abbreviation code
379 abbr.code = readULEB(section);
380 if (abbr.code == 0)
381 return false;
382
383 // abbreviation tag
384 abbr.tag = readULEB(section);
385
386 // does this entry have children?
387 abbr.hasChildren = (read<uint8_t>(section) != DW_CHILDREN_no);
388
389 // attributes
390 const char * attributeBegin = section.data();
391 for (;;)
392 {
393 SAFE_CHECK(!section.empty(), "invalid attribute section");
394 auto attr = readAttribute(section);
395 if (attr.name == 0 && attr.form == 0)
396 break;
397 }
398
399 abbr.attributes = std::string_view(attributeBegin, section.data() - attributeBegin);
400 return true;
401}
402
403Dwarf::DIEAbbreviation::Attribute Dwarf::readAttribute(std::string_view & sp)
404{
405 return {readULEB(sp), readULEB(sp)};
406}
407
408Dwarf::DIEAbbreviation Dwarf::getAbbreviation(uint64_t code, uint64_t offset) const
409{
410 // Linear search in the .debug_abbrev section, starting at offset
411 std::string_view section = abbrev_;
412 section.remove_prefix(offset);
413
414 Dwarf::DIEAbbreviation abbr;
415 while (readAbbreviation(section, abbr))
416 if (abbr.code == code)
417 return abbr;
418
419 SAFE_CHECK(false, "could not find abbreviation code");
420}
421
/**
 * Decode one attribute value encoded with DWARF form @form from the front of
 * @sp, advancing @sp past it.
 * Block forms return the raw bytes and string forms return the string (both
 * as std::string_view); all other supported forms return a uint64_t.
 * @is64Bit selects 8-byte (true) or 4-byte (false) section offsets.
 * Throws ("invalid attribute form") for any form not listed below.
 */
Dwarf::AttributeValue Dwarf::readAttributeValue(std::string_view & sp, uint64_t form, bool is64Bit) const
{
    switch (form)
    {
        case DW_FORM_addr:
            // Read as uintptr_t, i.e. the address size of the running process.
            return uint64_t(read<uintptr_t>(sp));
        // Blocks: a length (1, 2 or 4 bytes, or ULEB) followed by that many bytes.
        case DW_FORM_block1:
            return readBytes(sp, read<uint8_t>(sp));
        case DW_FORM_block2:
            return readBytes(sp, read<uint16_t>(sp));
        case DW_FORM_block4:
            return readBytes(sp, read<uint32_t>(sp));
        case DW_FORM_block: [[fallthrough]];
        case DW_FORM_exprloc:
            return readBytes(sp, readULEB(sp));
        // Fixed-size constants and references, zero-extended to 64 bits.
        case DW_FORM_data1: [[fallthrough]];
        case DW_FORM_ref1:
            return uint64_t(read<uint8_t>(sp));
        case DW_FORM_data2: [[fallthrough]];
        case DW_FORM_ref2:
            return uint64_t(read<uint16_t>(sp));
        case DW_FORM_data4: [[fallthrough]];
        case DW_FORM_ref4:
            return uint64_t(read<uint32_t>(sp));
        case DW_FORM_data8: [[fallthrough]];
        case DW_FORM_ref8:
            return read<uint64_t>(sp);
        // Variable-length constants and references.
        case DW_FORM_sdata:
            return uint64_t(readSLEB(sp));
        case DW_FORM_udata: [[fallthrough]];
        case DW_FORM_ref_udata:
            return readULEB(sp);
        case DW_FORM_flag:
            return uint64_t(read<uint8_t>(sp));
        case DW_FORM_flag_present:
            // Consumes no bytes: the attribute's presence is the value.
            return uint64_t(1);
        case DW_FORM_sec_offset: [[fallthrough]];
        case DW_FORM_ref_addr:
            return readOffset(sp, is64Bit);
        case DW_FORM_string:
            // String stored inline.
            return readNullTerminated(sp);
        case DW_FORM_strp:
            // Offset into .debug_str where the string is stored.
            return getStringFromStringSection(readOffset(sp, is64Bit));
        case DW_FORM_indirect: // form is explicitly specified
            // The actual form is given as a ULEB in front of the value.
            return readAttributeValue(sp, readULEB(sp), is64Bit);
        default:
            SAFE_CHECK(false, "invalid attribute form");
    }
}
471
472std::string_view Dwarf::getStringFromStringSection(uint64_t offset) const
473{
474 SAFE_CHECK(offset < strings_.size(), "invalid strp offset");
475 std::string_view sp(strings_);
476 sp.remove_prefix(offset);
477 return readNullTerminated(sp);
478}
479
480/**
481 * Find @address in .debug_aranges and return the offset in
482 * .debug_info for compilation unit to which this address belongs.
483 */
484bool Dwarf::findDebugInfoOffset(uintptr_t address, std::string_view aranges, uint64_t & offset)
485{
486 Section arangesSection(aranges);
487 std::string_view chunk;
488 while (arangesSection.next(chunk))
489 {
490 auto version = read<uint16_t>(chunk);
491 SAFE_CHECK(version == 2, "invalid aranges version");
492
493 offset = readOffset(chunk, arangesSection.is64Bit());
494 auto addressSize = read<uint8_t>(chunk);
495 SAFE_CHECK(addressSize == sizeof(uintptr_t), "invalid address size");
496 auto segmentSize = read<uint8_t>(chunk);
497 SAFE_CHECK(segmentSize == 0, "segmented architecture not supported");
498
499 // Padded to a multiple of 2 addresses.
500 // Strangely enough, this is the only place in the DWARF spec that requires
501 // padding.
502 skipPadding(chunk, aranges.data(), 2 * sizeof(uintptr_t));
503 for (;;)
504 {
505 auto start = read<uintptr_t>(chunk);
506 auto length = read<uintptr_t>(chunk);
507
508 if (start == 0 && length == 0)
509 break;
510
511 // Is our address in this range?
512 if (address >= start && address < start + length)
513 return true;
514 }
515 }
516 return false;
517}
518
/**
 * Find the @locationInfo for @address in the compilation unit represented
 * by the @sp .debug_info entry.
 * Returns whether the address was found.
 * Advances @sp to the next entry in .debug_info.
 *
 * (@sp is the parameter named @infoEntry below.)
 * Side effects on @locationInfo: hasMainFile/mainFile are set when the unit
 * has a non-empty DW_AT_name, even if the address is not found;
 * hasFileAndLine/file/line are set from the line number program.
 * Throws (via SAFE_CHECK) on malformed or unsupported data.
 */
bool Dwarf::findLocation(uintptr_t address, std::string_view & infoEntry, LocationInfo & locationInfo) const
{
    // For each compilation unit compiled with a DWARF producer, a
    // contribution is made to the .debug_info section of the object
    // file. Each such contribution consists of a compilation unit
    // header (see Section 7.5.1.1) followed by a single
    // DW_TAG_compile_unit or DW_TAG_partial_unit debugging information
    // entry, together with its children.

    // 7.5.1.1 Compilation Unit Header
    // 1. unit_length (4B or 12B): read by Section::next
    // 2. version (2B)
    // 3. debug_abbrev_offset (4B or 8B): offset into the .debug_abbrev section
    // 4. address_size (1B)

    Section debugInfoSection(infoEntry);
    std::string_view chunk;
    SAFE_CHECK(debugInfoSection.next(chunk), "invalid debug info");

    auto version = read<uint16_t>(chunk);
    SAFE_CHECK(version >= 2 && version <= 4, "invalid info version");
    uint64_t abbrevOffset = readOffset(chunk, debugInfoSection.is64Bit());
    auto addressSize = read<uint8_t>(chunk);
    SAFE_CHECK(addressSize == sizeof(uintptr_t), "invalid address size");

    // We survived so far. The first (and only) DIE should be DW_TAG_compile_unit
    // NOTE: - binutils <= 2.25 does not issue DW_TAG_partial_unit.
    //       - dwarf compression tools like `dwz` may generate it.
    // TODO(tudorb): Handle DW_TAG_partial_unit?
    auto code = readULEB(chunk);
    SAFE_CHECK(code != 0, "invalid code");
    auto abbr = getAbbreviation(code, abbrevOffset);
    SAFE_CHECK(abbr.tag == DW_TAG_compile_unit, "expecting compile unit entry");
    // Skip children entries, remove_prefix to the next compilation unit entry.
    // `chunk` ends exactly where this unit ends, so this moves infoEntry past the whole unit.
    infoEntry.remove_prefix(chunk.end() - infoEntry.begin());

    // Read attributes, extracting the few we care about
    bool foundLineOffset = false;
    uint64_t lineOffset = 0;
    std::string_view compilationDirectory;
    std::string_view mainFileName;

    DIEAbbreviation::Attribute attr;
    std::string_view attributes = abbr.attributes;
    for (;;)
    {
        // The abbreviation lists (name, form) pairs; the values themselves
        // follow in the same order inside the DIE (`chunk`).
        attr = readAttribute(attributes);
        if (attr.name == 0 && attr.form == 0)
        {
            break;
        }
        // Every value must be decoded, even unused ones, to stay in sync with `chunk`.
        auto val = readAttributeValue(chunk, attr.form, debugInfoSection.is64Bit());
        switch (attr.name)
        {
            case DW_AT_stmt_list:
                // Offset in .debug_line for the line number VM program for this
                // compilation unit
                lineOffset = std::get<uint64_t>(val);
                foundLineOffset = true;
                break;
            case DW_AT_comp_dir:
                // Compilation directory
                compilationDirectory = std::get<std::string_view>(val);
                break;
            case DW_AT_name:
                // File name of main file being compiled
                mainFileName = std::get<std::string_view>(val);
                break;
        }
    }

    if (!mainFileName.empty())
    {
        locationInfo.hasMainFile = true;
        locationInfo.mainFile = Path(compilationDirectory, "", mainFileName);
    }

    // Without a line number program there is nothing to look the address up in.
    if (!foundLineOffset)
    {
        return false;
    }

    // NOTE(review): lineOffset is not validated against line_.size() before remove_prefix.
    std::string_view lineSection(line_);
    lineSection.remove_prefix(lineOffset);
    LineNumberVM lineVM(lineSection, compilationDirectory);

    // Execute line number VM program to find file and line
    locationInfo.hasFileAndLine = lineVM.findAddress(address, locationInfo.file, locationInfo.line);
    return locationInfo.hasFileAndLine;
}
615
616bool Dwarf::findAddress(uintptr_t address, LocationInfo & locationInfo, LocationInfoMode mode) const
617{
618 locationInfo = LocationInfo();
619
620 if (mode == LocationInfoMode::DISABLED)
621 {
622 return false;
623 }
624
625 if (!elf_)
626 { // No file.
627 return false;
628 }
629
630 if (!aranges_.empty())
631 {
632 // Fast path: find the right .debug_info entry by looking up the
633 // address in .debug_aranges.
634 uint64_t offset = 0;
635 if (findDebugInfoOffset(address, aranges_, offset))
636 {
637 // Read compilation unit header from .debug_info
638 std::string_view infoEntry(info_);
639 infoEntry.remove_prefix(offset);
640 findLocation(address, infoEntry, locationInfo);
641 return locationInfo.hasFileAndLine;
642 }
643 else if (mode == LocationInfoMode::FAST)
644 {
645 // NOTE: Clang (when using -gdwarf-aranges) doesn't generate entries
646 // in .debug_aranges for some functions, but always generates
647 // .debug_info entries. Scanning .debug_info is slow, so fall back to
648 // it only if such behavior is requested via LocationInfoMode.
649 return false;
650 }
651 else
652 {
653 SAFE_CHECK(mode == LocationInfoMode::FULL, "unexpected mode");
654 // Fall back to the linear scan.
655 }
656 }
657
658 // Slow path (linear scan): Iterate over all .debug_info entries
659 // and look for the address in each compilation unit.
660 std::string_view infoEntry(info_);
661 while (!infoEntry.empty() && !locationInfo.hasFileAndLine)
662 findLocation(address, infoEntry, locationInfo);
663
664 return locationInfo.hasFileAndLine;
665}
666
667Dwarf::LineNumberVM::LineNumberVM(std::string_view data, std::string_view compilationDirectory)
668 : compilationDirectory_(compilationDirectory)
669{
670 Section section(data);
671 SAFE_CHECK(section.next(data_), "invalid line number VM");
672 is64Bit_ = section.is64Bit();
673 init();
674 reset();
675}
676
677void Dwarf::LineNumberVM::reset()
678{
679 address_ = 0;
680 file_ = 1;
681 line_ = 1;
682 column_ = 0;
683 isStmt_ = defaultIsStmt_;
684 basicBlock_ = false;
685 endSequence_ = false;
686 prologueEnd_ = false;
687 epilogueBegin_ = false;
688 isa_ = 0;
689 discriminator_ = 0;
690}
691
// Parses the line number program header at the front of data_.
// Afterwards data_ holds only the program (the bytes following the header),
// and includeDirectories_ / fileNames_ reference the raw, still unparsed
// tables inside the header together with their entry counts.
// Throws (via SAFE_CHECK) on unsupported versions or malformed data.
void Dwarf::LineNumberVM::init()
{
    version_ = read<uint16_t>(data_);
    SAFE_CHECK(version_ >= 2 && version_ <= 4, "invalid version in line number VM");
    uint64_t headerLength = readOffset(data_, is64Bit_);
    SAFE_CHECK(headerLength <= data_.size(), "invalid line number VM header length");
    // Split data_ into the rest of the header and the program that follows it.
    std::string_view header(data_.data(), headerLength);
    data_ = std::string_view(header.end(), data_.end() - header.end());

    minLength_ = read<uint8_t>(header);
    if (version_ == 4)
    { // Version 2 and 3 records don't have this
        uint8_t maxOpsPerInstruction = read<uint8_t>(header);
        SAFE_CHECK(maxOpsPerInstruction == 1, "VLIW not supported");
    }
    defaultIsStmt_ = read<uint8_t>(header);
    lineBase_ = read<int8_t>(header); // yes, signed
    // NOTE(review): lineRange_ is used as a divisor in step() and is not checked for zero here.
    lineRange_ = read<uint8_t>(header);
    opcodeBase_ = read<uint8_t>(header);
    SAFE_CHECK(opcodeBase_ != 0, "invalid opcode base");
    // One length byte per standard opcode (opcodes 1 .. opcodeBase_ - 1); kept as a
    // pointer into the header and indexed with [opcode - 1].
    standardOpcodeLengths_ = reinterpret_cast<const uint8_t *>(header.data()); //-V506
    // NOTE(review): not bounds-checked against header.size().
    header.remove_prefix(opcodeBase_ - 1);

    // We don't want to use heap, so we don't keep an unbounded amount of state.
    // We'll just skip over include directories and file names here, and
    // we'll loop again when we actually need to retrieve one.
    std::string_view sp;
    const char * tmp = header.data();
    includeDirectoryCount_ = 0;
    // The directory table ends with an empty string.
    while (!(sp = readNullTerminated(header)).empty())
    {
        ++includeDirectoryCount_;
    }
    includeDirectories_ = std::string_view(tmp, header.data() - tmp);

    tmp = header.data();
    FileName fn;
    fileNameCount_ = 0;
    // The file table ends with an entry whose name is empty (readFileName returns false).
    while (readFileName(header, fn))
    {
        ++fileNameCount_;
    }
    fileNames_ = std::string_view(tmp, header.data() - tmp);
}
736
737bool Dwarf::LineNumberVM::next(std::string_view & program)
738{
739 Dwarf::LineNumberVM::StepResult ret;
740 do
741 {
742 ret = step(program);
743 } while (ret == CONTINUE);
744
745 return (ret == COMMIT);
746}
747
748Dwarf::LineNumberVM::FileName Dwarf::LineNumberVM::getFileName(uint64_t index) const
749{
750 SAFE_CHECK(index != 0, "invalid file index 0");
751
752 FileName fn;
753 if (index <= fileNameCount_)
754 {
755 std::string_view fileNames = fileNames_;
756 for (; index; --index)
757 {
758 if (!readFileName(fileNames, fn))
759 {
760 abort();
761 }
762 }
763 return fn;
764 }
765
766 index -= fileNameCount_;
767
768 std::string_view program = data_;
769 for (; index; --index)
770 {
771 SAFE_CHECK(nextDefineFile(program, fn), "invalid file index");
772 }
773
774 return fn;
775}
776
777std::string_view Dwarf::LineNumberVM::getIncludeDirectory(uint64_t index) const
778{
779 if (index == 0)
780 {
781 return std::string_view();
782 }
783
784 SAFE_CHECK(index <= includeDirectoryCount_, "invalid include directory");
785
786 std::string_view includeDirectories = includeDirectories_;
787 std::string_view dir;
788 for (; index; --index)
789 {
790 dir = readNullTerminated(includeDirectories);
791 if (dir.empty())
792 {
793 abort(); // BUG
794 }
795 }
796
797 return dir;
798}
799
800bool Dwarf::LineNumberVM::readFileName(std::string_view & program, FileName & fn)
801{
802 fn.relativeName = readNullTerminated(program);
803 if (fn.relativeName.empty())
804 {
805 return false;
806 }
807 fn.directoryIndex = readULEB(program);
808 // Skip over file size and last modified time
809 readULEB(program);
810 readULEB(program);
811 return true;
812}
813
814bool Dwarf::LineNumberVM::nextDefineFile(std::string_view & program, FileName & fn) const
815{
816 while (!program.empty())
817 {
818 auto opcode = read<uint8_t>(program);
819
820 if (opcode >= opcodeBase_)
821 { // special opcode
822 continue;
823 }
824
825 if (opcode != 0)
826 { // standard opcode
827 // Skip, slurp the appropriate number of LEB arguments
828 uint8_t argCount = standardOpcodeLengths_[opcode - 1];
829 while (argCount--)
830 {
831 readULEB(program);
832 }
833 continue;
834 }
835
836 // Extended opcode
837 auto length = readULEB(program);
838 // the opcode itself should be included in the length, so length >= 1
839 SAFE_CHECK(length != 0, "invalid extended opcode length");
840 read<uint8_t>(program); // extended opcode
841 --length;
842
843 if (opcode == DW_LNE_define_file)
844 {
845 SAFE_CHECK(readFileName(program, fn), "invalid empty file in DW_LNE_define_file");
846 return true;
847 }
848
849 program.remove_prefix(length);
850 continue;
851 }
852
853 return false;
854}
855
// Executes exactly one instruction from the front of `program`, updating the
// VM registers and advancing `program`.
// Returns COMMIT when the instruction emits a row (special opcodes and
// DW_LNS_copy), END for DW_LNE_end_sequence, and CONTINUE otherwise.
// Throws (via SAFE_CHECK / read) on truncated or malformed input.
Dwarf::LineNumberVM::StepResult Dwarf::LineNumberVM::step(std::string_view & program)
{
    auto opcode = read<uint8_t>(program);

    if (opcode >= opcodeBase_)
    { // special opcode
        // A special opcode encodes both an address advance and a line advance.
        // NOTE(review): lineRange_ comes from the header unchecked; a value of 0 would divide by zero here.
        uint8_t adjustedOpcode = opcode - opcodeBase_;
        uint8_t opAdvance = adjustedOpcode / lineRange_;

        address_ += minLength_ * opAdvance;
        line_ += lineBase_ + adjustedOpcode % lineRange_;

        basicBlock_ = false;
        prologueEnd_ = false;
        epilogueBegin_ = false;
        discriminator_ = 0;
        return COMMIT;
    }

    if (opcode != 0)
    { // standard opcode
        // Only interpret opcodes that are recognized by the version we're parsing;
        // the others are vendor extensions and we should ignore them.
        switch (opcode)
        {
            case DW_LNS_copy:
                basicBlock_ = false;
                prologueEnd_ = false;
                epilogueBegin_ = false;
                discriminator_ = 0;
                return COMMIT;
            case DW_LNS_advance_pc:
                address_ += minLength_ * readULEB(program);
                return CONTINUE;
            case DW_LNS_advance_line:
                line_ += readSLEB(program);
                return CONTINUE;
            case DW_LNS_set_file:
                file_ = readULEB(program);
                return CONTINUE;
            case DW_LNS_set_column:
                column_ = readULEB(program);
                return CONTINUE;
            case DW_LNS_negate_stmt:
                isStmt_ = !isStmt_;
                return CONTINUE;
            case DW_LNS_set_basic_block:
                basicBlock_ = true;
                return CONTINUE;
            case DW_LNS_const_add_pc:
                // Same address advance as special opcode 255, without touching the line.
                address_ += minLength_ * ((255 - opcodeBase_) / lineRange_);
                return CONTINUE;
            case DW_LNS_fixed_advance_pc:
                // Operand is a fixed 2-byte value (not a LEB) and is not scaled by minLength_.
                address_ += read<uint16_t>(program);
                return CONTINUE;
            case DW_LNS_set_prologue_end:
                if (version_ == 2)
                {
                    break; // not supported in version 2
                }
                prologueEnd_ = true;
                return CONTINUE;
            case DW_LNS_set_epilogue_begin:
                if (version_ == 2)
                {
                    break; // not supported in version 2
                }
                epilogueBegin_ = true;
                return CONTINUE;
            case DW_LNS_set_isa:
                if (version_ == 2)
                {
                    break; // not supported in version 2
                }
                isa_ = readULEB(program);
                return CONTINUE;
        }

        // Unrecognized standard opcode, slurp the appropriate number of LEB
        // arguments.
        uint8_t argCount = standardOpcodeLengths_[opcode - 1];
        while (argCount--)
        {
            readULEB(program);
        }
        return CONTINUE;
    }

    // Extended opcode
    auto length = readULEB(program);
    // the opcode itself should be included in the length, so length >= 1
    SAFE_CHECK(length != 0, "invalid extended opcode length");
    auto extendedOpcode = read<uint8_t>(program);
    --length; // from here on `length` counts only the operand bytes

    switch (extendedOpcode)
    {
        case DW_LNE_end_sequence:
            return END;
        case DW_LNE_set_address:
            address_ = read<uintptr_t>(program);
            return CONTINUE;
        case DW_LNE_define_file:
            // We can't process DW_LNE_define_file here, as it would require us to
            // use unbounded amounts of state (ie. use the heap). We'll do a second
            // pass (using nextDefineFile()) if necessary.
            break;
        case DW_LNE_set_discriminator:
            discriminator_ = readULEB(program);
            return CONTINUE;
    }

    // Unrecognized extended opcode
    // NOTE(review): `length` is not checked against program.size() before remove_prefix.
    program.remove_prefix(length);
    return CONTINUE;
}
972
// Runs the line number program from its beginning, looking for the row that
// covers `target`. On success stores the source path in `file` and the line
// number in `line` and returns true; returns false if no sequence covers
// `target` (the outputs are then left untouched).
// Resets the VM registers before running and at every sequence end.
bool Dwarf::LineNumberVM::findAddress(uintptr_t target, Path & file, uint64_t & line)
{
    std::string_view program = data_;

    // Within each sequence of instructions, the address may only increase.
    // Unfortunately, within the same compilation unit, sequences may appear
    // in any order. So any sequence is a candidate if it starts at an address
    // <= the target address, and we know we've found the target address if
    // a candidate crosses the target address.
    enum State
    {
        START,
        LOW_SEQ, // candidate
        HIGH_SEQ
    };
    State state = START;
    reset();

    // File and line of the previous row in the current candidate sequence.
    uint64_t prevFile = 0;
    uint64_t prevLine = 0;
    while (!program.empty())
    {
        // next() returns false when the sequence ended instead of producing a row.
        bool seqEnd = !next(program);

        if (state == START)
        {
            // The first row of a sequence decides whether the sequence is a candidate.
            if (!seqEnd)
            {
                state = address_ <= target ? LOW_SEQ : HIGH_SEQ;
            }
        }

        if (state == LOW_SEQ)
        {
            if (address_ > target)
            {
                // Found it! Note that ">" is indeed correct (not ">="), as each
                // sequence is guaranteed to have one entry past-the-end (emitted by
                // DW_LNE_end_sequence)
                if (prevFile == 0)
                {
                    return false;
                }
                auto fn = getFileName(prevFile);
                file = Path(compilationDirectory_, getIncludeDirectory(fn.directoryIndex), fn.relativeName);
                line = prevLine;
                return true;
            }
            // Not past the target yet: remember this row as the best match so far.
            prevFile = file_;
            prevLine = line_;
        }

        if (seqEnd)
        {
            // Start over for the next sequence. prevFile/prevLine are deliberately not cleared here.
            state = START;
            reset();
        }
    }

    return false;
}
1034
1035}
1036
1037#endif
1038