1/*
2 * Copyright 2012-present Facebook, Inc.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include <folly/experimental/symbolizer/Dwarf.h>
18
19#include <type_traits>
20
21// We can delete this #if check once we completely deprecate and remove
22// the autoconf build.
23#if __has_include(<libdwarf/dwarf.h>)
24#include <libdwarf/dwarf.h>
25#else
26#include <dwarf.h> // @manual
27#endif
28
29namespace folly {
30namespace symbolizer {
31
32Dwarf::Dwarf(const ElfFile* elf) : elf_(elf) {
33 init();
34}
35
36Dwarf::Section::Section(folly::StringPiece d) : is64Bit_(false), data_(d) {}
37
38namespace {
39
40// All following read* functions read from a StringPiece, advancing the
41// StringPiece, and aborting if there's not enough room.
42
43// Read (bitwise) one object of type T
44template <class T>
45typename std::enable_if<std::is_pod<T>::value, T>::type read(
46 folly::StringPiece& sp) {
47 FOLLY_SAFE_CHECK(sp.size() >= sizeof(T), "underflow");
48 T x;
49 memcpy(&x, sp.data(), sizeof(T));
50 sp.advance(sizeof(T));
51 return x;
52}
53
54// Read ULEB (unsigned) varint value; algorithm from the DWARF spec
55uint64_t readULEB(folly::StringPiece& sp, uint8_t& shift, uint8_t& val) {
56 uint64_t r = 0;
57 shift = 0;
58 do {
59 val = read<uint8_t>(sp);
60 r |= ((uint64_t)(val & 0x7f) << shift);
61 shift += 7;
62 } while (val & 0x80);
63 return r;
64}
65
66uint64_t readULEB(folly::StringPiece& sp) {
67 uint8_t shift;
68 uint8_t val;
69 return readULEB(sp, shift, val);
70}
71
72// Read SLEB (signed) varint value; algorithm from the DWARF spec
73int64_t readSLEB(folly::StringPiece& sp) {
74 uint8_t shift;
75 uint8_t val;
76 uint64_t r = readULEB(sp, shift, val);
77
78 if (shift < 64 && (val & 0x40)) {
79 r |= -(1ULL << shift); // sign extend
80 }
81
82 return r;
83}
84
85// Read a value of "section offset" type, which may be 4 or 8 bytes
86uint64_t readOffset(folly::StringPiece& sp, bool is64Bit) {
87 return is64Bit ? read<uint64_t>(sp) : read<uint32_t>(sp);
88}
89
90// Read "len" bytes
91folly::StringPiece readBytes(folly::StringPiece& sp, uint64_t len) {
92 FOLLY_SAFE_CHECK(len >= sp.size(), "invalid string length");
93 folly::StringPiece ret(sp.data(), len);
94 sp.advance(len);
95 return ret;
96}
97
98// Read a null-terminated string
99folly::StringPiece readNullTerminated(folly::StringPiece& sp) {
100 const char* p = static_cast<const char*>(memchr(sp.data(), 0, sp.size()));
101 FOLLY_SAFE_CHECK(p, "invalid null-terminated string");
102 folly::StringPiece ret(sp.data(), p);
103 sp.assign(p + 1, sp.end());
104 return ret;
105}
106
107// Skip over padding until sp.data() - start is a multiple of alignment
108void skipPadding(folly::StringPiece& sp, const char* start, size_t alignment) {
109 size_t remainder = (sp.data() - start) % alignment;
110 if (remainder) {
111 FOLLY_SAFE_CHECK(alignment - remainder <= sp.size(), "invalid padding");
112 sp.advance(alignment - remainder);
113 }
114}
115
116// Simplify a path -- as much as we can while not moving data around...
117void simplifyPath(folly::StringPiece& sp) {
118 // Strip leading slashes and useless patterns (./), leaving one initial
119 // slash.
120 for (;;) {
121 if (sp.empty()) {
122 return;
123 }
124
125 // Strip leading slashes, leaving one.
126 while (sp.startsWith("//")) {
127 sp.advance(1);
128 }
129
130 if (sp.startsWith("/./")) {
131 // Note 2, not 3, to keep it absolute
132 sp.advance(2);
133 continue;
134 }
135
136 if (sp.removePrefix("./")) {
137 // Also remove any subsequent slashes to avoid making this path absolute.
138 while (sp.startsWith('/')) {
139 sp.advance(1);
140 }
141 continue;
142 }
143
144 break;
145 }
146
147 // Strip trailing slashes and useless patterns (/.).
148 for (;;) {
149 if (sp.empty()) {
150 return;
151 }
152
153 // Strip trailing slashes, except when this is the root path.
154 while (sp.size() > 1 && sp.removeSuffix('/')) {
155 }
156
157 if (sp.removeSuffix("/.")) {
158 continue;
159 }
160
161 break;
162 }
163}
164
165} // namespace
166
167Dwarf::Path::Path(
168 folly::StringPiece baseDir,
169 folly::StringPiece subDir,
170 folly::StringPiece file)
171 : baseDir_(baseDir), subDir_(subDir), file_(file) {
172 using std::swap;
173
174 // Normalize
175 if (file_.empty()) {
176 baseDir_.clear();
177 subDir_.clear();
178 return;
179 }
180
181 if (file_[0] == '/') {
182 // file_ is absolute
183 baseDir_.clear();
184 subDir_.clear();
185 }
186
187 if (!subDir_.empty() && subDir_[0] == '/') {
188 baseDir_.clear(); // subDir_ is absolute
189 }
190
191 simplifyPath(baseDir_);
192 simplifyPath(subDir_);
193 simplifyPath(file_);
194
195 // Make sure it's never the case that baseDir_ is empty, but subDir_ isn't.
196 if (baseDir_.empty()) {
197 swap(baseDir_, subDir_);
198 }
199}
200
201size_t Dwarf::Path::size() const {
202 size_t size = 0;
203 bool needsSlash = false;
204
205 if (!baseDir_.empty()) {
206 size += baseDir_.size();
207 needsSlash = !baseDir_.endsWith('/');
208 }
209
210 if (!subDir_.empty()) {
211 size += needsSlash;
212 size += subDir_.size();
213 needsSlash = !subDir_.endsWith('/');
214 }
215
216 if (!file_.empty()) {
217 size += needsSlash;
218 size += file_.size();
219 }
220
221 return size;
222}
223
224size_t Dwarf::Path::toBuffer(char* buf, size_t bufSize) const {
225 size_t totalSize = 0;
226 bool needsSlash = false;
227
228 auto append = [&](folly::StringPiece sp) {
229 if (bufSize >= 2) {
230 size_t toCopy = std::min(sp.size(), bufSize - 1);
231 memcpy(buf, sp.data(), toCopy);
232 buf += toCopy;
233 bufSize -= toCopy;
234 }
235 totalSize += sp.size();
236 };
237
238 if (!baseDir_.empty()) {
239 append(baseDir_);
240 needsSlash = !baseDir_.endsWith('/');
241 }
242 if (!subDir_.empty()) {
243 if (needsSlash) {
244 append("/");
245 }
246 append(subDir_);
247 needsSlash = !subDir_.endsWith('/');
248 }
249 if (!file_.empty()) {
250 if (needsSlash) {
251 append("/");
252 }
253 append(file_);
254 }
255 if (bufSize) {
256 *buf = '\0';
257 }
258 assert(totalSize == size());
259 return totalSize;
260}
261
262void Dwarf::Path::toString(std::string& dest) const {
263 size_t initialSize = dest.size();
264 dest.reserve(initialSize + size());
265 if (!baseDir_.empty()) {
266 dest.append(baseDir_.begin(), baseDir_.end());
267 }
268 if (!subDir_.empty()) {
269 if (!dest.empty() && dest.back() != '/') {
270 dest.push_back('/');
271 }
272 dest.append(subDir_.begin(), subDir_.end());
273 }
274 if (!file_.empty()) {
275 if (!dest.empty() && dest.back() != '/') {
276 dest.push_back('/');
277 }
278 dest.append(file_.begin(), file_.end());
279 }
280 assert(dest.size() == initialSize + size());
281}
282
283// Next chunk in section
284bool Dwarf::Section::next(folly::StringPiece& chunk) {
285 chunk = data_;
286 if (chunk.empty()) {
287 return false;
288 }
289
290 // Initial length is a uint32_t value for a 32-bit section, and
291 // a 96-bit value (0xffffffff followed by the 64-bit length) for a 64-bit
292 // section.
293 auto initialLength = read<uint32_t>(chunk);
294 is64Bit_ = (initialLength == (uint32_t)-1);
295 auto length = is64Bit_ ? read<uint64_t>(chunk) : initialLength;
296 FOLLY_SAFE_CHECK(length <= chunk.size(), "invalid DWARF section");
297 chunk.reset(chunk.data(), length);
298 data_.assign(chunk.end(), data_.end());
299 return true;
300}
301
302bool Dwarf::getSection(const char* name, folly::StringPiece* section) const {
303 const ElfW(Shdr)* elfSection = elf_->getSectionByName(name);
304 if (!elfSection) {
305 return false;
306 }
307#ifdef SHF_COMPRESSED
308 if (elfSection->sh_flags & SHF_COMPRESSED) {
309 return false;
310 }
311#endif
312 *section = elf_->getSectionBody(*elfSection);
313 return true;
314}
315
316void Dwarf::init() {
317 // Make sure that all .debug_* sections exist
318 if (!getSection(".debug_info", &info_) ||
319 !getSection(".debug_abbrev", &abbrev_) ||
320 !getSection(".debug_line", &line_) ||
321 !getSection(".debug_str", &strings_)) {
322 elf_ = nullptr;
323 return;
324 }
325
326 // Optional: fast address range lookup. If missing .debug_info can
327 // be used - but it's much slower (linear scan).
328 getSection(".debug_aranges", &aranges_);
329}
330
331bool Dwarf::readAbbreviation(
332 folly::StringPiece& section,
333 DIEAbbreviation& abbr) {
334 // abbreviation code
335 abbr.code = readULEB(section);
336 if (abbr.code == 0) {
337 return false;
338 }
339
340 // abbreviation tag
341 abbr.tag = readULEB(section);
342
343 // does this entry have children?
344 abbr.hasChildren = (read<uint8_t>(section) != DW_CHILDREN_no);
345
346 // attributes
347 const char* attributeBegin = section.data();
348 for (;;) {
349 FOLLY_SAFE_CHECK(!section.empty(), "invalid attribute section");
350 auto attr = readAttribute(section);
351 if (attr.name == 0 && attr.form == 0) {
352 break;
353 }
354 }
355
356 abbr.attributes.assign(attributeBegin, section.data());
357 return true;
358}
359
360Dwarf::DIEAbbreviation::Attribute Dwarf::readAttribute(folly::StringPiece& sp) {
361 return {readULEB(sp), readULEB(sp)};
362}
363
364Dwarf::DIEAbbreviation Dwarf::getAbbreviation(uint64_t code, uint64_t offset)
365 const {
366 // Linear search in the .debug_abbrev section, starting at offset
367 folly::StringPiece section = abbrev_;
368 section.advance(offset);
369
370 Dwarf::DIEAbbreviation abbr;
371 while (readAbbreviation(section, abbr)) {
372 if (abbr.code == code) {
373 return abbr;
374 }
375 }
376
377 FOLLY_SAFE_CHECK(false, "could not find abbreviation code");
378}
379
380Dwarf::AttributeValue Dwarf::readAttributeValue(
381 folly::StringPiece& sp,
382 uint64_t form,
383 bool is64Bit) const {
384 switch (form) {
385 case DW_FORM_addr:
386 return read<uintptr_t>(sp);
387 case DW_FORM_block1:
388 return readBytes(sp, read<uint8_t>(sp));
389 case DW_FORM_block2:
390 return readBytes(sp, read<uint16_t>(sp));
391 case DW_FORM_block4:
392 return readBytes(sp, read<uint32_t>(sp));
393 case DW_FORM_block: // fallthrough
394 case DW_FORM_exprloc:
395 return readBytes(sp, readULEB(sp));
396 case DW_FORM_data1: // fallthrough
397 case DW_FORM_ref1:
398 return read<uint8_t>(sp);
399 case DW_FORM_data2: // fallthrough
400 case DW_FORM_ref2:
401 return read<uint16_t>(sp);
402 case DW_FORM_data4: // fallthrough
403 case DW_FORM_ref4:
404 return read<uint32_t>(sp);
405 case DW_FORM_data8: // fallthrough
406 case DW_FORM_ref8:
407 return read<uint64_t>(sp);
408 case DW_FORM_sdata:
409 return readSLEB(sp);
410 case DW_FORM_udata: // fallthrough
411 case DW_FORM_ref_udata:
412 return readULEB(sp);
413 case DW_FORM_flag:
414 return read<uint8_t>(sp);
415 case DW_FORM_flag_present:
416 return 1;
417 case DW_FORM_sec_offset: // fallthrough
418 case DW_FORM_ref_addr:
419 return readOffset(sp, is64Bit);
420 case DW_FORM_string:
421 return readNullTerminated(sp);
422 case DW_FORM_strp:
423 return getStringFromStringSection(readOffset(sp, is64Bit));
424 case DW_FORM_indirect: // form is explicitly specified
425 return readAttributeValue(sp, readULEB(sp), is64Bit);
426 default:
427 FOLLY_SAFE_CHECK(false, "invalid attribute form");
428 }
429}
430
431folly::StringPiece Dwarf::getStringFromStringSection(uint64_t offset) const {
432 FOLLY_SAFE_CHECK(offset < strings_.size(), "invalid strp offset");
433 folly::StringPiece sp(strings_);
434 sp.advance(offset);
435 return readNullTerminated(sp);
436}
437
438/**
439 * Find @address in .debug_aranges and return the offset in
440 * .debug_info for compilation unit to which this address belongs.
441 */
442bool Dwarf::findDebugInfoOffset(
443 uintptr_t address,
444 StringPiece aranges,
445 uint64_t& offset) {
446 Section arangesSection(aranges);
447 folly::StringPiece chunk;
448 while (arangesSection.next(chunk)) {
449 auto version = read<uint16_t>(chunk);
450 FOLLY_SAFE_CHECK(version == 2, "invalid aranges version");
451
452 offset = readOffset(chunk, arangesSection.is64Bit());
453 auto addressSize = read<uint8_t>(chunk);
454 FOLLY_SAFE_CHECK(addressSize == sizeof(uintptr_t), "invalid address size");
455 auto segmentSize = read<uint8_t>(chunk);
456 FOLLY_SAFE_CHECK(segmentSize == 0, "segmented architecture not supported");
457
458 // Padded to a multiple of 2 addresses.
459 // Strangely enough, this is the only place in the DWARF spec that requires
460 // padding.
461 skipPadding(chunk, aranges.data(), 2 * sizeof(uintptr_t));
462 for (;;) {
463 auto start = read<uintptr_t>(chunk);
464 auto length = read<uintptr_t>(chunk);
465
466 if (start == 0 && length == 0) {
467 break;
468 }
469
470 // Is our address in this range?
471 if (address >= start && address < start + length) {
472 return true;
473 }
474 }
475 }
476 return false;
477}
478
479/**
480 * Find the @locationInfo for @address in the compilation unit represented
481 * by the @sp .debug_info entry.
482 * Returns whether the address was found.
483 * Advances @sp to the next entry in .debug_info.
484 */
485bool Dwarf::findLocation(
486 uintptr_t address,
487 StringPiece& infoEntry,
488 LocationInfo& locationInfo) const {
489 // For each compilation unit compiled with a DWARF producer, a
490 // contribution is made to the .debug_info section of the object
491 // file. Each such contribution consists of a compilation unit
492 // header (see Section 7.5.1.1) followed by a single
493 // DW_TAG_compile_unit or DW_TAG_partial_unit debugging information
494 // entry, together with its children.
495
496 // 7.5.1.1 Compilation Unit Header
497 // 1. unit_length (4B or 12B): read by Section::next
498 // 2. version (2B)
499 // 3. debug_abbrev_offset (4B or 8B): offset into the .debug_abbrev section
500 // 4. address_size (1B)
501
502 Section debugInfoSection(infoEntry);
503 folly::StringPiece chunk;
504 FOLLY_SAFE_CHECK(debugInfoSection.next(chunk), "invalid debug info");
505
506 auto version = read<uint16_t>(chunk);
507 FOLLY_SAFE_CHECK(version >= 2 && version <= 4, "invalid info version");
508 uint64_t abbrevOffset = readOffset(chunk, debugInfoSection.is64Bit());
509 auto addressSize = read<uint8_t>(chunk);
510 FOLLY_SAFE_CHECK(addressSize == sizeof(uintptr_t), "invalid address size");
511
512 // We survived so far. The first (and only) DIE should be DW_TAG_compile_unit
513 // NOTE: - binutils <= 2.25 does not issue DW_TAG_partial_unit.
514 // - dwarf compression tools like `dwz` may generate it.
515 // TODO(tudorb): Handle DW_TAG_partial_unit?
516 auto code = readULEB(chunk);
517 FOLLY_SAFE_CHECK(code != 0, "invalid code");
518 auto abbr = getAbbreviation(code, abbrevOffset);
519 FOLLY_SAFE_CHECK(
520 abbr.tag == DW_TAG_compile_unit, "expecting compile unit entry");
521 // Skip children entries, advance to the next compilation unit entry.
522 infoEntry.advance(chunk.end() - infoEntry.begin());
523
524 // Read attributes, extracting the few we care about
525 bool foundLineOffset = false;
526 uint64_t lineOffset = 0;
527 folly::StringPiece compilationDirectory;
528 folly::StringPiece mainFileName;
529
530 DIEAbbreviation::Attribute attr;
531 folly::StringPiece attributes = abbr.attributes;
532 for (;;) {
533 attr = readAttribute(attributes);
534 if (attr.name == 0 && attr.form == 0) {
535 break;
536 }
537 auto val = readAttributeValue(chunk, attr.form, debugInfoSection.is64Bit());
538 switch (attr.name) {
539 case DW_AT_stmt_list:
540 // Offset in .debug_line for the line number VM program for this
541 // compilation unit
542 lineOffset = boost::get<uint64_t>(val);
543 foundLineOffset = true;
544 break;
545 case DW_AT_comp_dir:
546 // Compilation directory
547 compilationDirectory = boost::get<folly::StringPiece>(val);
548 break;
549 case DW_AT_name:
550 // File name of main file being compiled
551 mainFileName = boost::get<folly::StringPiece>(val);
552 break;
553 }
554 }
555
556 if (!mainFileName.empty()) {
557 locationInfo.hasMainFile = true;
558 locationInfo.mainFile = Path(compilationDirectory, "", mainFileName);
559 }
560
561 if (!foundLineOffset) {
562 return false;
563 }
564
565 folly::StringPiece lineSection(line_);
566 lineSection.advance(lineOffset);
567 LineNumberVM lineVM(lineSection, compilationDirectory);
568
569 // Execute line number VM program to find file and line
570 locationInfo.hasFileAndLine =
571 lineVM.findAddress(address, locationInfo.file, locationInfo.line);
572 return locationInfo.hasFileAndLine;
573}
574
575bool Dwarf::findAddress(
576 uintptr_t address,
577 LocationInfo& locationInfo,
578 LocationInfoMode mode) const {
579 locationInfo = LocationInfo();
580
581 if (mode == LocationInfoMode::DISABLED) {
582 return false;
583 }
584
585 if (!elf_) { // No file.
586 return false;
587 }
588
589 if (!aranges_.empty()) {
590 // Fast path: find the right .debug_info entry by looking up the
591 // address in .debug_aranges.
592 uint64_t offset = 0;
593 if (findDebugInfoOffset(address, aranges_, offset)) {
594 // Read compilation unit header from .debug_info
595 folly::StringPiece infoEntry(info_);
596 infoEntry.advance(offset);
597 findLocation(address, infoEntry, locationInfo);
598 return locationInfo.hasFileAndLine;
599 } else if (mode == LocationInfoMode::FAST) {
600 // NOTE: Clang (when using -gdwarf-aranges) doesn't generate entries
601 // in .debug_aranges for some functions, but always generates
602 // .debug_info entries. Scanning .debug_info is slow, so fall back to
603 // it only if such behavior is requested via LocationInfoMode.
604 return false;
605 } else {
606 DCHECK(mode == LocationInfoMode::FULL);
607 // Fall back to the linear scan.
608 }
609 }
610
611 // Slow path (linear scan): Iterate over all .debug_info entries
612 // and look for the address in each compilation unit.
613 folly::StringPiece infoEntry(info_);
614 while (!infoEntry.empty() && !locationInfo.hasFileAndLine) {
615 findLocation(address, infoEntry, locationInfo);
616 }
617 return locationInfo.hasFileAndLine;
618}
619
620Dwarf::LineNumberVM::LineNumberVM(
621 folly::StringPiece data,
622 folly::StringPiece compilationDirectory)
623 : compilationDirectory_(compilationDirectory) {
624 Section section(data);
625 FOLLY_SAFE_CHECK(section.next(data_), "invalid line number VM");
626 is64Bit_ = section.is64Bit();
627 init();
628 reset();
629}
630
631void Dwarf::LineNumberVM::reset() {
632 address_ = 0;
633 file_ = 1;
634 line_ = 1;
635 column_ = 0;
636 isStmt_ = defaultIsStmt_;
637 basicBlock_ = false;
638 endSequence_ = false;
639 prologueEnd_ = false;
640 epilogueBegin_ = false;
641 isa_ = 0;
642 discriminator_ = 0;
643}
644
645void Dwarf::LineNumberVM::init() {
646 version_ = read<uint16_t>(data_);
647 FOLLY_SAFE_CHECK(
648 version_ >= 2 && version_ <= 4, "invalid version in line number VM");
649 uint64_t headerLength = readOffset(data_, is64Bit_);
650 FOLLY_SAFE_CHECK(
651 headerLength <= data_.size(), "invalid line number VM header length");
652 folly::StringPiece header(data_.data(), headerLength);
653 data_.assign(header.end(), data_.end());
654
655 minLength_ = read<uint8_t>(header);
656 if (version_ == 4) { // Version 2 and 3 records don't have this
657 uint8_t maxOpsPerInstruction = read<uint8_t>(header);
658 FOLLY_SAFE_CHECK(maxOpsPerInstruction == 1, "VLIW not supported");
659 }
660 defaultIsStmt_ = read<uint8_t>(header);
661 lineBase_ = read<int8_t>(header); // yes, signed
662 lineRange_ = read<uint8_t>(header);
663 opcodeBase_ = read<uint8_t>(header);
664 FOLLY_SAFE_CHECK(opcodeBase_ != 0, "invalid opcode base");
665 standardOpcodeLengths_ = reinterpret_cast<const uint8_t*>(header.data());
666 header.advance(opcodeBase_ - 1);
667
668 // We don't want to use heap, so we don't keep an unbounded amount of state.
669 // We'll just skip over include directories and file names here, and
670 // we'll loop again when we actually need to retrieve one.
671 folly::StringPiece sp;
672 const char* tmp = header.data();
673 includeDirectoryCount_ = 0;
674 while (!(sp = readNullTerminated(header)).empty()) {
675 ++includeDirectoryCount_;
676 }
677 includeDirectories_.assign(tmp, header.data());
678
679 tmp = header.data();
680 FileName fn;
681 fileNameCount_ = 0;
682 while (readFileName(header, fn)) {
683 ++fileNameCount_;
684 }
685 fileNames_.assign(tmp, header.data());
686}
687
688bool Dwarf::LineNumberVM::next(folly::StringPiece& program) {
689 Dwarf::LineNumberVM::StepResult ret;
690 do {
691 ret = step(program);
692 } while (ret == CONTINUE);
693
694 return (ret == COMMIT);
695}
696
697Dwarf::LineNumberVM::FileName Dwarf::LineNumberVM::getFileName(
698 uint64_t index) const {
699 FOLLY_SAFE_CHECK(index != 0, "invalid file index 0");
700
701 FileName fn;
702 if (index <= fileNameCount_) {
703 folly::StringPiece fileNames = fileNames_;
704 for (; index; --index) {
705 if (!readFileName(fileNames, fn)) {
706 abort();
707 }
708 }
709 return fn;
710 }
711
712 index -= fileNameCount_;
713
714 folly::StringPiece program = data_;
715 for (; index; --index) {
716 FOLLY_SAFE_CHECK(nextDefineFile(program, fn), "invalid file index");
717 }
718
719 return fn;
720}
721
722folly::StringPiece Dwarf::LineNumberVM::getIncludeDirectory(
723 uint64_t index) const {
724 if (index == 0) {
725 return folly::StringPiece();
726 }
727
728 FOLLY_SAFE_CHECK(
729 index <= includeDirectoryCount_, "invalid include directory");
730
731 folly::StringPiece includeDirectories = includeDirectories_;
732 folly::StringPiece dir;
733 for (; index; --index) {
734 dir = readNullTerminated(includeDirectories);
735 if (dir.empty()) {
736 abort(); // BUG
737 }
738 }
739
740 return dir;
741}
742
743bool Dwarf::LineNumberVM::readFileName(
744 folly::StringPiece& program,
745 FileName& fn) {
746 fn.relativeName = readNullTerminated(program);
747 if (fn.relativeName.empty()) {
748 return false;
749 }
750 fn.directoryIndex = readULEB(program);
751 // Skip over file size and last modified time
752 readULEB(program);
753 readULEB(program);
754 return true;
755}
756
757bool Dwarf::LineNumberVM::nextDefineFile(
758 folly::StringPiece& program,
759 FileName& fn) const {
760 while (!program.empty()) {
761 auto opcode = read<uint8_t>(program);
762
763 if (opcode >= opcodeBase_) { // special opcode
764 continue;
765 }
766
767 if (opcode != 0) { // standard opcode
768 // Skip, slurp the appropriate number of LEB arguments
769 uint8_t argCount = standardOpcodeLengths_[opcode - 1];
770 while (argCount--) {
771 readULEB(program);
772 }
773 continue;
774 }
775
776 // Extended opcode
777 auto length = readULEB(program);
778 // the opcode itself should be included in the length, so length >= 1
779 FOLLY_SAFE_CHECK(length != 0, "invalid extended opcode length");
780 read<uint8_t>(program); // extended opcode
781 --length;
782
783 if (opcode == DW_LNE_define_file) {
784 FOLLY_SAFE_CHECK(
785 readFileName(program, fn),
786 "invalid empty file in DW_LNE_define_file");
787 return true;
788 }
789
790 program.advance(length);
791 continue;
792 }
793
794 return false;
795}
796
797Dwarf::LineNumberVM::StepResult Dwarf::LineNumberVM::step(
798 folly::StringPiece& program) {
799 auto opcode = read<uint8_t>(program);
800
801 if (opcode >= opcodeBase_) { // special opcode
802 uint8_t adjustedOpcode = opcode - opcodeBase_;
803 uint8_t opAdvance = adjustedOpcode / lineRange_;
804
805 address_ += minLength_ * opAdvance;
806 line_ += lineBase_ + adjustedOpcode % lineRange_;
807
808 basicBlock_ = false;
809 prologueEnd_ = false;
810 epilogueBegin_ = false;
811 discriminator_ = 0;
812 return COMMIT;
813 }
814
815 if (opcode != 0) { // standard opcode
816 // Only interpret opcodes that are recognized by the version we're parsing;
817 // the others are vendor extensions and we should ignore them.
818 switch (opcode) {
819 case DW_LNS_copy:
820 basicBlock_ = false;
821 prologueEnd_ = false;
822 epilogueBegin_ = false;
823 discriminator_ = 0;
824 return COMMIT;
825 case DW_LNS_advance_pc:
826 address_ += minLength_ * readULEB(program);
827 return CONTINUE;
828 case DW_LNS_advance_line:
829 line_ += readSLEB(program);
830 return CONTINUE;
831 case DW_LNS_set_file:
832 file_ = readULEB(program);
833 return CONTINUE;
834 case DW_LNS_set_column:
835 column_ = readULEB(program);
836 return CONTINUE;
837 case DW_LNS_negate_stmt:
838 isStmt_ = !isStmt_;
839 return CONTINUE;
840 case DW_LNS_set_basic_block:
841 basicBlock_ = true;
842 return CONTINUE;
843 case DW_LNS_const_add_pc:
844 address_ += minLength_ * ((255 - opcodeBase_) / lineRange_);
845 return CONTINUE;
846 case DW_LNS_fixed_advance_pc:
847 address_ += read<uint16_t>(program);
848 return CONTINUE;
849 case DW_LNS_set_prologue_end:
850 if (version_ == 2) {
851 break; // not supported in version 2
852 }
853 prologueEnd_ = true;
854 return CONTINUE;
855 case DW_LNS_set_epilogue_begin:
856 if (version_ == 2) {
857 break; // not supported in version 2
858 }
859 epilogueBegin_ = true;
860 return CONTINUE;
861 case DW_LNS_set_isa:
862 if (version_ == 2) {
863 break; // not supported in version 2
864 }
865 isa_ = readULEB(program);
866 return CONTINUE;
867 }
868
869 // Unrecognized standard opcode, slurp the appropriate number of LEB
870 // arguments.
871 uint8_t argCount = standardOpcodeLengths_[opcode - 1];
872 while (argCount--) {
873 readULEB(program);
874 }
875 return CONTINUE;
876 }
877
878 // Extended opcode
879 auto length = readULEB(program);
880 // the opcode itself should be included in the length, so length >= 1
881 FOLLY_SAFE_CHECK(length != 0, "invalid extended opcode length");
882 auto extendedOpcode = read<uint8_t>(program);
883 --length;
884
885 switch (extendedOpcode) {
886 case DW_LNE_end_sequence:
887 return END;
888 case DW_LNE_set_address:
889 address_ = read<uintptr_t>(program);
890 return CONTINUE;
891 case DW_LNE_define_file:
892 // We can't process DW_LNE_define_file here, as it would require us to
893 // use unbounded amounts of state (ie. use the heap). We'll do a second
894 // pass (using nextDefineFile()) if necessary.
895 break;
896 case DW_LNE_set_discriminator:
897 discriminator_ = readULEB(program);
898 return CONTINUE;
899 }
900
901 // Unrecognized extended opcode
902 program.advance(length);
903 return CONTINUE;
904}
905
906bool Dwarf::LineNumberVM::findAddress(
907 uintptr_t target,
908 Path& file,
909 uint64_t& line) {
910 folly::StringPiece program = data_;
911
912 // Within each sequence of instructions, the address may only increase.
913 // Unfortunately, within the same compilation unit, sequences may appear
914 // in any order. So any sequence is a candidate if it starts at an address
915 // <= the target address, and we know we've found the target address if
916 // a candidate crosses the target address.
917 enum State {
918 START,
919 LOW_SEQ, // candidate
920 HIGH_SEQ
921 };
922 State state = START;
923 reset();
924
925 uint64_t prevFile = 0;
926 uint64_t prevLine = 0;
927 while (!program.empty()) {
928 bool seqEnd = !next(program);
929
930 if (state == START) {
931 if (!seqEnd) {
932 state = address_ <= target ? LOW_SEQ : HIGH_SEQ;
933 }
934 }
935
936 if (state == LOW_SEQ) {
937 if (address_ > target) {
938 // Found it! Note that ">" is indeed correct (not ">="), as each
939 // sequence is guaranteed to have one entry past-the-end (emitted by
940 // DW_LNE_end_sequence)
941 if (prevFile == 0) {
942 return false;
943 }
944 auto fn = getFileName(prevFile);
945 file = Path(
946 compilationDirectory_,
947 getIncludeDirectory(fn.directoryIndex),
948 fn.relativeName);
949 line = prevLine;
950 return true;
951 }
952 prevFile = file_;
953 prevLine = line_;
954 }
955
956 if (seqEnd) {
957 state = START;
958 reset();
959 }
960 }
961
962 return false;
963}
964
965} // namespace symbolizer
966} // namespace folly
967