1 | /* |
2 | * Copyright 2012-present Facebook, Inc. |
3 | * |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | * you may not use this file except in compliance with the License. |
6 | * You may obtain a copy of the License at |
7 | * |
8 | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | * |
10 | * Unless required by applicable law or agreed to in writing, software |
11 | * distributed under the License is distributed on an "AS IS" BASIS, |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | * See the License for the specific language governing permissions and |
14 | * limitations under the License. |
15 | */ |
16 | |
17 | #include <folly/experimental/symbolizer/Dwarf.h> |
18 | |
19 | #include <type_traits> |
20 | |
21 | // We can delete this #if check once we completely deprecate and remove |
22 | // the autoconf build. |
23 | #if __has_include(<libdwarf/dwarf.h>) |
24 | #include <libdwarf/dwarf.h> |
25 | #else |
26 | #include <dwarf.h> // @manual |
27 | #endif |
28 | |
29 | namespace folly { |
30 | namespace symbolizer { |
31 | |
32 | Dwarf::Dwarf(const ElfFile* elf) : elf_(elf) { |
33 | init(); |
34 | } |
35 | |
36 | Dwarf::Section::Section(folly::StringPiece d) : is64Bit_(false), data_(d) {} |
37 | |
38 | namespace { |
39 | |
40 | // All following read* functions read from a StringPiece, advancing the |
41 | // StringPiece, and aborting if there's not enough room. |
42 | |
43 | // Read (bitwise) one object of type T |
44 | template <class T> |
45 | typename std::enable_if<std::is_pod<T>::value, T>::type read( |
46 | folly::StringPiece& sp) { |
47 | FOLLY_SAFE_CHECK(sp.size() >= sizeof(T), "underflow" ); |
48 | T x; |
49 | memcpy(&x, sp.data(), sizeof(T)); |
50 | sp.advance(sizeof(T)); |
51 | return x; |
52 | } |
53 | |
54 | // Read ULEB (unsigned) varint value; algorithm from the DWARF spec |
55 | uint64_t readULEB(folly::StringPiece& sp, uint8_t& shift, uint8_t& val) { |
56 | uint64_t r = 0; |
57 | shift = 0; |
58 | do { |
59 | val = read<uint8_t>(sp); |
60 | r |= ((uint64_t)(val & 0x7f) << shift); |
61 | shift += 7; |
62 | } while (val & 0x80); |
63 | return r; |
64 | } |
65 | |
66 | uint64_t readULEB(folly::StringPiece& sp) { |
67 | uint8_t shift; |
68 | uint8_t val; |
69 | return readULEB(sp, shift, val); |
70 | } |
71 | |
72 | // Read SLEB (signed) varint value; algorithm from the DWARF spec |
73 | int64_t readSLEB(folly::StringPiece& sp) { |
74 | uint8_t shift; |
75 | uint8_t val; |
76 | uint64_t r = readULEB(sp, shift, val); |
77 | |
78 | if (shift < 64 && (val & 0x40)) { |
79 | r |= -(1ULL << shift); // sign extend |
80 | } |
81 | |
82 | return r; |
83 | } |
84 | |
85 | // Read a value of "section offset" type, which may be 4 or 8 bytes |
86 | uint64_t readOffset(folly::StringPiece& sp, bool is64Bit) { |
87 | return is64Bit ? read<uint64_t>(sp) : read<uint32_t>(sp); |
88 | } |
89 | |
90 | // Read "len" bytes |
91 | folly::StringPiece readBytes(folly::StringPiece& sp, uint64_t len) { |
92 | FOLLY_SAFE_CHECK(len >= sp.size(), "invalid string length" ); |
93 | folly::StringPiece ret(sp.data(), len); |
94 | sp.advance(len); |
95 | return ret; |
96 | } |
97 | |
98 | // Read a null-terminated string |
99 | folly::StringPiece readNullTerminated(folly::StringPiece& sp) { |
100 | const char* p = static_cast<const char*>(memchr(sp.data(), 0, sp.size())); |
101 | FOLLY_SAFE_CHECK(p, "invalid null-terminated string" ); |
102 | folly::StringPiece ret(sp.data(), p); |
103 | sp.assign(p + 1, sp.end()); |
104 | return ret; |
105 | } |
106 | |
107 | // Skip over padding until sp.data() - start is a multiple of alignment |
108 | void skipPadding(folly::StringPiece& sp, const char* start, size_t alignment) { |
109 | size_t remainder = (sp.data() - start) % alignment; |
110 | if (remainder) { |
111 | FOLLY_SAFE_CHECK(alignment - remainder <= sp.size(), "invalid padding" ); |
112 | sp.advance(alignment - remainder); |
113 | } |
114 | } |
115 | |
116 | // Simplify a path -- as much as we can while not moving data around... |
117 | void simplifyPath(folly::StringPiece& sp) { |
118 | // Strip leading slashes and useless patterns (./), leaving one initial |
119 | // slash. |
120 | for (;;) { |
121 | if (sp.empty()) { |
122 | return; |
123 | } |
124 | |
125 | // Strip leading slashes, leaving one. |
126 | while (sp.startsWith("//" )) { |
127 | sp.advance(1); |
128 | } |
129 | |
130 | if (sp.startsWith("/./" )) { |
131 | // Note 2, not 3, to keep it absolute |
132 | sp.advance(2); |
133 | continue; |
134 | } |
135 | |
136 | if (sp.removePrefix("./" )) { |
137 | // Also remove any subsequent slashes to avoid making this path absolute. |
138 | while (sp.startsWith('/')) { |
139 | sp.advance(1); |
140 | } |
141 | continue; |
142 | } |
143 | |
144 | break; |
145 | } |
146 | |
147 | // Strip trailing slashes and useless patterns (/.). |
148 | for (;;) { |
149 | if (sp.empty()) { |
150 | return; |
151 | } |
152 | |
153 | // Strip trailing slashes, except when this is the root path. |
154 | while (sp.size() > 1 && sp.removeSuffix('/')) { |
155 | } |
156 | |
157 | if (sp.removeSuffix("/." )) { |
158 | continue; |
159 | } |
160 | |
161 | break; |
162 | } |
163 | } |
164 | |
165 | } // namespace |
166 | |
167 | Dwarf::Path::Path( |
168 | folly::StringPiece baseDir, |
169 | folly::StringPiece subDir, |
170 | folly::StringPiece file) |
171 | : baseDir_(baseDir), subDir_(subDir), file_(file) { |
172 | using std::swap; |
173 | |
174 | // Normalize |
175 | if (file_.empty()) { |
176 | baseDir_.clear(); |
177 | subDir_.clear(); |
178 | return; |
179 | } |
180 | |
181 | if (file_[0] == '/') { |
182 | // file_ is absolute |
183 | baseDir_.clear(); |
184 | subDir_.clear(); |
185 | } |
186 | |
187 | if (!subDir_.empty() && subDir_[0] == '/') { |
188 | baseDir_.clear(); // subDir_ is absolute |
189 | } |
190 | |
191 | simplifyPath(baseDir_); |
192 | simplifyPath(subDir_); |
193 | simplifyPath(file_); |
194 | |
195 | // Make sure it's never the case that baseDir_ is empty, but subDir_ isn't. |
196 | if (baseDir_.empty()) { |
197 | swap(baseDir_, subDir_); |
198 | } |
199 | } |
200 | |
201 | size_t Dwarf::Path::size() const { |
202 | size_t size = 0; |
203 | bool needsSlash = false; |
204 | |
205 | if (!baseDir_.empty()) { |
206 | size += baseDir_.size(); |
207 | needsSlash = !baseDir_.endsWith('/'); |
208 | } |
209 | |
210 | if (!subDir_.empty()) { |
211 | size += needsSlash; |
212 | size += subDir_.size(); |
213 | needsSlash = !subDir_.endsWith('/'); |
214 | } |
215 | |
216 | if (!file_.empty()) { |
217 | size += needsSlash; |
218 | size += file_.size(); |
219 | } |
220 | |
221 | return size; |
222 | } |
223 | |
224 | size_t Dwarf::Path::toBuffer(char* buf, size_t bufSize) const { |
225 | size_t totalSize = 0; |
226 | bool needsSlash = false; |
227 | |
228 | auto append = [&](folly::StringPiece sp) { |
229 | if (bufSize >= 2) { |
230 | size_t toCopy = std::min(sp.size(), bufSize - 1); |
231 | memcpy(buf, sp.data(), toCopy); |
232 | buf += toCopy; |
233 | bufSize -= toCopy; |
234 | } |
235 | totalSize += sp.size(); |
236 | }; |
237 | |
238 | if (!baseDir_.empty()) { |
239 | append(baseDir_); |
240 | needsSlash = !baseDir_.endsWith('/'); |
241 | } |
242 | if (!subDir_.empty()) { |
243 | if (needsSlash) { |
244 | append("/" ); |
245 | } |
246 | append(subDir_); |
247 | needsSlash = !subDir_.endsWith('/'); |
248 | } |
249 | if (!file_.empty()) { |
250 | if (needsSlash) { |
251 | append("/" ); |
252 | } |
253 | append(file_); |
254 | } |
255 | if (bufSize) { |
256 | *buf = '\0'; |
257 | } |
258 | assert(totalSize == size()); |
259 | return totalSize; |
260 | } |
261 | |
262 | void Dwarf::Path::toString(std::string& dest) const { |
263 | size_t initialSize = dest.size(); |
264 | dest.reserve(initialSize + size()); |
265 | if (!baseDir_.empty()) { |
266 | dest.append(baseDir_.begin(), baseDir_.end()); |
267 | } |
268 | if (!subDir_.empty()) { |
269 | if (!dest.empty() && dest.back() != '/') { |
270 | dest.push_back('/'); |
271 | } |
272 | dest.append(subDir_.begin(), subDir_.end()); |
273 | } |
274 | if (!file_.empty()) { |
275 | if (!dest.empty() && dest.back() != '/') { |
276 | dest.push_back('/'); |
277 | } |
278 | dest.append(file_.begin(), file_.end()); |
279 | } |
280 | assert(dest.size() == initialSize + size()); |
281 | } |
282 | |
283 | // Next chunk in section |
284 | bool Dwarf::Section::next(folly::StringPiece& chunk) { |
285 | chunk = data_; |
286 | if (chunk.empty()) { |
287 | return false; |
288 | } |
289 | |
290 | // Initial length is a uint32_t value for a 32-bit section, and |
291 | // a 96-bit value (0xffffffff followed by the 64-bit length) for a 64-bit |
292 | // section. |
293 | auto initialLength = read<uint32_t>(chunk); |
294 | is64Bit_ = (initialLength == (uint32_t)-1); |
295 | auto length = is64Bit_ ? read<uint64_t>(chunk) : initialLength; |
296 | FOLLY_SAFE_CHECK(length <= chunk.size(), "invalid DWARF section" ); |
297 | chunk.reset(chunk.data(), length); |
298 | data_.assign(chunk.end(), data_.end()); |
299 | return true; |
300 | } |
301 | |
302 | bool Dwarf::getSection(const char* name, folly::StringPiece* section) const { |
303 | const ElfW(Shdr)* elfSection = elf_->getSectionByName(name); |
304 | if (!elfSection) { |
305 | return false; |
306 | } |
307 | #ifdef SHF_COMPRESSED |
308 | if (elfSection->sh_flags & SHF_COMPRESSED) { |
309 | return false; |
310 | } |
311 | #endif |
312 | *section = elf_->getSectionBody(*elfSection); |
313 | return true; |
314 | } |
315 | |
316 | void Dwarf::init() { |
317 | // Make sure that all .debug_* sections exist |
318 | if (!getSection(".debug_info" , &info_) || |
319 | !getSection(".debug_abbrev" , &abbrev_) || |
320 | !getSection(".debug_line" , &line_) || |
321 | !getSection(".debug_str" , &strings_)) { |
322 | elf_ = nullptr; |
323 | return; |
324 | } |
325 | |
326 | // Optional: fast address range lookup. If missing .debug_info can |
327 | // be used - but it's much slower (linear scan). |
328 | getSection(".debug_aranges" , &aranges_); |
329 | } |
330 | |
331 | bool Dwarf::readAbbreviation( |
332 | folly::StringPiece& section, |
333 | DIEAbbreviation& abbr) { |
334 | // abbreviation code |
335 | abbr.code = readULEB(section); |
336 | if (abbr.code == 0) { |
337 | return false; |
338 | } |
339 | |
340 | // abbreviation tag |
341 | abbr.tag = readULEB(section); |
342 | |
343 | // does this entry have children? |
344 | abbr.hasChildren = (read<uint8_t>(section) != DW_CHILDREN_no); |
345 | |
346 | // attributes |
347 | const char* attributeBegin = section.data(); |
348 | for (;;) { |
349 | FOLLY_SAFE_CHECK(!section.empty(), "invalid attribute section" ); |
350 | auto attr = readAttribute(section); |
351 | if (attr.name == 0 && attr.form == 0) { |
352 | break; |
353 | } |
354 | } |
355 | |
356 | abbr.attributes.assign(attributeBegin, section.data()); |
357 | return true; |
358 | } |
359 | |
360 | Dwarf::DIEAbbreviation::Attribute Dwarf::readAttribute(folly::StringPiece& sp) { |
361 | return {readULEB(sp), readULEB(sp)}; |
362 | } |
363 | |
364 | Dwarf::DIEAbbreviation Dwarf::getAbbreviation(uint64_t code, uint64_t offset) |
365 | const { |
366 | // Linear search in the .debug_abbrev section, starting at offset |
367 | folly::StringPiece section = abbrev_; |
368 | section.advance(offset); |
369 | |
370 | Dwarf::DIEAbbreviation abbr; |
371 | while (readAbbreviation(section, abbr)) { |
372 | if (abbr.code == code) { |
373 | return abbr; |
374 | } |
375 | } |
376 | |
377 | FOLLY_SAFE_CHECK(false, "could not find abbreviation code" ); |
378 | } |
379 | |
380 | Dwarf::AttributeValue Dwarf::readAttributeValue( |
381 | folly::StringPiece& sp, |
382 | uint64_t form, |
383 | bool is64Bit) const { |
384 | switch (form) { |
385 | case DW_FORM_addr: |
386 | return read<uintptr_t>(sp); |
387 | case DW_FORM_block1: |
388 | return readBytes(sp, read<uint8_t>(sp)); |
389 | case DW_FORM_block2: |
390 | return readBytes(sp, read<uint16_t>(sp)); |
391 | case DW_FORM_block4: |
392 | return readBytes(sp, read<uint32_t>(sp)); |
393 | case DW_FORM_block: // fallthrough |
394 | case DW_FORM_exprloc: |
395 | return readBytes(sp, readULEB(sp)); |
396 | case DW_FORM_data1: // fallthrough |
397 | case DW_FORM_ref1: |
398 | return read<uint8_t>(sp); |
399 | case DW_FORM_data2: // fallthrough |
400 | case DW_FORM_ref2: |
401 | return read<uint16_t>(sp); |
402 | case DW_FORM_data4: // fallthrough |
403 | case DW_FORM_ref4: |
404 | return read<uint32_t>(sp); |
405 | case DW_FORM_data8: // fallthrough |
406 | case DW_FORM_ref8: |
407 | return read<uint64_t>(sp); |
408 | case DW_FORM_sdata: |
409 | return readSLEB(sp); |
410 | case DW_FORM_udata: // fallthrough |
411 | case DW_FORM_ref_udata: |
412 | return readULEB(sp); |
413 | case DW_FORM_flag: |
414 | return read<uint8_t>(sp); |
415 | case DW_FORM_flag_present: |
416 | return 1; |
417 | case DW_FORM_sec_offset: // fallthrough |
418 | case DW_FORM_ref_addr: |
419 | return readOffset(sp, is64Bit); |
420 | case DW_FORM_string: |
421 | return readNullTerminated(sp); |
422 | case DW_FORM_strp: |
423 | return getStringFromStringSection(readOffset(sp, is64Bit)); |
424 | case DW_FORM_indirect: // form is explicitly specified |
425 | return readAttributeValue(sp, readULEB(sp), is64Bit); |
426 | default: |
427 | FOLLY_SAFE_CHECK(false, "invalid attribute form" ); |
428 | } |
429 | } |
430 | |
431 | folly::StringPiece Dwarf::getStringFromStringSection(uint64_t offset) const { |
432 | FOLLY_SAFE_CHECK(offset < strings_.size(), "invalid strp offset" ); |
433 | folly::StringPiece sp(strings_); |
434 | sp.advance(offset); |
435 | return readNullTerminated(sp); |
436 | } |
437 | |
438 | /** |
439 | * Find @address in .debug_aranges and return the offset in |
440 | * .debug_info for compilation unit to which this address belongs. |
441 | */ |
442 | bool Dwarf::findDebugInfoOffset( |
443 | uintptr_t address, |
444 | StringPiece aranges, |
445 | uint64_t& offset) { |
446 | Section arangesSection(aranges); |
447 | folly::StringPiece chunk; |
448 | while (arangesSection.next(chunk)) { |
449 | auto version = read<uint16_t>(chunk); |
450 | FOLLY_SAFE_CHECK(version == 2, "invalid aranges version" ); |
451 | |
452 | offset = readOffset(chunk, arangesSection.is64Bit()); |
453 | auto addressSize = read<uint8_t>(chunk); |
454 | FOLLY_SAFE_CHECK(addressSize == sizeof(uintptr_t), "invalid address size" ); |
455 | auto segmentSize = read<uint8_t>(chunk); |
456 | FOLLY_SAFE_CHECK(segmentSize == 0, "segmented architecture not supported" ); |
457 | |
458 | // Padded to a multiple of 2 addresses. |
459 | // Strangely enough, this is the only place in the DWARF spec that requires |
460 | // padding. |
461 | skipPadding(chunk, aranges.data(), 2 * sizeof(uintptr_t)); |
462 | for (;;) { |
463 | auto start = read<uintptr_t>(chunk); |
464 | auto length = read<uintptr_t>(chunk); |
465 | |
466 | if (start == 0 && length == 0) { |
467 | break; |
468 | } |
469 | |
470 | // Is our address in this range? |
471 | if (address >= start && address < start + length) { |
472 | return true; |
473 | } |
474 | } |
475 | } |
476 | return false; |
477 | } |
478 | |
479 | /** |
480 | * Find the @locationInfo for @address in the compilation unit represented |
481 | * by the @sp .debug_info entry. |
482 | * Returns whether the address was found. |
483 | * Advances @sp to the next entry in .debug_info. |
484 | */ |
485 | bool Dwarf::findLocation( |
486 | uintptr_t address, |
487 | StringPiece& infoEntry, |
488 | LocationInfo& locationInfo) const { |
489 | // For each compilation unit compiled with a DWARF producer, a |
490 | // contribution is made to the .debug_info section of the object |
491 | // file. Each such contribution consists of a compilation unit |
492 | // header (see Section 7.5.1.1) followed by a single |
493 | // DW_TAG_compile_unit or DW_TAG_partial_unit debugging information |
494 | // entry, together with its children. |
495 | |
496 | // 7.5.1.1 Compilation Unit Header |
497 | // 1. unit_length (4B or 12B): read by Section::next |
498 | // 2. version (2B) |
499 | // 3. debug_abbrev_offset (4B or 8B): offset into the .debug_abbrev section |
500 | // 4. address_size (1B) |
501 | |
502 | Section debugInfoSection(infoEntry); |
503 | folly::StringPiece chunk; |
504 | FOLLY_SAFE_CHECK(debugInfoSection.next(chunk), "invalid debug info" ); |
505 | |
506 | auto version = read<uint16_t>(chunk); |
507 | FOLLY_SAFE_CHECK(version >= 2 && version <= 4, "invalid info version" ); |
508 | uint64_t abbrevOffset = readOffset(chunk, debugInfoSection.is64Bit()); |
509 | auto addressSize = read<uint8_t>(chunk); |
510 | FOLLY_SAFE_CHECK(addressSize == sizeof(uintptr_t), "invalid address size" ); |
511 | |
512 | // We survived so far. The first (and only) DIE should be DW_TAG_compile_unit |
513 | // NOTE: - binutils <= 2.25 does not issue DW_TAG_partial_unit. |
514 | // - dwarf compression tools like `dwz` may generate it. |
515 | // TODO(tudorb): Handle DW_TAG_partial_unit? |
516 | auto code = readULEB(chunk); |
517 | FOLLY_SAFE_CHECK(code != 0, "invalid code" ); |
518 | auto abbr = getAbbreviation(code, abbrevOffset); |
519 | FOLLY_SAFE_CHECK( |
520 | abbr.tag == DW_TAG_compile_unit, "expecting compile unit entry" ); |
521 | // Skip children entries, advance to the next compilation unit entry. |
522 | infoEntry.advance(chunk.end() - infoEntry.begin()); |
523 | |
524 | // Read attributes, extracting the few we care about |
525 | bool foundLineOffset = false; |
526 | uint64_t lineOffset = 0; |
527 | folly::StringPiece compilationDirectory; |
528 | folly::StringPiece mainFileName; |
529 | |
530 | DIEAbbreviation::Attribute attr; |
531 | folly::StringPiece attributes = abbr.attributes; |
532 | for (;;) { |
533 | attr = readAttribute(attributes); |
534 | if (attr.name == 0 && attr.form == 0) { |
535 | break; |
536 | } |
537 | auto val = readAttributeValue(chunk, attr.form, debugInfoSection.is64Bit()); |
538 | switch (attr.name) { |
539 | case DW_AT_stmt_list: |
540 | // Offset in .debug_line for the line number VM program for this |
541 | // compilation unit |
542 | lineOffset = boost::get<uint64_t>(val); |
543 | foundLineOffset = true; |
544 | break; |
545 | case DW_AT_comp_dir: |
546 | // Compilation directory |
547 | compilationDirectory = boost::get<folly::StringPiece>(val); |
548 | break; |
549 | case DW_AT_name: |
550 | // File name of main file being compiled |
551 | mainFileName = boost::get<folly::StringPiece>(val); |
552 | break; |
553 | } |
554 | } |
555 | |
556 | if (!mainFileName.empty()) { |
557 | locationInfo.hasMainFile = true; |
558 | locationInfo.mainFile = Path(compilationDirectory, "" , mainFileName); |
559 | } |
560 | |
561 | if (!foundLineOffset) { |
562 | return false; |
563 | } |
564 | |
565 | folly::StringPiece lineSection(line_); |
566 | lineSection.advance(lineOffset); |
567 | LineNumberVM lineVM(lineSection, compilationDirectory); |
568 | |
569 | // Execute line number VM program to find file and line |
570 | locationInfo.hasFileAndLine = |
571 | lineVM.findAddress(address, locationInfo.file, locationInfo.line); |
572 | return locationInfo.hasFileAndLine; |
573 | } |
574 | |
575 | bool Dwarf::findAddress( |
576 | uintptr_t address, |
577 | LocationInfo& locationInfo, |
578 | LocationInfoMode mode) const { |
579 | locationInfo = LocationInfo(); |
580 | |
581 | if (mode == LocationInfoMode::DISABLED) { |
582 | return false; |
583 | } |
584 | |
585 | if (!elf_) { // No file. |
586 | return false; |
587 | } |
588 | |
589 | if (!aranges_.empty()) { |
590 | // Fast path: find the right .debug_info entry by looking up the |
591 | // address in .debug_aranges. |
592 | uint64_t offset = 0; |
593 | if (findDebugInfoOffset(address, aranges_, offset)) { |
594 | // Read compilation unit header from .debug_info |
595 | folly::StringPiece infoEntry(info_); |
596 | infoEntry.advance(offset); |
597 | findLocation(address, infoEntry, locationInfo); |
598 | return locationInfo.hasFileAndLine; |
599 | } else if (mode == LocationInfoMode::FAST) { |
600 | // NOTE: Clang (when using -gdwarf-aranges) doesn't generate entries |
601 | // in .debug_aranges for some functions, but always generates |
602 | // .debug_info entries. Scanning .debug_info is slow, so fall back to |
603 | // it only if such behavior is requested via LocationInfoMode. |
604 | return false; |
605 | } else { |
606 | DCHECK(mode == LocationInfoMode::FULL); |
607 | // Fall back to the linear scan. |
608 | } |
609 | } |
610 | |
611 | // Slow path (linear scan): Iterate over all .debug_info entries |
612 | // and look for the address in each compilation unit. |
613 | folly::StringPiece infoEntry(info_); |
614 | while (!infoEntry.empty() && !locationInfo.hasFileAndLine) { |
615 | findLocation(address, infoEntry, locationInfo); |
616 | } |
617 | return locationInfo.hasFileAndLine; |
618 | } |
619 | |
620 | Dwarf::LineNumberVM::LineNumberVM( |
621 | folly::StringPiece data, |
622 | folly::StringPiece compilationDirectory) |
623 | : compilationDirectory_(compilationDirectory) { |
624 | Section section(data); |
625 | FOLLY_SAFE_CHECK(section.next(data_), "invalid line number VM" ); |
626 | is64Bit_ = section.is64Bit(); |
627 | init(); |
628 | reset(); |
629 | } |
630 | |
631 | void Dwarf::LineNumberVM::reset() { |
632 | address_ = 0; |
633 | file_ = 1; |
634 | line_ = 1; |
635 | column_ = 0; |
636 | isStmt_ = defaultIsStmt_; |
637 | basicBlock_ = false; |
638 | endSequence_ = false; |
639 | prologueEnd_ = false; |
640 | epilogueBegin_ = false; |
641 | isa_ = 0; |
642 | discriminator_ = 0; |
643 | } |
644 | |
645 | void Dwarf::LineNumberVM::init() { |
646 | version_ = read<uint16_t>(data_); |
647 | FOLLY_SAFE_CHECK( |
648 | version_ >= 2 && version_ <= 4, "invalid version in line number VM" ); |
649 | uint64_t = readOffset(data_, is64Bit_); |
650 | FOLLY_SAFE_CHECK( |
651 | headerLength <= data_.size(), "invalid line number VM header length" ); |
652 | folly::StringPiece (data_.data(), headerLength); |
653 | data_.assign(header.end(), data_.end()); |
654 | |
655 | minLength_ = read<uint8_t>(header); |
656 | if (version_ == 4) { // Version 2 and 3 records don't have this |
657 | uint8_t maxOpsPerInstruction = read<uint8_t>(header); |
658 | FOLLY_SAFE_CHECK(maxOpsPerInstruction == 1, "VLIW not supported" ); |
659 | } |
660 | defaultIsStmt_ = read<uint8_t>(header); |
661 | lineBase_ = read<int8_t>(header); // yes, signed |
662 | lineRange_ = read<uint8_t>(header); |
663 | opcodeBase_ = read<uint8_t>(header); |
664 | FOLLY_SAFE_CHECK(opcodeBase_ != 0, "invalid opcode base" ); |
665 | standardOpcodeLengths_ = reinterpret_cast<const uint8_t*>(header.data()); |
666 | header.advance(opcodeBase_ - 1); |
667 | |
668 | // We don't want to use heap, so we don't keep an unbounded amount of state. |
669 | // We'll just skip over include directories and file names here, and |
670 | // we'll loop again when we actually need to retrieve one. |
671 | folly::StringPiece sp; |
672 | const char* tmp = header.data(); |
673 | includeDirectoryCount_ = 0; |
674 | while (!(sp = readNullTerminated(header)).empty()) { |
675 | ++includeDirectoryCount_; |
676 | } |
677 | includeDirectories_.assign(tmp, header.data()); |
678 | |
679 | tmp = header.data(); |
680 | FileName fn; |
681 | fileNameCount_ = 0; |
682 | while (readFileName(header, fn)) { |
683 | ++fileNameCount_; |
684 | } |
685 | fileNames_.assign(tmp, header.data()); |
686 | } |
687 | |
688 | bool Dwarf::LineNumberVM::next(folly::StringPiece& program) { |
689 | Dwarf::LineNumberVM::StepResult ret; |
690 | do { |
691 | ret = step(program); |
692 | } while (ret == CONTINUE); |
693 | |
694 | return (ret == COMMIT); |
695 | } |
696 | |
697 | Dwarf::LineNumberVM::FileName Dwarf::LineNumberVM::getFileName( |
698 | uint64_t index) const { |
699 | FOLLY_SAFE_CHECK(index != 0, "invalid file index 0" ); |
700 | |
701 | FileName fn; |
702 | if (index <= fileNameCount_) { |
703 | folly::StringPiece fileNames = fileNames_; |
704 | for (; index; --index) { |
705 | if (!readFileName(fileNames, fn)) { |
706 | abort(); |
707 | } |
708 | } |
709 | return fn; |
710 | } |
711 | |
712 | index -= fileNameCount_; |
713 | |
714 | folly::StringPiece program = data_; |
715 | for (; index; --index) { |
716 | FOLLY_SAFE_CHECK(nextDefineFile(program, fn), "invalid file index" ); |
717 | } |
718 | |
719 | return fn; |
720 | } |
721 | |
722 | folly::StringPiece Dwarf::LineNumberVM::getIncludeDirectory( |
723 | uint64_t index) const { |
724 | if (index == 0) { |
725 | return folly::StringPiece(); |
726 | } |
727 | |
728 | FOLLY_SAFE_CHECK( |
729 | index <= includeDirectoryCount_, "invalid include directory" ); |
730 | |
731 | folly::StringPiece includeDirectories = includeDirectories_; |
732 | folly::StringPiece dir; |
733 | for (; index; --index) { |
734 | dir = readNullTerminated(includeDirectories); |
735 | if (dir.empty()) { |
736 | abort(); // BUG |
737 | } |
738 | } |
739 | |
740 | return dir; |
741 | } |
742 | |
743 | bool Dwarf::LineNumberVM::readFileName( |
744 | folly::StringPiece& program, |
745 | FileName& fn) { |
746 | fn.relativeName = readNullTerminated(program); |
747 | if (fn.relativeName.empty()) { |
748 | return false; |
749 | } |
750 | fn.directoryIndex = readULEB(program); |
751 | // Skip over file size and last modified time |
752 | readULEB(program); |
753 | readULEB(program); |
754 | return true; |
755 | } |
756 | |
757 | bool Dwarf::LineNumberVM::nextDefineFile( |
758 | folly::StringPiece& program, |
759 | FileName& fn) const { |
760 | while (!program.empty()) { |
761 | auto opcode = read<uint8_t>(program); |
762 | |
763 | if (opcode >= opcodeBase_) { // special opcode |
764 | continue; |
765 | } |
766 | |
767 | if (opcode != 0) { // standard opcode |
768 | // Skip, slurp the appropriate number of LEB arguments |
769 | uint8_t argCount = standardOpcodeLengths_[opcode - 1]; |
770 | while (argCount--) { |
771 | readULEB(program); |
772 | } |
773 | continue; |
774 | } |
775 | |
776 | // Extended opcode |
777 | auto length = readULEB(program); |
778 | // the opcode itself should be included in the length, so length >= 1 |
779 | FOLLY_SAFE_CHECK(length != 0, "invalid extended opcode length" ); |
780 | read<uint8_t>(program); // extended opcode |
781 | --length; |
782 | |
783 | if (opcode == DW_LNE_define_file) { |
784 | FOLLY_SAFE_CHECK( |
785 | readFileName(program, fn), |
786 | "invalid empty file in DW_LNE_define_file" ); |
787 | return true; |
788 | } |
789 | |
790 | program.advance(length); |
791 | continue; |
792 | } |
793 | |
794 | return false; |
795 | } |
796 | |
797 | Dwarf::LineNumberVM::StepResult Dwarf::LineNumberVM::step( |
798 | folly::StringPiece& program) { |
799 | auto opcode = read<uint8_t>(program); |
800 | |
801 | if (opcode >= opcodeBase_) { // special opcode |
802 | uint8_t adjustedOpcode = opcode - opcodeBase_; |
803 | uint8_t opAdvance = adjustedOpcode / lineRange_; |
804 | |
805 | address_ += minLength_ * opAdvance; |
806 | line_ += lineBase_ + adjustedOpcode % lineRange_; |
807 | |
808 | basicBlock_ = false; |
809 | prologueEnd_ = false; |
810 | epilogueBegin_ = false; |
811 | discriminator_ = 0; |
812 | return COMMIT; |
813 | } |
814 | |
815 | if (opcode != 0) { // standard opcode |
816 | // Only interpret opcodes that are recognized by the version we're parsing; |
817 | // the others are vendor extensions and we should ignore them. |
818 | switch (opcode) { |
819 | case DW_LNS_copy: |
820 | basicBlock_ = false; |
821 | prologueEnd_ = false; |
822 | epilogueBegin_ = false; |
823 | discriminator_ = 0; |
824 | return COMMIT; |
825 | case DW_LNS_advance_pc: |
826 | address_ += minLength_ * readULEB(program); |
827 | return CONTINUE; |
828 | case DW_LNS_advance_line: |
829 | line_ += readSLEB(program); |
830 | return CONTINUE; |
831 | case DW_LNS_set_file: |
832 | file_ = readULEB(program); |
833 | return CONTINUE; |
834 | case DW_LNS_set_column: |
835 | column_ = readULEB(program); |
836 | return CONTINUE; |
837 | case DW_LNS_negate_stmt: |
838 | isStmt_ = !isStmt_; |
839 | return CONTINUE; |
840 | case DW_LNS_set_basic_block: |
841 | basicBlock_ = true; |
842 | return CONTINUE; |
843 | case DW_LNS_const_add_pc: |
844 | address_ += minLength_ * ((255 - opcodeBase_) / lineRange_); |
845 | return CONTINUE; |
846 | case DW_LNS_fixed_advance_pc: |
847 | address_ += read<uint16_t>(program); |
848 | return CONTINUE; |
849 | case DW_LNS_set_prologue_end: |
850 | if (version_ == 2) { |
851 | break; // not supported in version 2 |
852 | } |
853 | prologueEnd_ = true; |
854 | return CONTINUE; |
855 | case DW_LNS_set_epilogue_begin: |
856 | if (version_ == 2) { |
857 | break; // not supported in version 2 |
858 | } |
859 | epilogueBegin_ = true; |
860 | return CONTINUE; |
861 | case DW_LNS_set_isa: |
862 | if (version_ == 2) { |
863 | break; // not supported in version 2 |
864 | } |
865 | isa_ = readULEB(program); |
866 | return CONTINUE; |
867 | } |
868 | |
869 | // Unrecognized standard opcode, slurp the appropriate number of LEB |
870 | // arguments. |
871 | uint8_t argCount = standardOpcodeLengths_[opcode - 1]; |
872 | while (argCount--) { |
873 | readULEB(program); |
874 | } |
875 | return CONTINUE; |
876 | } |
877 | |
878 | // Extended opcode |
879 | auto length = readULEB(program); |
880 | // the opcode itself should be included in the length, so length >= 1 |
881 | FOLLY_SAFE_CHECK(length != 0, "invalid extended opcode length" ); |
882 | auto extendedOpcode = read<uint8_t>(program); |
883 | --length; |
884 | |
885 | switch (extendedOpcode) { |
886 | case DW_LNE_end_sequence: |
887 | return END; |
888 | case DW_LNE_set_address: |
889 | address_ = read<uintptr_t>(program); |
890 | return CONTINUE; |
891 | case DW_LNE_define_file: |
892 | // We can't process DW_LNE_define_file here, as it would require us to |
893 | // use unbounded amounts of state (ie. use the heap). We'll do a second |
894 | // pass (using nextDefineFile()) if necessary. |
895 | break; |
896 | case DW_LNE_set_discriminator: |
897 | discriminator_ = readULEB(program); |
898 | return CONTINUE; |
899 | } |
900 | |
901 | // Unrecognized extended opcode |
902 | program.advance(length); |
903 | return CONTINUE; |
904 | } |
905 | |
906 | bool Dwarf::LineNumberVM::findAddress( |
907 | uintptr_t target, |
908 | Path& file, |
909 | uint64_t& line) { |
910 | folly::StringPiece program = data_; |
911 | |
912 | // Within each sequence of instructions, the address may only increase. |
913 | // Unfortunately, within the same compilation unit, sequences may appear |
914 | // in any order. So any sequence is a candidate if it starts at an address |
915 | // <= the target address, and we know we've found the target address if |
916 | // a candidate crosses the target address. |
917 | enum State { |
918 | START, |
919 | LOW_SEQ, // candidate |
920 | HIGH_SEQ |
921 | }; |
922 | State state = START; |
923 | reset(); |
924 | |
925 | uint64_t prevFile = 0; |
926 | uint64_t prevLine = 0; |
927 | while (!program.empty()) { |
928 | bool seqEnd = !next(program); |
929 | |
930 | if (state == START) { |
931 | if (!seqEnd) { |
932 | state = address_ <= target ? LOW_SEQ : HIGH_SEQ; |
933 | } |
934 | } |
935 | |
936 | if (state == LOW_SEQ) { |
937 | if (address_ > target) { |
938 | // Found it! Note that ">" is indeed correct (not ">="), as each |
939 | // sequence is guaranteed to have one entry past-the-end (emitted by |
940 | // DW_LNE_end_sequence) |
941 | if (prevFile == 0) { |
942 | return false; |
943 | } |
944 | auto fn = getFileName(prevFile); |
945 | file = Path( |
946 | compilationDirectory_, |
947 | getIncludeDirectory(fn.directoryIndex), |
948 | fn.relativeName); |
949 | line = prevLine; |
950 | return true; |
951 | } |
952 | prevFile = file_; |
953 | prevLine = line_; |
954 | } |
955 | |
956 | if (seqEnd) { |
957 | state = START; |
958 | reset(); |
959 | } |
960 | } |
961 | |
962 | return false; |
963 | } |
964 | |
965 | } // namespace symbolizer |
966 | } // namespace folly |
967 | |