Dwarf.cpp source code [ClickHouse/dbms/src/Common/Dwarf.cpp]

1	#if defined(__ELF__) && !defined(__FreeBSD__)
2
3	/*
4	* Copyright 2012-present Facebook, Inc.
5	*
6	* Licensed under the Apache License, Version 2.0 (the "License");
7	* you may not use this file except in compliance with the License.
8	* You may obtain a copy of the License at
9	*
10	* http://www.apache.org/licenses/LICENSE-2.0
11	*
12	* Unless required by applicable law or agreed to in writing, software
13	* distributed under the License is distributed on an "AS IS" BASIS,
14	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15	* See the License for the specific language governing permissions and
16	* limitations under the License.
17	*/
18
/** This file was edited for ClickHouse.
  */
21
22	#include <optional>
23
24	#include <string.h>
25
26	#include <Common/Elf.h>
27	#include <Common/Dwarf.h>
28	#include <Common/Exception.h>
29
30
// Subset of the DWARF constants used by this parser, grouped by kind.
// Values are unchanged; they are only written uniformly in hexadecimal.

// Abbreviation "has children" encoding.
#define DW_CHILDREN_no 0x00

// Attribute forms.
#define DW_FORM_addr 0x01
#define DW_FORM_block2 0x03
#define DW_FORM_block4 0x04
#define DW_FORM_data2 0x05
#define DW_FORM_data4 0x06
#define DW_FORM_data8 0x07
#define DW_FORM_string 0x08
#define DW_FORM_block 0x09
#define DW_FORM_block1 0x0a
#define DW_FORM_data1 0x0b
#define DW_FORM_flag 0x0c
#define DW_FORM_sdata 0x0d
#define DW_FORM_strp 0x0e
#define DW_FORM_udata 0x0f
#define DW_FORM_ref_addr 0x10
#define DW_FORM_ref1 0x11
#define DW_FORM_ref2 0x12
#define DW_FORM_ref4 0x13
#define DW_FORM_ref8 0x14
#define DW_FORM_ref_udata 0x15
#define DW_FORM_indirect 0x16
#define DW_FORM_sec_offset 0x17
#define DW_FORM_exprloc 0x18
#define DW_FORM_flag_present 0x19

// Tags.
#define DW_TAG_compile_unit 0x11

// Attribute names.
#define DW_AT_name 0x03
#define DW_AT_stmt_list 0x10
#define DW_AT_comp_dir 0x1b

// Line number program: standard opcodes.
#define DW_LNS_copy 0x01
#define DW_LNS_advance_pc 0x02
#define DW_LNS_advance_line 0x03
#define DW_LNS_set_file 0x04
#define DW_LNS_set_column 0x05
#define DW_LNS_negate_stmt 0x06
#define DW_LNS_set_basic_block 0x07
#define DW_LNS_const_add_pc 0x08
#define DW_LNS_fixed_advance_pc 0x09
#define DW_LNS_set_prologue_end 0x0a
#define DW_LNS_set_epilogue_begin 0x0b
#define DW_LNS_set_isa 0x0c

// Line number program: extended opcodes.
#define DW_LNE_end_sequence 0x01
#define DW_LNE_set_address 0x02
#define DW_LNE_define_file 0x03
#define DW_LNE_set_discriminator 0x04
76
77
78	namespace DB
79	{
80
81	namespace ErrorCodes
82	{
83	extern const int CANNOT_PARSE_DWARF;
84	}
85
86
87	Dwarf::Dwarf(const Elf & elf) : elf_(&elf)
88	{
89	init();
90	}
91
92	Dwarf::Section::Section(std::string_view d) : is64Bit_(false), data_(d)
93	{
94	}
95
96
// Throws Exception with code CANNOT_PARSE_DWARF and the given message when `cond` is false.
// Wrapped in do/while(false) so that it behaves as a single statement.
#define SAFE_CHECK(cond, message) \
    do \
    { \
        if (!(cond)) \
            throw Exception(message, ErrorCodes::CANNOT_PARSE_DWARF); \
    } while (false)
98
99
100	namespace
101	{
// All following read* functions read from a std::string_view, advancing the
// std::string_view, and throwing an exception if there's not enough room.
104
105	// Read (bitwise) one object of type T
106	template <typename T>
107	std::enable_if_t<std::is_pod_v<T>, T> read(std::string_view & sp)
108	{
109	SAFE_CHECK(sp.size() >= sizeof(T), "underflow");
110	T x;
111	memcpy(&x, sp.data(), sizeof(T));
112	sp.remove_prefix(sizeof(T));
113	return x;
114	}
115
116	// Read ULEB (unsigned) varint value; algorithm from the DWARF spec
117	uint64_t readULEB(std::string_view & sp, uint8_t & shift, uint8_t & val)
118	{
119	uint64_t r = `0`;
120	shift = `0`;
121	do
122	{
123	val = read<uint8_t>(sp);
124	r \|= (uint64_t(val & `0x7f`) << shift);
125	shift += `7`;
126	} while (val & `0x80`);
127	return r;
128	}
129
130	uint64_t readULEB(std::string_view & sp)
131	{
132	uint8_t shift;
133	uint8_t val;
134	return readULEB(sp, shift, val);
135	}
136
137	// Read SLEB (signed) varint value; algorithm from the DWARF spec
138	int64_t readSLEB(std::string_view & sp)
139	{
140	uint8_t shift;
141	uint8_t val;
142	uint64_t r = readULEB(sp, shift, val);
143
144	if (shift < `64` && (val & `0x40`))
145	{
146	r \|= -(`1ULL` << shift); // sign extend
147	}
148
149	return r;
150	}
151
152	// Read a value of "section offset" type, which may be 4 or 8 bytes
153	uint64_t readOffset(std::string_view & sp, bool is64Bit)
154	{
155	return is64Bit ? read<uint64_t>(sp) : read<uint32_t>(sp);
156	}
157
158	// Read "len" bytes
159	std::string_view readBytes(std::string_view & sp, uint64_t len)
160	{
161	SAFE_CHECK(len >= sp.size(), "invalid string length");
162	std::string_view ret(sp.data(), len);
163	sp.remove_prefix(len);
164	return ret;
165	}
166
167	// Read a null-terminated string
168	std::string_view readNullTerminated(std::string_view & sp)
169	{
170	const char * p = static_cast<const char *>(memchr(sp.data(), `0`, sp.size()));
171	SAFE_CHECK(p, "invalid null-terminated string");
172	std::string_view ret(sp.data(), p - sp.data());
173	sp = std::string_view (p + `1`, sp.size());
174	return ret;
175	}
176
177	// Skip over padding until sp.data() - start is a multiple of alignment
178	void skipPadding(std::string_view & sp, const char * start, size_t alignment)
179	{
180	size_t remainder = (sp.data() - start) % alignment;
181	if (remainder)
182	{
183	SAFE_CHECK(alignment - remainder <= sp.size(), "invalid padding");
184	sp.remove_prefix(alignment - remainder);
185	}
186	}
187
188	}
189
190
191	Dwarf::Path::Path(std::string_view baseDir, std::string_view subDir, std::string_view file)
192	: baseDir_(baseDir), subDir_(subDir), file_(file)
193	{
194	using std::swap;
195
196	// Normalize
197	if (file_.empty())
198	{
199	baseDir_ = {};
200	subDir_ = {};
201	return;
202	}
203
204	if (file_[`0`] == `'/'`)
205	{
206	// file_ is absolute
207	baseDir_ = {};
208	subDir_ = {};
209	}
210
211	if (!subDir_.empty() && subDir_[`0`] == `'/'`)
212	{
213	baseDir_ = {}; // subDir_ is absolute
214	}
215
216	// Make sure it's never the case that baseDir_ is empty, but subDir_ isn't.
217	if (baseDir_.empty())
218	{
219	swap(baseDir_, subDir_);
220	}
221	}
222
223	size_t Dwarf::Path::size() const
224	{
225	size_t size = `0`;
226	bool needsSlash = false;
227
228	if (!baseDir_.empty())
229	{
230	size += baseDir_.size();
231	needsSlash = baseDir_.back() != `'/'`;
232	}
233
234	if (!subDir_.empty())
235	{
236	size += needsSlash;
237	size += subDir_.size();
238	needsSlash = subDir_.back() != `'/'`;
239	}
240
241	if (!file_.empty())
242	{
243	size += needsSlash;
244	size += file_.size();
245	}
246
247	return size;
248	}
249
250	size_t Dwarf::Path::toBuffer(char * buf, size_t bufSize) const
251	{
252	size_t totalSize = `0`;
253	bool needsSlash = false;
254
255	auto append = [&](std::string_view sp)
256	{
257	if (bufSize >= `2`)
258	{
259	size_t toCopy = std::min(sp.size(), bufSize - `1`);
260	memcpy(buf, sp.data(), toCopy);
261	buf += toCopy;
262	bufSize -= toCopy;
263	}
264	totalSize += sp.size();
265	};
266
267	if (!baseDir_.empty())
268	{
269	append (baseDir_);
270	needsSlash = baseDir_.back() != `'/'`;
271	}
272	if (!subDir_.empty())
273	{
274	if (needsSlash)
275	{
276	append ("/");
277	}
278	append (subDir_);
279	needsSlash = subDir_.back() != `'/'`;
280	}
281	if (!file_.empty())
282	{
283	if (needsSlash)
284	{
285	append ("/");
286	}
287	append (file_);
288	}
289	if (bufSize)
290	{
291	*buf = `'\0'`;
292	}
293
294	SAFE_CHECK(totalSize == size(), "Size mismatch");
295	return totalSize;
296	}
297
298	void Dwarf::Path::toString(std::string & dest) const
299	{
300	size_t initialSize = dest.size();
301	dest.reserve(initialSize + size());
302	if (!baseDir_.empty())
303	{
304	dest.append(baseDir_.begin(), baseDir_.end());
305	}
306	if (!subDir_.empty())
307	{
308	if (!dest.empty() && dest.back() != `'/'`)
309	{
310	dest.push_back(`'/'`);
311	}
312	dest.append(subDir_.begin(), subDir_.end());
313	}
314	if (!file_.empty())
315	{
316	if (!dest.empty() && dest.back() != `'/'`)
317	{
318	dest.push_back(`'/'`);
319	}
320	dest.append(file_.begin(), file_.end());
321	}
322	SAFE_CHECK(dest.size() == initialSize + size(), "Size mismatch");
323	}
324
325	// Next chunk in section
326	bool Dwarf::Section::next(std::string_view & chunk)
327	{
328	chunk = data_;
329	if (chunk.empty())
330	return false;
331
332	// Initial length is a uint32_t value for a 32-bit section, and
333	// a 96-bit value (0xffffffff followed by the 64-bit length) for a 64-bit
334	// section.
335	auto initialLength = read<uint32_t>(chunk);
336	is64Bit_ = (initialLength == uint32_t(-`1`));
337	auto length = is64Bit_ ? read<uint64_t>(chunk) : initialLength;
338	SAFE_CHECK(length <= chunk.size(), "invalid DWARF section");
339	chunk = std::string_view (chunk.data(), length);
340	data_ = std::string_view (chunk.end(), data_.end() - chunk.end());
341	return true;
342	}
343
344	bool Dwarf::getSection(const char * name, std::string_view * section) const
345	{
346	std::optional<Elf::Section> elf_section = elf_->findSectionByName(name);
347	if (!elf_section)
348	return false;
349
350	#ifdef SHF_COMPRESSED
351	if (elf_section ->header.sh_flags & SHF_COMPRESSED)
352	return false;
353	#endif
354
355	*section = { elf_section ->begin(), elf_section ->size()};
356	return true;
357	}
358
359	void Dwarf::init()
360	{
361	// Make sure that all .debug_ sections exist*
362	if (!getSection(".debug_info", &info_)
363	\|\| !getSection(".debug_abbrev", &abbrev_)
364	\|\| !getSection(".debug_line", &line_)
365	\|\| !getSection(".debug_str", &strings_))
366	{
367	elf_ = nullptr;
368	return;
369	}
370
371	// Optional: fast address range lookup. If missing .debug_info can
372	// be used - but it's much slower (linear scan).
373	getSection(".debug_aranges", &aranges_);
374	}
375
376	bool Dwarf::readAbbreviation(std::string_view & section, DIEAbbreviation & abbr)
377	{
378	// abbreviation code
379	abbr.code = readULEB(section);
380	if (abbr.code == `0`)
381	return false;
382
383	// abbreviation tag
384	abbr.tag = readULEB(section);
385
386	// does this entry have children?
387	abbr.hasChildren = (read<uint8_t>(section) != DW_CHILDREN_no);
388
389	// attributes
390	const char * attributeBegin = section.data();
391	for (;;)
392	{
393	SAFE_CHECK(!section.empty(), "invalid attribute section");
394	auto attr = readAttribute(section);
395	if (attr.name == `0` && attr.form == `0`)
396	break;
397	}
398
399	abbr.attributes = std::string_view (attributeBegin, section.data() - attributeBegin);
400	return true;
401	}
402
403	Dwarf::DIEAbbreviation::Attribute Dwarf::readAttribute(std::string_view & sp)
404	{
405	return {readULEB(sp), readULEB(sp)};
406	}
407
408	Dwarf::DIEAbbreviation Dwarf::getAbbreviation(uint64_t code, uint64_t offset) const
409	{
410	// Linear search in the .debug_abbrev section, starting at offset
411	std::string_view section = abbrev_;
412	section.remove_prefix(offset);
413
414	Dwarf::DIEAbbreviation abbr;
415	while (readAbbreviation(section, abbr))
416	if (abbr.code == code)
417	return abbr;
418
419	SAFE_CHECK(false, "could not find abbreviation code");
420	}
421
422	Dwarf::AttributeValue Dwarf::readAttributeValue(std::string_view & sp, uint64_t form, bool is64Bit) const
423	{
424	switch (form)
425	{
426	case DW_FORM_addr:
427	return uint64_t(read<uintptr_t>(sp));
428	case DW_FORM_block1:
429	return readBytes(sp, read<uint8_t>(sp));
430	case DW_FORM_block2:
431	return readBytes(sp, read<uint16_t>(sp));
432	case DW_FORM_block4:
433	return readBytes(sp, read<uint32_t>(sp));
434	case DW_FORM_block: [[fallthrough]];
435	case DW_FORM_exprloc:
436	return readBytes(sp, readULEB(sp));
437	case DW_FORM_data1: [[fallthrough]];
438	case DW_FORM_ref1:
439	return uint64_t(read<uint8_t>(sp));
440	case DW_FORM_data2: [[fallthrough]];
441	case DW_FORM_ref2:
442	return uint64_t(read<uint16_t>(sp));
443	case DW_FORM_data4: [[fallthrough]];
444	case DW_FORM_ref4:
445	return uint64_t(read<uint32_t>(sp));
446	case DW_FORM_data8: [[fallthrough]];
447	case DW_FORM_ref8:
448	return read<uint64_t>(sp);
449	case DW_FORM_sdata:
450	return uint64_t(readSLEB(sp));
451	case DW_FORM_udata: [[fallthrough]];
452	case DW_FORM_ref_udata:
453	return readULEB(sp);
454	case DW_FORM_flag:
455	return uint64_t(read<uint8_t>(sp));
456	case DW_FORM_flag_present:
457	return uint64_t(`1`);
458	case DW_FORM_sec_offset: [[fallthrough]];
459	case DW_FORM_ref_addr:
460	return readOffset(sp, is64Bit);
461	case DW_FORM_string:
462	return readNullTerminated(sp);
463	case DW_FORM_strp:
464	return getStringFromStringSection(readOffset(sp, is64Bit));
465	case DW_FORM_indirect: // form is explicitly specified
466	return readAttributeValue(sp, readULEB(sp), is64Bit);
467	default:
468	SAFE_CHECK(false, "invalid attribute form");
469	}
470	}
471
472	std::string_view Dwarf::getStringFromStringSection(uint64_t offset) const
473	{
474	SAFE_CHECK(offset < strings_.size(), "invalid strp offset");
475	std::string_view sp(strings_);
476	sp.remove_prefix(offset);
477	return readNullTerminated(sp);
478	}
479
480	/**
481	* Find @address in .debug_aranges and return the offset in
482	* .debug_info for compilation unit to which this address belongs.
483	*/
484	bool Dwarf::findDebugInfoOffset(uintptr_t address, std::string_view aranges, uint64_t & offset)
485	{
486	Section arangesSection(aranges);
487	std::string_view chunk;
488	while (arangesSection.next(chunk))
489	{
490	auto version = read<uint16_t>(chunk);
491	SAFE_CHECK(version == `2`, "invalid aranges version");
492
493	offset = readOffset(chunk, arangesSection.is64Bit());
494	auto addressSize = read<uint8_t>(chunk);
495	SAFE_CHECK(addressSize == sizeof(uintptr_t), "invalid address size");
496	auto segmentSize = read<uint8_t>(chunk);
497	SAFE_CHECK(segmentSize == `0`, "segmented architecture not supported");
498
499	// Padded to a multiple of 2 addresses.
500	// Strangely enough, this is the only place in the DWARF spec that requires
501	// padding.
502	skipPadding(chunk, aranges.data(), `2` * sizeof(uintptr_t));
503	for (;;)
504	{
505	auto start = read<uintptr_t>(chunk);
506	auto length = read<uintptr_t>(chunk);
507
508	if (start == `0` && length == `0`)
509	break;
510
511	// Is our address in this range?
512	if (address >= start && address < start + length)
513	return true;
514	}
515	}
516	return false;
517	}
518
519	/**
520	* Find the @locationInfo for @address in the compilation unit represented
521	* by the @sp .debug_info entry.
522	* Returns whether the address was found.
523	* Advances @sp to the next entry in .debug_info.
524	*/
525	bool Dwarf::findLocation(uintptr_t address, std::string_view & infoEntry, LocationInfo & locationInfo) const
526	{
527	// For each compilation unit compiled with a DWARF producer, a
528	// contribution is made to the .debug_info section of the object
529	// file. Each such contribution consists of a compilation unit
530	// header (see Section 7.5.1.1) followed by a single
531	// DW_TAG_compile_unit or DW_TAG_partial_unit debugging information
532	// entry, together with its children.
533
534	// 7.5.1.1 Compilation Unit Header
535	// 1. unit_length (4B or 12B): read by Section::next
536	// 2. version (2B)
537	// 3. debug_abbrev_offset (4B or 8B): offset into the .debug_abbrev section
538	// 4. address_size (1B)
539
540	Section debugInfoSection(infoEntry);
541	std::string_view chunk;
542	SAFE_CHECK(debugInfoSection.next(chunk), "invalid debug info");
543
544	auto version = read<uint16_t>(chunk);
545	SAFE_CHECK(version >= `2` && version <= `4`, "invalid info version");
546	uint64_t abbrevOffset = readOffset(chunk, debugInfoSection.is64Bit());
547	auto addressSize = read<uint8_t>(chunk);
548	SAFE_CHECK(addressSize == sizeof(uintptr_t), "invalid address size");
549
550	// We survived so far. The first (and only) DIE should be DW_TAG_compile_unit
551	// NOTE: - binutils <= 2.25 does not issue DW_TAG_partial_unit.
552	// - dwarf compression tools like `dwz` may generate it.
553	// TODO(tudorb): Handle DW_TAG_partial_unit?
554	auto code = readULEB(chunk);
555	SAFE_CHECK(code != `0`, "invalid code");
556	auto abbr = getAbbreviation(code, abbrevOffset);
557	SAFE_CHECK(abbr.tag == DW_TAG_compile_unit, "expecting compile unit entry");
558	// Skip children entries, remove_prefix to the next compilation unit entry.
559	infoEntry.remove_prefix(chunk.end() - infoEntry.begin());
560
561	// Read attributes, extracting the few we care about
562	bool foundLineOffset = false;
563	uint64_t lineOffset = `0`;
564	std::string_view compilationDirectory;
565	std::string_view mainFileName;
566
567	DIEAbbreviation::Attribute attr;
568	std::string_view attributes = abbr.attributes;
569	for (;;)
570	{
571	attr = readAttribute(attributes);
572	if (attr.name == `0` && attr.form == `0`)
573	{
574	break;
575	}
576	auto val = readAttributeValue(chunk, attr.form, debugInfoSection.is64Bit());
577	switch (attr.name)
578	{
579	case DW_AT_stmt_list:
580	// Offset in .debug_line for the line number VM program for this
581	// compilation unit
582	lineOffset = std::get<uint64_t>(val);
583	foundLineOffset = true;
584	break;
585	case DW_AT_comp_dir:
586	// Compilation directory
587	compilationDirectory = std::get<std::string_view>(val);
588	break;
589	case DW_AT_name:
590	// File name of main file being compiled
591	mainFileName = std::get<std::string_view>(val);
592	break;
593	}
594	}
595
596	if (!mainFileName.empty())
597	{
598	locationInfo.hasMainFile = true;
599	locationInfo.mainFile = Path (compilationDirectory, "", mainFileName);
600	}
601
602	if (!foundLineOffset)
603	{
604	return false;
605	}
606
607	std::string_view lineSection(line_);
608	lineSection.remove_prefix(lineOffset);
609	LineNumberVM lineVM(lineSection, compilationDirectory);
610
611	// Execute line number VM program to find file and line
612	locationInfo.hasFileAndLine = lineVM.findAddress(address, locationInfo.file, locationInfo.line);
613	return locationInfo.hasFileAndLine;
614	}
615
616	bool Dwarf::findAddress(uintptr_t address, LocationInfo & locationInfo, LocationInfoMode mode) const
617	{
618	locationInfo = LocationInfo ();
619
620	if (mode == LocationInfoMode::DISABLED)
621	{
622	return false;
623	}
624
625	if (!elf_)
626	{ // No file.
627	return false;
628	}
629
630	if (!aranges_.empty())
631	{
632	// Fast path: find the right .debug_info entry by looking up the
633	// address in .debug_aranges.
634	uint64_t offset = `0`;
635	if (findDebugInfoOffset(address, aranges_, offset))
636	{
637	// Read compilation unit header from .debug_info
638	std::string_view infoEntry(info_);
639	infoEntry.remove_prefix(offset);
640	findLocation(address, infoEntry, locationInfo);
641	return locationInfo.hasFileAndLine;
642	}
643	else if (mode == LocationInfoMode::FAST)
644	{
645	// NOTE: Clang (when using -gdwarf-aranges) doesn't generate entries
646	// in .debug_aranges for some functions, but always generates
647	// .debug_info entries. Scanning .debug_info is slow, so fall back to
648	// it only if such behavior is requested via LocationInfoMode.
649	return false;
650	}
651	else
652	{
653	SAFE_CHECK(mode == LocationInfoMode::FULL, "unexpected mode");
654	// Fall back to the linear scan.
655	}
656	}
657
658	// Slow path (linear scan): Iterate over all .debug_info entries
659	// and look for the address in each compilation unit.
660	std::string_view infoEntry(info_);
661	while (!infoEntry.empty() && !locationInfo.hasFileAndLine)
662	findLocation(address, infoEntry, locationInfo);
663
664	return locationInfo.hasFileAndLine;
665	}
666
667	Dwarf::LineNumberVM::LineNumberVM(std::string_view data, std::string_view compilationDirectory)
668	: compilationDirectory_(compilationDirectory)
669	{
670	Section section(data);
671	SAFE_CHECK(section.next(data_), "invalid line number VM");
672	is64Bit_ = section.is64Bit();
673	init();
674	reset();
675	}
676
677	void Dwarf::LineNumberVM::reset()
678	{
679	address_ = `0`;
680	file_ = `1`;
681	line_ = `1`;
682	column_ = `0`;
683	isStmt_ = defaultIsStmt_;
684	basicBlock_ = false;
685	endSequence_ = false;
686	prologueEnd_ = false;
687	epilogueBegin_ = false;
688	isa_ = `0`;
689	discriminator_ = `0`;
690	}
691
692	void Dwarf::LineNumberVM::init()
693	{
694	version_ = read<uint16_t>(data_);
695	SAFE_CHECK(version_ >= `2` && version_ <= `4`, "invalid version in line number VM");
696	uint64_t headerLength = readOffset(data_, is64Bit_);
697	SAFE_CHECK(headerLength <= data_.size(), "invalid line number VM header length");
698	std::string_view header(data_.data(), headerLength);
699	data_ = std::string_view (header.end(), data_.end() - header.end());
700
701	minLength_ = read<uint8_t>(header);
702	if (version_ == `4`)
703	{ // Version 2 and 3 records don't have this
704	uint8_t maxOpsPerInstruction = read<uint8_t>(header);
705	SAFE_CHECK(maxOpsPerInstruction == `1`, "VLIW not supported");
706	}
707	defaultIsStmt_ = read<uint8_t>(header);
708	lineBase_ = read<int8_t>(header); // yes, signed
709	lineRange_ = read<uint8_t>(header);
710	opcodeBase_ = read<uint8_t>(header);
711	SAFE_CHECK(opcodeBase_ != `0`, "invalid opcode base");
712	standardOpcodeLengths_ = reinterpret_cast<const uint8_t >(header.data()); //-V506*
713	header.remove_prefix(opcodeBase_ - `1`);
714
715	// We don't want to use heap, so we don't keep an unbounded amount of state.
716	// We'll just skip over include directories and file names here, and
717	// we'll loop again when we actually need to retrieve one.
718	std::string_view sp;
719	const char * tmp = header.data();
720	includeDirectoryCount_ = `0`;
721	while (!(sp = readNullTerminated(header)).empty())
722	{
723	++includeDirectoryCount_;
724	}
725	includeDirectories_ = std::string_view (tmp, header.data() - tmp);
726
727	tmp = header.data();
728	FileName fn;
729	fileNameCount_ = `0`;
730	while (readFileName(header, fn))
731	{
732	++fileNameCount_;
733	}
734	fileNames_ = std::string_view (tmp, header.data() - tmp);
735	}
736
737	bool Dwarf::LineNumberVM::next(std::string_view & program)
738	{
739	Dwarf::LineNumberVM::StepResult ret;
740	do
741	{
742	ret = step(program);
743	} while (ret == CONTINUE);
744
745	return (ret == COMMIT);
746	}
747
748	Dwarf::LineNumberVM::FileName Dwarf::LineNumberVM::getFileName(uint64_t index) const
749	{
750	SAFE_CHECK(index != `0`, "invalid file index 0");
751
752	FileName fn;
753	if (index <= fileNameCount_)
754	{
755	std::string_view fileNames = fileNames_;
756	for (; index; --index)
757	{
758	if (!readFileName(fileNames, fn))
759	{
760	abort();
761	}
762	}
763	return fn;
764	}
765
766	index -= fileNameCount_;
767
768	std::string_view program = data_;
769	for (; index; --index)
770	{
771	SAFE_CHECK(nextDefineFile(program, fn), "invalid file index");
772	}
773
774	return fn;
775	}
776
777	std::string_view Dwarf::LineNumberVM::getIncludeDirectory(uint64_t index) const
778	{
779	if (index == `0`)
780	{
781	return std::string_view ();
782	}
783
784	SAFE_CHECK(index <= includeDirectoryCount_, "invalid include directory");
785
786	std::string_view includeDirectories = includeDirectories_;
787	std::string_view dir;
788	for (; index; --index)
789	{
790	dir = readNullTerminated(includeDirectories);
791	if (dir.empty())
792	{
793	abort(); // BUG
794	}
795	}
796
797	return dir;
798	}
799
800	bool Dwarf::LineNumberVM::readFileName(std::string_view & program, FileName & fn)
801	{
802	fn.relativeName = readNullTerminated(program);
803	if (fn.relativeName.empty())
804	{
805	return false;
806	}
807	fn.directoryIndex = readULEB(program);
808	// Skip over file size and last modified time
809	readULEB(program);
810	readULEB(program);
811	return true;
812	}
813
814	bool Dwarf::LineNumberVM::nextDefineFile(std::string_view & program, FileName & fn) const
815	{
816	while (!program.empty())
817	{
818	auto opcode = read<uint8_t>(program);
819
820	if (opcode >= opcodeBase_)
821	{ // special opcode
822	continue;
823	}
824
825	if (opcode != `0`)
826	{ // standard opcode
827	// Skip, slurp the appropriate number of LEB arguments
828	uint8_t argCount = standardOpcodeLengths_[opcode - `1`];
829	while (argCount--)
830	{
831	readULEB(program);
832	}
833	continue;
834	}
835
836	// Extended opcode
837	auto length = readULEB(program);
838	// the opcode itself should be included in the length, so length >= 1
839	SAFE_CHECK(length != `0`, "invalid extended opcode length");
840	read<uint8_t>(program); // extended opcode
841	--length;
842
843	if (opcode == DW_LNE_define_file)
844	{
845	SAFE_CHECK(readFileName(program, fn), "invalid empty file in DW_LNE_define_file");
846	return true;
847	}
848
849	program.remove_prefix(length);
850	continue;
851	}
852
853	return false;
854	}
855
856	Dwarf::LineNumberVM::StepResult Dwarf::LineNumberVM::step(std::string_view & program)
857	{
858	auto opcode = read<uint8_t>(program);
859
860	if (opcode >= opcodeBase_)
861	{ // special opcode
862	uint8_t adjustedOpcode = opcode - opcodeBase_;
863	uint8_t opAdvance = adjustedOpcode / lineRange_;
864
865	address_ += minLength_ * opAdvance;
866	line_ += lineBase_ + adjustedOpcode % lineRange_;
867
868	basicBlock_ = false;
869	prologueEnd_ = false;
870	epilogueBegin_ = false;
871	discriminator_ = `0`;
872	return COMMIT;
873	}
874
875	if (opcode != `0`)
876	{ // standard opcode
877	// Only interpret opcodes that are recognized by the version we're parsing;
878	// the others are vendor extensions and we should ignore them.
879	switch (opcode)
880	{
881	case DW_LNS_copy:
882	basicBlock_ = false;
883	prologueEnd_ = false;
884	epilogueBegin_ = false;
885	discriminator_ = `0`;
886	return COMMIT;
887	case DW_LNS_advance_pc:
888	address_ += minLength_ * readULEB(program);
889	return CONTINUE;
890	case DW_LNS_advance_line:
891	line_ += readSLEB(program);
892	return CONTINUE;
893	case DW_LNS_set_file:
894	file_ = readULEB(program);
895	return CONTINUE;
896	case DW_LNS_set_column:
897	column_ = readULEB(program);
898	return CONTINUE;
899	case DW_LNS_negate_stmt:
900	isStmt_ = !isStmt_;
901	return CONTINUE;
902	case DW_LNS_set_basic_block:
903	basicBlock_ = true;
904	return CONTINUE;
905	case DW_LNS_const_add_pc:
906	address_ += minLength_ * ((`255` - opcodeBase_) / lineRange_);
907	return CONTINUE;
908	case DW_LNS_fixed_advance_pc:
909	address_ += read<uint16_t>(program);
910	return CONTINUE;
911	case DW_LNS_set_prologue_end:
912	if (version_ == `2`)
913	{
914	break; // not supported in version 2
915	}
916	prologueEnd_ = true;
917	return CONTINUE;
918	case DW_LNS_set_epilogue_begin:
919	if (version_ == `2`)
920	{
921	break; // not supported in version 2
922	}
923	epilogueBegin_ = true;
924	return CONTINUE;
925	case DW_LNS_set_isa:
926	if (version_ == `2`)
927	{
928	break; // not supported in version 2
929	}
930	isa_ = readULEB(program);
931	return CONTINUE;
932	}
933
934	// Unrecognized standard opcode, slurp the appropriate number of LEB
935	// arguments.
936	uint8_t argCount = standardOpcodeLengths_[opcode - `1`];
937	while (argCount--)
938	{
939	readULEB(program);
940	}
941	return CONTINUE;
942	}
943
944	// Extended opcode
945	auto length = readULEB(program);
946	// the opcode itself should be included in the length, so length >= 1
947	SAFE_CHECK(length != `0`, "invalid extended opcode length");
948	auto extendedOpcode = read<uint8_t>(program);
949	--length;
950
951	switch (extendedOpcode)
952	{
953	case DW_LNE_end_sequence:
954	return END;
955	case DW_LNE_set_address:
956	address_ = read<uintptr_t>(program);
957	return CONTINUE;
958	case DW_LNE_define_file:
959	// We can't process DW_LNE_define_file here, as it would require us to
960	// use unbounded amounts of state (ie. use the heap). We'll do a second
961	// pass (using nextDefineFile()) if necessary.
962	break;
963	case DW_LNE_set_discriminator:
964	discriminator_ = readULEB(program);
965	return CONTINUE;
966	}
967
968	// Unrecognized extended opcode
969	program.remove_prefix(length);
970	return CONTINUE;
971	}
972
973	bool Dwarf::LineNumberVM::findAddress(uintptr_t target, Path & file, uint64_t & line)
974	{
975	std::string_view program = data_;
976
977	// Within each sequence of instructions, the address may only increase.
978	// Unfortunately, within the same compilation unit, sequences may appear
979	// in any order. So any sequence is a candidate if it starts at an address
980	// <= the target address, and we know we've found the target address if
981	// a candidate crosses the target address.
982	enum State
983	{
984	START,
985	LOW_SEQ, // candidate
986	HIGH_SEQ
987	};
988	State state = START;
989	reset();
990
991	uint64_t prevFile = `0`;
992	uint64_t prevLine = `0`;
993	while (!program.empty())
994	{
995	bool seqEnd = !next(program);
996
997	if (state == START)
998	{
999	if (!seqEnd)
1000	{
1001	state = address_ <= target ? LOW_SEQ : HIGH_SEQ;
1002	}
1003	}
1004
1005	if (state == LOW_SEQ)
1006	{
1007	if (address_ > target)
1008	{
1009	// Found it! Note that ">" is indeed correct (not ">="), as each
1010	// sequence is guaranteed to have one entry past-the-end (emitted by
1011	// DW_LNE_end_sequence)
1012	if (prevFile == `0`)
1013	{
1014	return false;
1015	}
1016	auto fn = getFileName(prevFile);
1017	file = Path (compilationDirectory_, getIncludeDirectory(fn.directoryIndex), fn.relativeName);
1018	line = prevLine;
1019	return true;
1020	}
1021	prevFile = file_;
1022	prevLine = line_;
1023	}
1024
1025	if (seqEnd)
1026	{
1027	state = START;
1028	reset();
1029	}
1030	}
1031
1032	return false;
1033	}
1034
1035	}
1036
1037	#endif
1038

Browse the source code of ClickHouse/dbms/src/Common/Dwarf.cpp