1#if defined(__ELF__) && !defined(__FreeBSD__)
2
3#include <Common/SymbolIndex.h>
4
5#include <algorithm>
6#include <optional>
7
8#include <link.h>
9
10//#include <iostream>
11#include <filesystem>
12
13/**
14
15ELF object can contain three different places with symbol names and addresses:
16
171. Symbol table in section headers. It is used for static linking and usually left in executable.
It is not loaded in memory and is not necessary for the program to run.
19It does not relate to debug info and present regardless to -g flag.
20You can use strip to get rid of this symbol table.
21If you have this symbol table in your binary, you can manually read it and get symbol names, even for symbols from anonymous namespaces.
22
232. Hashes in program headers such as DT_HASH and DT_GNU_HASH.
24It is necessary for dynamic object (.so libraries and any dynamically linked executable that depend on .so libraries)
25because it is used for dynamic linking that happens in runtime and performed by dynamic loader.
26Only exported symbols will be presented in that hash tables. Symbols from anonymous namespaces are not.
27This part of executable binary is loaded in memory and accessible via 'dl_iterate_phdr', 'dladdr' and 'backtrace_symbols' functions from libc.
ClickHouse versions prior to 19.13 used just these symbol names to symbolize stack traces,
and stack traces could be incomplete due to the lack of symbols with internal linkage.
30But because ClickHouse is linked with most of the symbols exported (-rdynamic flag) it can still provide good enough stack traces.
31
323. DWARF debug info. It contains the most detailed information about symbols and everything else.
33It allows to get source file names and line numbers from addresses. Only available if you use -g option for compiler.
It is also used by default for ClickHouse builds, but because of its weight (about two gigabytes)
it is split into a separate binary and provided in the clickhouse-common-static-dbg package.
This separate binary is placed in /usr/lib/debug/usr/bin/clickhouse and is loaded automatically by tools like gdb, addr2line.
When you build ClickHouse by yourself, debug info is not split and is present in a single huge binary.
38
39What ClickHouse is using to provide good stack traces?
40
In versions prior to 19.13, only "program headers" (2) were used.
42
43In version 19.13, ClickHouse will read program headers (2) and cache them,
44also it will read itself as ELF binary and extract symbol tables from section headers (1)
45to also symbolize functions that are not exported for dynamic linking.
46And finally, it will read DWARF info (3) if available to display file names and line numbers.
47
48What detail can you obtain depending on your binary?
49
50If you have debug info (you build ClickHouse by yourself or install clickhouse-common-static-dbg package), you will get source file names and line numbers.
51Otherwise you will get only symbol names. If your binary contains symbol table in section headers (the default, unless stripped), you will get all symbol names.
52Otherwise you will get only exported symbols from program headers.
53
54*/
55
56
57namespace DB
58{
59
60namespace
61{
62
63/// Notes: "PHDR" is "Program Headers".
64/// To look at program headers, run:
65/// readelf -l ./clickhouse-server
66/// To look at section headers, run:
67/// readelf -S ./clickhouse-server
68/// Also look at: https://wiki.osdev.org/ELF
69/// Also look at: man elf
70/// http://www.linker-aliens.org/blogs/ali/entry/inside_elf_symbol_tables/
71/// https://stackoverflow.com/questions/32088140/multiple-string-tables-in-elf-object
72
73
74/// Based on the code of musl-libc and the answer of Kanalpiroge on
75/// https://stackoverflow.com/questions/15779185/list-all-the-functions-symbols-on-the-fly-in-c-code-on-a-linux-architecture
76/// It does not extract all the symbols (but only public - exported and used for dynamic linking),
77/// but will work if we cannot find or parse ELF files.
78void collectSymbolsFromProgramHeaders(dl_phdr_info * info,
79 std::vector<SymbolIndex::Symbol> & symbols)
80{
81 /* Iterate over all headers of the current shared lib
82 * (first call is for the executable itself)
83 */
84 for (size_t header_index = 0; header_index < info->dlpi_phnum; ++header_index)
85 {
86 /* Further processing is only needed if the dynamic section is reached
87 */
88 if (info->dlpi_phdr[header_index].p_type != PT_DYNAMIC)
89 continue;
90
91 /* Get a pointer to the first entry of the dynamic section.
92 * It's address is the shared lib's address + the virtual address
93 */
94 const ElfW(Dyn) * dyn_begin = reinterpret_cast<const ElfW(Dyn) *>(info->dlpi_addr + info->dlpi_phdr[header_index].p_vaddr);
95
96 /// For unknown reason, addresses are sometimes relative sometimes absolute.
97 auto correct_address = [](ElfW(Addr) base, ElfW(Addr) ptr)
98 {
99 return ptr > base ? ptr : base + ptr;
100 };
101
102 /* Iterate over all entries of the dynamic section until the
103 * end of the symbol table is reached. This is indicated by
104 * an entry with d_tag == DT_NULL.
105 */
106
107 size_t sym_cnt = 0;
108 for (auto it = dyn_begin; it->d_tag != DT_NULL; ++it)
109 {
110 // TODO: this branch leads to invalid address of the hash table. Need further investigation.
111 // if (it->d_tag == DT_HASH)
112 // {
113 // const ElfW(Word) * hash = reinterpret_cast<const ElfW(Word) *>(correct_address(info->dlpi_addr, it->d_un.d_ptr));
114 // sym_cnt = hash[1];
115 // break;
116 // }
117 if (it->d_tag == DT_GNU_HASH)
118 {
119 /// This code based on Musl-libc.
120
121 const uint32_t * buckets = nullptr;
122 const uint32_t * hashval = nullptr;
123
124 const ElfW(Word) * hash = reinterpret_cast<const ElfW(Word) *>(correct_address(info->dlpi_addr, it->d_un.d_ptr));
125
126 buckets = hash + 4 + (hash[2] * sizeof(size_t) / 4);
127
128 for (ElfW(Word) i = 0; i < hash[0]; ++i)
129 if (buckets[i] > sym_cnt)
130 sym_cnt = buckets[i];
131
132 if (sym_cnt)
133 {
134 sym_cnt -= hash[1];
135 hashval = buckets + hash[0] + sym_cnt;
136 do
137 {
138 ++sym_cnt;
139 }
140 while (!(*hashval++ & 1));
141 }
142
143 break;
144 }
145 }
146
147 if (!sym_cnt)
148 continue;
149
150 const char * strtab = nullptr;
151 for (auto it = dyn_begin; it->d_tag != DT_NULL; ++it)
152 {
153 if (it->d_tag == DT_STRTAB)
154 {
155 strtab = reinterpret_cast<const char *>(correct_address(info->dlpi_addr, it->d_un.d_ptr));
156 break;
157 }
158 }
159
160 if (!strtab)
161 continue;
162
163 for (auto it = dyn_begin; it->d_tag != DT_NULL; ++it)
164 {
165 if (it->d_tag == DT_SYMTAB)
166 {
167 /* Get the pointer to the first entry of the symbol table */
168 const ElfW(Sym) * elf_sym = reinterpret_cast<const ElfW(Sym) *>(correct_address(info->dlpi_addr, it->d_un.d_ptr));
169
170 /* Iterate over the symbol table */
171 for (ElfW(Word) sym_index = 0; sym_index < sym_cnt; ++sym_index)
172 {
173 /// We are not interested in empty symbols.
174 if (!elf_sym[sym_index].st_size)
175 continue;
176
177 /* Get the name of the sym_index-th symbol.
178 * This is located at the address of st_name relative to the beginning of the string table.
179 */
180 const char * sym_name = &strtab[elf_sym[sym_index].st_name];
181
182 if (!sym_name)
183 continue;
184
185 SymbolIndex::Symbol symbol;
186 symbol.address_begin = reinterpret_cast<const void *>(info->dlpi_addr + elf_sym[sym_index].st_value);
187 symbol.address_end = reinterpret_cast<const void *>(info->dlpi_addr + elf_sym[sym_index].st_value + elf_sym[sym_index].st_size);
188 symbol.name = sym_name;
189 symbols.push_back(std::move(symbol));
190 }
191
192 break;
193 }
194 }
195 }
196}
197
198
199void collectSymbolsFromELFSymbolTable(
200 dl_phdr_info * info,
201 const Elf & elf,
202 const Elf::Section & symbol_table,
203 const Elf::Section & string_table,
204 std::vector<SymbolIndex::Symbol> & symbols)
205{
206 /// Iterate symbol table.
207 const ElfSym * symbol_table_entry = reinterpret_cast<const ElfSym *>(symbol_table.begin());
208 const ElfSym * symbol_table_end = reinterpret_cast<const ElfSym *>(symbol_table.end());
209
210 const char * strings = string_table.begin();
211
212 for (; symbol_table_entry < symbol_table_end; ++symbol_table_entry)
213 {
214 if (!symbol_table_entry->st_name
215 || !symbol_table_entry->st_value
216 || !symbol_table_entry->st_size
217 || strings + symbol_table_entry->st_name >= elf.end())
218 continue;
219
220 /// Find the name in strings table.
221 const char * symbol_name = strings + symbol_table_entry->st_name;
222
223 if (!symbol_name)
224 continue;
225
226 SymbolIndex::Symbol symbol;
227 symbol.address_begin = reinterpret_cast<const void *>(info->dlpi_addr + symbol_table_entry->st_value);
228 symbol.address_end = reinterpret_cast<const void *>(info->dlpi_addr + symbol_table_entry->st_value + symbol_table_entry->st_size);
229 symbol.name = symbol_name;
230 symbols.push_back(std::move(symbol));
231 }
232}
233
234
235bool searchAndCollectSymbolsFromELFSymbolTable(
236 dl_phdr_info * info,
237 const Elf & elf,
238 unsigned section_header_type,
239 const char * string_table_name,
240 std::vector<SymbolIndex::Symbol> & symbols)
241{
242 std::optional<Elf::Section> symbol_table;
243 std::optional<Elf::Section> string_table;
244
245 if (!elf.iterateSections([&](const Elf::Section & section, size_t)
246 {
247 if (section.header.sh_type == section_header_type)
248 symbol_table.emplace(section);
249 else if (section.header.sh_type == SHT_STRTAB && 0 == strcmp(section.name(), string_table_name))
250 string_table.emplace(section);
251
252 if (symbol_table && string_table)
253 return true;
254 return false;
255 }))
256 {
257 return false;
258 }
259
260 collectSymbolsFromELFSymbolTable(info, elf, *symbol_table, *string_table, symbols);
261 return true;
262}
263
264
265void collectSymbolsFromELF(dl_phdr_info * info,
266 std::vector<SymbolIndex::Symbol> & symbols,
267 std::vector<SymbolIndex::Object> & objects)
268{
269 std::string object_name = info->dlpi_name;
270
271 /// If the name is empty - it's main executable.
272 /// Find a elf file for the main executable.
273
274 if (object_name.empty())
275 object_name = "/proc/self/exe";
276
277 std::error_code ec;
278 std::filesystem::path canonical_path = std::filesystem::canonical(object_name, ec);
279
280 if (ec)
281 return;
282
283 /// Debug info and symbol table sections may be splitted to separate binary.
284 std::filesystem::path debug_info_path = std::filesystem::path("/usr/lib/debug") / canonical_path.relative_path();
285
286 object_name = std::filesystem::exists(debug_info_path) ? debug_info_path : canonical_path;
287
288 SymbolIndex::Object object;
289 object.elf = std::make_unique<Elf>(object_name);
290 object.address_begin = reinterpret_cast<const void *>(info->dlpi_addr);
291 object.address_end = reinterpret_cast<const void *>(info->dlpi_addr + object.elf->size());
292 object.name = object_name;
293 objects.push_back(std::move(object));
294
295 searchAndCollectSymbolsFromELFSymbolTable(info, *objects.back().elf, SHT_SYMTAB, ".strtab", symbols);
296
297 /// Unneeded because they were parsed from "program headers" of loaded objects.
298 //searchAndCollectSymbolsFromELFSymbolTable(info, *objects.back().elf, SHT_DYNSYM, ".dynstr", symbols);
299}
300
301
302/* Callback for dl_iterate_phdr.
303 * Is called by dl_iterate_phdr for every loaded shared lib until something
304 * else than 0 is returned by one call of this function.
305 */
306int collectSymbols(dl_phdr_info * info, size_t, void * data_ptr)
307{
308 SymbolIndex::Data & data = *reinterpret_cast<SymbolIndex::Data *>(data_ptr);
309
310 collectSymbolsFromProgramHeaders(info, data.symbols);
311 collectSymbolsFromELF(info, data.symbols, data.objects);
312
313 /* Continue iterations */
314 return 0;
315}
316
317
/// Returns a pointer to the element of `vec` whose half-open range [address_begin, address_end)
/// contains `address`, or nullptr if there is no such element.
/// Uses binary search, so `vec` is expected to be ordered by address_begin.
template <typename T>
const T * find(const void * address, const std::vector<T> & vec)
{
    /// First range whose left boundary is strictly greater than the address.
    const auto after = std::upper_bound(vec.begin(), vec.end(), address,
        [](const void * addr, const T & range) { return addr < range.address_begin; });

    /// Every range starts after the address (or there are no ranges at all).
    if (after == vec.begin())
        return nullptr;

    /// Last range whose left boundary is less than or equal to the address.
    const auto candidate = after - 1;

    const bool inside = address >= candidate->address_begin && address < candidate->address_end;
    return inside ? &*candidate : nullptr;
}
336
337}
338
339
340void SymbolIndex::update()
341{
342 dl_iterate_phdr(collectSymbols, &data.symbols);
343
344 std::sort(data.objects.begin(), data.objects.end(), [](const Object & a, const Object & b) { return a.address_begin < b.address_begin; });
345 std::sort(data.symbols.begin(), data.symbols.end(), [](const Symbol & a, const Symbol & b) { return a.address_begin < b.address_begin; });
346
347 /// We found symbols both from loaded program headers and from ELF symbol tables.
348 data.symbols.erase(std::unique(data.symbols.begin(), data.symbols.end(), [](const Symbol & a, const Symbol & b)
349 {
350 return a.address_begin == b.address_begin && a.address_end == b.address_end;
351 }), data.symbols.end());
352}
353
354const SymbolIndex::Symbol * SymbolIndex::findSymbol(const void * address) const
355{
356 return find(address, data.symbols);
357}
358
359const SymbolIndex::Object * SymbolIndex::findObject(const void * address) const
360{
361 return find(address, data.objects);
362}
363
364SymbolIndex & SymbolIndex::instance()
365{
366 static SymbolIndex instance;
367 return instance;
368}
369
370}
371
372#endif
373