1 | #if defined(__ELF__) && !defined(__FreeBSD__) |
2 | |
3 | #include <Common/SymbolIndex.h> |
4 | |
5 | #include <algorithm> |
6 | #include <optional> |
7 | |
8 | #include <link.h> |
9 | |
10 | //#include <iostream> |
11 | #include <filesystem> |
12 | |
13 | /** |
14 | |
15 | ELF object can contain three different places with symbol names and addresses: |
16 | |
1. Symbol table in section headers. It is used for static linking and is usually left in the executable.
It is not loaded into memory and is not necessary for the program to run.
It is not related to debug info and is present regardless of the -g flag.
You can use strip to get rid of this symbol table.
If you have this symbol table in your binary, you can manually read it and get symbol names, even for symbols from anonymous namespaces.
22 | |
2. Hashes in program headers such as DT_HASH and DT_GNU_HASH.
They are necessary for dynamic objects (.so libraries and any dynamically linked executable that depends on .so libraries)
because they are used for dynamic linking, which happens at runtime and is performed by the dynamic loader.
Only exported symbols are present in these hash tables. Symbols from anonymous namespaces are not.
This part of the executable binary is loaded into memory and is accessible via the 'dl_iterate_phdr', 'dladdr' and 'backtrace_symbols' functions from libc.
ClickHouse versions prior to 19.13 used only these symbol names to symbolize stack traces,
so stack traces may be incomplete due to the lack of symbols with internal linkage.
But because ClickHouse is linked with most of the symbols exported (-rdynamic flag), it can still provide good enough stack traces.
31 | |
3. DWARF debug info. It contains the most detailed information about symbols and everything else.
It allows getting source file names and line numbers from addresses. It is only available if you use the -g option of the compiler.
It is also used by default for ClickHouse builds, but because of its weight (about two gigabytes)
it is split into a separate binary and provided in the clickhouse-common-static-dbg package.
This separate binary is placed in /usr/lib/debug/usr/bin/clickhouse and is loaded automatically by tools like gdb and addr2line.
When you build ClickHouse by yourself, debug info is not split and is present in a single huge binary.
38 | |
What does ClickHouse use to provide good stack traces?

In versions prior to 19.13, only "program headers" (2) were used.
42 | |
43 | In version 19.13, ClickHouse will read program headers (2) and cache them, |
44 | also it will read itself as ELF binary and extract symbol tables from section headers (1) |
45 | to also symbolize functions that are not exported for dynamic linking. |
46 | And finally, it will read DWARF info (3) if available to display file names and line numbers. |
47 | |
48 | What detail can you obtain depending on your binary? |
49 | |
50 | If you have debug info (you build ClickHouse by yourself or install clickhouse-common-static-dbg package), you will get source file names and line numbers. |
51 | Otherwise you will get only symbol names. If your binary contains symbol table in section headers (the default, unless stripped), you will get all symbol names. |
52 | Otherwise you will get only exported symbols from program headers. |
53 | |
54 | */ |
55 | |
56 | |
57 | namespace DB |
58 | { |
59 | |
60 | namespace |
61 | { |
62 | |
63 | /// Notes: "PHDR" is "Program Headers". |
64 | /// To look at program headers, run: |
65 | /// readelf -l ./clickhouse-server |
66 | /// To look at section headers, run: |
67 | /// readelf -S ./clickhouse-server |
68 | /// Also look at: https://wiki.osdev.org/ELF |
69 | /// Also look at: man elf |
70 | /// http://www.linker-aliens.org/blogs/ali/entry/inside_elf_symbol_tables/ |
71 | /// https://stackoverflow.com/questions/32088140/multiple-string-tables-in-elf-object |
72 | |
73 | |
74 | /// Based on the code of musl-libc and the answer of Kanalpiroge on |
75 | /// https://stackoverflow.com/questions/15779185/list-all-the-functions-symbols-on-the-fly-in-c-code-on-a-linux-architecture |
76 | /// It does not extract all the symbols (but only public - exported and used for dynamic linking), |
77 | /// but will work if we cannot find or parse ELF files. |
78 | void (dl_phdr_info * info, |
79 | std::vector<SymbolIndex::Symbol> & symbols) |
80 | { |
81 | /* Iterate over all headers of the current shared lib |
82 | * (first call is for the executable itself) |
83 | */ |
84 | for (size_t = 0; header_index < info->dlpi_phnum; ++header_index) |
85 | { |
86 | /* Further processing is only needed if the dynamic section is reached |
87 | */ |
88 | if (info->dlpi_phdr[header_index].p_type != PT_DYNAMIC) |
89 | continue; |
90 | |
91 | /* Get a pointer to the first entry of the dynamic section. |
92 | * It's address is the shared lib's address + the virtual address |
93 | */ |
94 | const ElfW(Dyn) * dyn_begin = reinterpret_cast<const ElfW(Dyn) *>(info->dlpi_addr + info->dlpi_phdr[header_index].p_vaddr); |
95 | |
96 | /// For unknown reason, addresses are sometimes relative sometimes absolute. |
97 | auto correct_address = [](ElfW(Addr) base, ElfW(Addr) ptr) |
98 | { |
99 | return ptr > base ? ptr : base + ptr; |
100 | }; |
101 | |
102 | /* Iterate over all entries of the dynamic section until the |
103 | * end of the symbol table is reached. This is indicated by |
104 | * an entry with d_tag == DT_NULL. |
105 | */ |
106 | |
107 | size_t sym_cnt = 0; |
108 | for (auto it = dyn_begin; it->d_tag != DT_NULL; ++it) |
109 | { |
110 | // TODO: this branch leads to invalid address of the hash table. Need further investigation. |
111 | // if (it->d_tag == DT_HASH) |
112 | // { |
113 | // const ElfW(Word) * hash = reinterpret_cast<const ElfW(Word) *>(correct_address(info->dlpi_addr, it->d_un.d_ptr)); |
114 | // sym_cnt = hash[1]; |
115 | // break; |
116 | // } |
117 | if (it->d_tag == DT_GNU_HASH) |
118 | { |
119 | /// This code based on Musl-libc. |
120 | |
121 | const uint32_t * buckets = nullptr; |
122 | const uint32_t * hashval = nullptr; |
123 | |
124 | const ElfW(Word) * hash = reinterpret_cast<const ElfW(Word) *>(correct_address(info->dlpi_addr, it->d_un.d_ptr)); |
125 | |
126 | buckets = hash + 4 + (hash[2] * sizeof(size_t) / 4); |
127 | |
128 | for (ElfW(Word) i = 0; i < hash[0]; ++i) |
129 | if (buckets[i] > sym_cnt) |
130 | sym_cnt = buckets[i]; |
131 | |
132 | if (sym_cnt) |
133 | { |
134 | sym_cnt -= hash[1]; |
135 | hashval = buckets + hash[0] + sym_cnt; |
136 | do |
137 | { |
138 | ++sym_cnt; |
139 | } |
140 | while (!(*hashval++ & 1)); |
141 | } |
142 | |
143 | break; |
144 | } |
145 | } |
146 | |
147 | if (!sym_cnt) |
148 | continue; |
149 | |
150 | const char * strtab = nullptr; |
151 | for (auto it = dyn_begin; it->d_tag != DT_NULL; ++it) |
152 | { |
153 | if (it->d_tag == DT_STRTAB) |
154 | { |
155 | strtab = reinterpret_cast<const char *>(correct_address(info->dlpi_addr, it->d_un.d_ptr)); |
156 | break; |
157 | } |
158 | } |
159 | |
160 | if (!strtab) |
161 | continue; |
162 | |
163 | for (auto it = dyn_begin; it->d_tag != DT_NULL; ++it) |
164 | { |
165 | if (it->d_tag == DT_SYMTAB) |
166 | { |
167 | /* Get the pointer to the first entry of the symbol table */ |
168 | const ElfW(Sym) * elf_sym = reinterpret_cast<const ElfW(Sym) *>(correct_address(info->dlpi_addr, it->d_un.d_ptr)); |
169 | |
170 | /* Iterate over the symbol table */ |
171 | for (ElfW(Word) sym_index = 0; sym_index < sym_cnt; ++sym_index) |
172 | { |
173 | /// We are not interested in empty symbols. |
174 | if (!elf_sym[sym_index].st_size) |
175 | continue; |
176 | |
177 | /* Get the name of the sym_index-th symbol. |
178 | * This is located at the address of st_name relative to the beginning of the string table. |
179 | */ |
180 | const char * sym_name = &strtab[elf_sym[sym_index].st_name]; |
181 | |
182 | if (!sym_name) |
183 | continue; |
184 | |
185 | SymbolIndex::Symbol symbol; |
186 | symbol.address_begin = reinterpret_cast<const void *>(info->dlpi_addr + elf_sym[sym_index].st_value); |
187 | symbol.address_end = reinterpret_cast<const void *>(info->dlpi_addr + elf_sym[sym_index].st_value + elf_sym[sym_index].st_size); |
188 | symbol.name = sym_name; |
189 | symbols.push_back(std::move(symbol)); |
190 | } |
191 | |
192 | break; |
193 | } |
194 | } |
195 | } |
196 | } |
197 | |
198 | |
199 | void collectSymbolsFromELFSymbolTable( |
200 | dl_phdr_info * info, |
201 | const Elf & elf, |
202 | const Elf::Section & symbol_table, |
203 | const Elf::Section & string_table, |
204 | std::vector<SymbolIndex::Symbol> & symbols) |
205 | { |
206 | /// Iterate symbol table. |
207 | const ElfSym * symbol_table_entry = reinterpret_cast<const ElfSym *>(symbol_table.begin()); |
208 | const ElfSym * symbol_table_end = reinterpret_cast<const ElfSym *>(symbol_table.end()); |
209 | |
210 | const char * strings = string_table.begin(); |
211 | |
212 | for (; symbol_table_entry < symbol_table_end; ++symbol_table_entry) |
213 | { |
214 | if (!symbol_table_entry->st_name |
215 | || !symbol_table_entry->st_value |
216 | || !symbol_table_entry->st_size |
217 | || strings + symbol_table_entry->st_name >= elf.end()) |
218 | continue; |
219 | |
220 | /// Find the name in strings table. |
221 | const char * symbol_name = strings + symbol_table_entry->st_name; |
222 | |
223 | if (!symbol_name) |
224 | continue; |
225 | |
226 | SymbolIndex::Symbol symbol; |
227 | symbol.address_begin = reinterpret_cast<const void *>(info->dlpi_addr + symbol_table_entry->st_value); |
228 | symbol.address_end = reinterpret_cast<const void *>(info->dlpi_addr + symbol_table_entry->st_value + symbol_table_entry->st_size); |
229 | symbol.name = symbol_name; |
230 | symbols.push_back(std::move(symbol)); |
231 | } |
232 | } |
233 | |
234 | |
235 | bool searchAndCollectSymbolsFromELFSymbolTable( |
236 | dl_phdr_info * info, |
237 | const Elf & elf, |
238 | unsigned , |
239 | const char * string_table_name, |
240 | std::vector<SymbolIndex::Symbol> & symbols) |
241 | { |
242 | std::optional<Elf::Section> symbol_table; |
243 | std::optional<Elf::Section> string_table; |
244 | |
245 | if (!elf.iterateSections([&](const Elf::Section & section, size_t) |
246 | { |
247 | if (section.header.sh_type == section_header_type) |
248 | symbol_table.emplace(section); |
249 | else if (section.header.sh_type == SHT_STRTAB && 0 == strcmp(section.name(), string_table_name)) |
250 | string_table.emplace(section); |
251 | |
252 | if (symbol_table && string_table) |
253 | return true; |
254 | return false; |
255 | })) |
256 | { |
257 | return false; |
258 | } |
259 | |
260 | collectSymbolsFromELFSymbolTable(info, elf, *symbol_table, *string_table, symbols); |
261 | return true; |
262 | } |
263 | |
264 | |
265 | void collectSymbolsFromELF(dl_phdr_info * info, |
266 | std::vector<SymbolIndex::Symbol> & symbols, |
267 | std::vector<SymbolIndex::Object> & objects) |
268 | { |
269 | std::string object_name = info->dlpi_name; |
270 | |
271 | /// If the name is empty - it's main executable. |
272 | /// Find a elf file for the main executable. |
273 | |
274 | if (object_name.empty()) |
275 | object_name = "/proc/self/exe" ; |
276 | |
277 | std::error_code ec; |
278 | std::filesystem::path canonical_path = std::filesystem::canonical(object_name, ec); |
279 | |
280 | if (ec) |
281 | return; |
282 | |
283 | /// Debug info and symbol table sections may be splitted to separate binary. |
284 | std::filesystem::path debug_info_path = std::filesystem::path("/usr/lib/debug" ) / canonical_path.relative_path(); |
285 | |
286 | object_name = std::filesystem::exists(debug_info_path) ? debug_info_path : canonical_path; |
287 | |
288 | SymbolIndex::Object object; |
289 | object.elf = std::make_unique<Elf>(object_name); |
290 | object.address_begin = reinterpret_cast<const void *>(info->dlpi_addr); |
291 | object.address_end = reinterpret_cast<const void *>(info->dlpi_addr + object.elf->size()); |
292 | object.name = object_name; |
293 | objects.push_back(std::move(object)); |
294 | |
295 | searchAndCollectSymbolsFromELFSymbolTable(info, *objects.back().elf, SHT_SYMTAB, ".strtab" , symbols); |
296 | |
297 | /// Unneeded because they were parsed from "program headers" of loaded objects. |
298 | //searchAndCollectSymbolsFromELFSymbolTable(info, *objects.back().elf, SHT_DYNSYM, ".dynstr", symbols); |
299 | } |
300 | |
301 | |
302 | /* Callback for dl_iterate_phdr. |
303 | * Is called by dl_iterate_phdr for every loaded shared lib until something |
304 | * else than 0 is returned by one call of this function. |
305 | */ |
306 | int collectSymbols(dl_phdr_info * info, size_t, void * data_ptr) |
307 | { |
308 | SymbolIndex::Data & data = *reinterpret_cast<SymbolIndex::Data *>(data_ptr); |
309 | |
310 | collectSymbolsFromProgramHeaders(info, data.symbols); |
311 | collectSymbolsFromELF(info, data.symbols, data.objects); |
312 | |
313 | /* Continue iterations */ |
314 | return 0; |
315 | } |
316 | |
317 | |
/// Finds in `vec` (expected to be ordered by address_begin) the element whose
/// [address_begin, address_end) range contains `address`.
/// Returns a pointer to that element or nullptr if there is no such element.
template <typename T>
const T * find(const void * address, const std::vector<T> & vec)
{
    /// Position of the first range that starts strictly after the address.
    const auto after = std::upper_bound(vec.begin(), vec.end(), address,
        [](const void * addr, const T & range) { return addr < range.address_begin; });

    /// Every range starts after the address (or there are no ranges at all).
    if (after == vec.begin())
        return nullptr;

    /// The previous range is the last one that starts at or before the address.
    const T & candidate = *(after - 1);

    const bool inside = candidate.address_begin <= address && address < candidate.address_end;
    return inside ? &candidate : nullptr;
}
336 | |
337 | } |
338 | |
339 | |
340 | void SymbolIndex::update() |
341 | { |
342 | dl_iterate_phdr(collectSymbols, &data.symbols); |
343 | |
344 | std::sort(data.objects.begin(), data.objects.end(), [](const Object & a, const Object & b) { return a.address_begin < b.address_begin; }); |
345 | std::sort(data.symbols.begin(), data.symbols.end(), [](const Symbol & a, const Symbol & b) { return a.address_begin < b.address_begin; }); |
346 | |
347 | /// We found symbols both from loaded program headers and from ELF symbol tables. |
348 | data.symbols.erase(std::unique(data.symbols.begin(), data.symbols.end(), [](const Symbol & a, const Symbol & b) |
349 | { |
350 | return a.address_begin == b.address_begin && a.address_end == b.address_end; |
351 | }), data.symbols.end()); |
352 | } |
353 | |
354 | const SymbolIndex::Symbol * SymbolIndex::findSymbol(const void * address) const |
355 | { |
356 | return find(address, data.symbols); |
357 | } |
358 | |
359 | const SymbolIndex::Object * SymbolIndex::findObject(const void * address) const |
360 | { |
361 | return find(address, data.objects); |
362 | } |
363 | |
364 | SymbolIndex & SymbolIndex::instance() |
365 | { |
366 | static SymbolIndex instance; |
367 | return instance; |
368 | } |
369 | |
370 | } |
371 | |
372 | #endif |
373 | |