1 | // Copyright 2017 The Abseil Authors. |
2 | // |
3 | // Licensed under the Apache License, Version 2.0 (the "License"); |
4 | // you may not use this file except in compliance with the License. |
5 | // You may obtain a copy of the License at |
6 | // |
7 | // https://www.apache.org/licenses/LICENSE-2.0 |
8 | // |
9 | // Unless required by applicable law or agreed to in writing, software |
10 | // distributed under the License is distributed on an "AS IS" BASIS, |
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | // See the License for the specific language governing permissions and |
13 | // limitations under the License. |
14 | |
15 | // Allow dynamic symbol lookup in an in-memory Elf image. |
16 | // |
17 | |
18 | #include "absl/debugging/internal/elf_mem_image.h" |
19 | |
20 | #ifdef ABSL_HAVE_ELF_MEM_IMAGE // defined in elf_mem_image.h |
21 | |
22 | #include <string.h> |
23 | #include <cassert> |
24 | #include <cstddef> |
25 | #include "absl/base/internal/raw_logging.h" |
26 | |
27 | // From binutils/include/elf/common.h (this doesn't appear to be documented |
28 | // anywhere else). |
29 | // |
30 | // /* This flag appears in a Versym structure. It means that the symbol |
31 | // is hidden, and is only visible with an explicit version number. |
32 | // This is a GNU extension. */ |
33 | // #define VERSYM_HIDDEN 0x8000 |
34 | // |
35 | // /* This is the mask for the rest of the Versym information. */ |
36 | // #define VERSYM_VERSION 0x7fff |
37 | |
38 | #define VERSYM_VERSION 0x7fff |
39 | |
40 | namespace absl { |
41 | namespace debugging_internal { |
42 | |
43 | namespace { |
44 | |
45 | #if __WORDSIZE == 32 |
46 | const int kElfClass = ELFCLASS32; |
47 | int ElfBind(const ElfW(Sym) *symbol) { return ELF32_ST_BIND(symbol->st_info); } |
48 | int ElfType(const ElfW(Sym) *symbol) { return ELF32_ST_TYPE(symbol->st_info); } |
49 | #elif __WORDSIZE == 64 |
50 | const int kElfClass = ELFCLASS64; |
51 | int ElfBind(const ElfW(Sym) *symbol) { return ELF64_ST_BIND(symbol->st_info); } |
52 | int ElfType(const ElfW(Sym) *symbol) { return ELF64_ST_TYPE(symbol->st_info); } |
53 | #else |
54 | const int kElfClass = -1; |
55 | int ElfBind(const ElfW(Sym) *) { |
56 | ABSL_RAW_LOG(FATAL, "Unexpected word size" ); |
57 | return 0; |
58 | } |
59 | int ElfType(const ElfW(Sym) *) { |
60 | ABSL_RAW_LOG(FATAL, "Unexpected word size" ); |
61 | return 0; |
62 | } |
63 | #endif |
64 | |
65 | // Extract an element from one of the ELF tables, cast it to desired type. |
66 | // This is just a simple arithmetic and a glorified cast. |
67 | // Callers are responsible for bounds checking. |
68 | template <typename T> |
69 | const T *GetTableElement(const ElfW(Ehdr) * ehdr, ElfW(Off) table_offset, |
70 | ElfW(Word) element_size, size_t index) { |
71 | return reinterpret_cast<const T*>(reinterpret_cast<const char *>(ehdr) |
72 | + table_offset |
73 | + index * element_size); |
74 | } |
75 | |
76 | } // namespace |
77 | |
// Out-of-class definition of the sentinel object.
// The value of this variable doesn't matter; it's used only for its
// unique address.
const int ElfMemImage::kInvalidBaseSentinel = 0;
81 | |
// Constructs an image description for the ELF object located at `base`.
// `base` is checked against kInvalidBase with ABSL_RAW_CHECK and then passed
// to Init(), which does the actual parsing (a null `base` is accepted there
// and yields an image with all tables unset).
ElfMemImage::ElfMemImage(const void *base) {
  ABSL_RAW_CHECK(base != kInvalidBase, "bad pointer" );
  Init(base);
}
86 | |
87 | int ElfMemImage::GetNumSymbols() const { |
88 | if (!hash_) { |
89 | return 0; |
90 | } |
91 | // See http://www.caldera.com/developers/gabi/latest/ch5.dynamic.html#hash |
92 | return hash_[1]; |
93 | } |
94 | |
95 | const ElfW(Sym) *ElfMemImage::GetDynsym(int index) const { |
96 | ABSL_RAW_CHECK(index < GetNumSymbols(), "index out of range" ); |
97 | return dynsym_ + index; |
98 | } |
99 | |
100 | const ElfW(Versym) *ElfMemImage::GetVersym(int index) const { |
101 | ABSL_RAW_CHECK(index < GetNumSymbols(), "index out of range" ); |
102 | return versym_ + index; |
103 | } |
104 | |
105 | const ElfW(Phdr) *ElfMemImage::GetPhdr(int index) const { |
106 | ABSL_RAW_CHECK(index < ehdr_->e_phnum, "index out of range" ); |
107 | return GetTableElement<ElfW(Phdr)>(ehdr_, |
108 | ehdr_->e_phoff, |
109 | ehdr_->e_phentsize, |
110 | index); |
111 | } |
112 | |
113 | const char *ElfMemImage::GetDynstr(ElfW(Word) offset) const { |
114 | ABSL_RAW_CHECK(offset < strsize_, "offset out of range" ); |
115 | return dynstr_ + offset; |
116 | } |
117 | |
118 | const void *ElfMemImage::GetSymAddr(const ElfW(Sym) *sym) const { |
119 | if (sym->st_shndx == SHN_UNDEF || sym->st_shndx >= SHN_LORESERVE) { |
120 | // Symbol corresponds to "special" (e.g. SHN_ABS) section. |
121 | return reinterpret_cast<const void *>(sym->st_value); |
122 | } |
123 | ABSL_RAW_CHECK(link_base_ < sym->st_value, "symbol out of range" ); |
124 | return GetTableElement<char>(ehdr_, 0, 1, sym->st_value - link_base_); |
125 | } |
126 | |
127 | const ElfW(Verdef) *ElfMemImage::GetVerdef(int index) const { |
128 | ABSL_RAW_CHECK(0 <= index && static_cast<size_t>(index) <= verdefnum_, |
129 | "index out of range" ); |
130 | const ElfW(Verdef) *version_definition = verdef_; |
131 | while (version_definition->vd_ndx < index && version_definition->vd_next) { |
132 | const char *const version_definition_as_char = |
133 | reinterpret_cast<const char *>(version_definition); |
134 | version_definition = |
135 | reinterpret_cast<const ElfW(Verdef) *>(version_definition_as_char + |
136 | version_definition->vd_next); |
137 | } |
138 | return version_definition->vd_ndx == index ? version_definition : nullptr; |
139 | } |
140 | |
141 | const ElfW(Verdaux) *ElfMemImage::GetVerdefAux( |
142 | const ElfW(Verdef) *verdef) const { |
143 | return reinterpret_cast<const ElfW(Verdaux) *>(verdef+1); |
144 | } |
145 | |
146 | const char *ElfMemImage::GetVerstr(ElfW(Word) offset) const { |
147 | ABSL_RAW_CHECK(offset < strsize_, "offset out of range" ); |
148 | return dynstr_ + offset; |
149 | } |
150 | |
151 | void ElfMemImage::Init(const void *base) { |
152 | ehdr_ = nullptr; |
153 | dynsym_ = nullptr; |
154 | dynstr_ = nullptr; |
155 | versym_ = nullptr; |
156 | verdef_ = nullptr; |
157 | hash_ = nullptr; |
158 | strsize_ = 0; |
159 | verdefnum_ = 0; |
160 | link_base_ = ~0L; // Sentinel: PT_LOAD .p_vaddr can't possibly be this. |
161 | if (!base) { |
162 | return; |
163 | } |
164 | const char *const base_as_char = reinterpret_cast<const char *>(base); |
165 | if (base_as_char[EI_MAG0] != ELFMAG0 || base_as_char[EI_MAG1] != ELFMAG1 || |
166 | base_as_char[EI_MAG2] != ELFMAG2 || base_as_char[EI_MAG3] != ELFMAG3) { |
167 | assert(false); |
168 | return; |
169 | } |
170 | int elf_class = base_as_char[EI_CLASS]; |
171 | if (elf_class != kElfClass) { |
172 | assert(false); |
173 | return; |
174 | } |
175 | switch (base_as_char[EI_DATA]) { |
176 | case ELFDATA2LSB: { |
177 | if (__LITTLE_ENDIAN != __BYTE_ORDER) { |
178 | assert(false); |
179 | return; |
180 | } |
181 | break; |
182 | } |
183 | case ELFDATA2MSB: { |
184 | if (__BIG_ENDIAN != __BYTE_ORDER) { |
185 | assert(false); |
186 | return; |
187 | } |
188 | break; |
189 | } |
190 | default: { |
191 | assert(false); |
192 | return; |
193 | } |
194 | } |
195 | |
196 | ehdr_ = reinterpret_cast<const ElfW(Ehdr) *>(base); |
197 | const ElfW(Phdr) * = nullptr; |
198 | for (int i = 0; i < ehdr_->e_phnum; ++i) { |
199 | const ElfW(Phdr) *const = GetPhdr(i); |
200 | switch (program_header->p_type) { |
201 | case PT_LOAD: |
202 | if (!~link_base_) { |
203 | link_base_ = program_header->p_vaddr; |
204 | } |
205 | break; |
206 | case PT_DYNAMIC: |
207 | dynamic_program_header = program_header; |
208 | break; |
209 | } |
210 | } |
211 | if (!~link_base_ || !dynamic_program_header) { |
212 | assert(false); |
213 | // Mark this image as not present. Can not recur infinitely. |
214 | Init(nullptr); |
215 | return; |
216 | } |
217 | ptrdiff_t relocation = |
218 | base_as_char - reinterpret_cast<const char *>(link_base_); |
219 | ElfW(Dyn) *dynamic_entry = |
220 | reinterpret_cast<ElfW(Dyn) *>(dynamic_program_header->p_vaddr + |
221 | relocation); |
222 | for (; dynamic_entry->d_tag != DT_NULL; ++dynamic_entry) { |
223 | const ElfW(Xword) value = dynamic_entry->d_un.d_val + relocation; |
224 | switch (dynamic_entry->d_tag) { |
225 | case DT_HASH: |
226 | hash_ = reinterpret_cast<ElfW(Word) *>(value); |
227 | break; |
228 | case DT_SYMTAB: |
229 | dynsym_ = reinterpret_cast<ElfW(Sym) *>(value); |
230 | break; |
231 | case DT_STRTAB: |
232 | dynstr_ = reinterpret_cast<const char *>(value); |
233 | break; |
234 | case DT_VERSYM: |
235 | versym_ = reinterpret_cast<ElfW(Versym) *>(value); |
236 | break; |
237 | case DT_VERDEF: |
238 | verdef_ = reinterpret_cast<ElfW(Verdef) *>(value); |
239 | break; |
240 | case DT_VERDEFNUM: |
241 | verdefnum_ = dynamic_entry->d_un.d_val; |
242 | break; |
243 | case DT_STRSZ: |
244 | strsize_ = dynamic_entry->d_un.d_val; |
245 | break; |
246 | default: |
247 | // Unrecognized entries explicitly ignored. |
248 | break; |
249 | } |
250 | } |
251 | if (!hash_ || !dynsym_ || !dynstr_ || !versym_ || |
252 | !verdef_ || !verdefnum_ || !strsize_) { |
253 | assert(false); // invalid VDSO |
254 | // Mark this image as not present. Can not recur infinitely. |
255 | Init(nullptr); |
256 | return; |
257 | } |
258 | } |
259 | |
260 | bool ElfMemImage::LookupSymbol(const char *name, |
261 | const char *version, |
262 | int type, |
263 | SymbolInfo *info_out) const { |
264 | for (const SymbolInfo& info : *this) { |
265 | if (strcmp(info.name, name) == 0 && strcmp(info.version, version) == 0 && |
266 | ElfType(info.symbol) == type) { |
267 | if (info_out) { |
268 | *info_out = info; |
269 | } |
270 | return true; |
271 | } |
272 | } |
273 | return false; |
274 | } |
275 | |
276 | bool ElfMemImage::LookupSymbolByAddress(const void *address, |
277 | SymbolInfo *info_out) const { |
278 | for (const SymbolInfo& info : *this) { |
279 | const char *const symbol_start = |
280 | reinterpret_cast<const char *>(info.address); |
281 | const char *const symbol_end = symbol_start + info.symbol->st_size; |
282 | if (symbol_start <= address && address < symbol_end) { |
283 | if (info_out) { |
284 | // Client wants to know details for that symbol (the usual case). |
285 | if (ElfBind(info.symbol) == STB_GLOBAL) { |
286 | // Strong symbol; just return it. |
287 | *info_out = info; |
288 | return true; |
289 | } else { |
290 | // Weak or local. Record it, but keep looking for a strong one. |
291 | *info_out = info; |
292 | } |
293 | } else { |
294 | // Client only cares if there is an overlapping symbol. |
295 | return true; |
296 | } |
297 | } |
298 | } |
299 | return false; |
300 | } |
301 | |
// Creates an iterator over `image` positioned at `index`.
// Only index_ and image_ are set here; info_ is filled in by Update().
ElfMemImage::SymbolIterator::SymbolIterator(const void *const image, int index)
    : index_(index), image_(image) {
}
305 | |
// Member access to the cached description (info_) of the current symbol.
const ElfMemImage::SymbolInfo *ElfMemImage::SymbolIterator::operator->() const {
  return &info_;
}
309 | |
// Returns a reference to the cached description (info_) of the current
// symbol. The reference points into the iterator itself.
const ElfMemImage::SymbolInfo& ElfMemImage::SymbolIterator::operator*() const {
  return info_;
}
313 | |
314 | bool ElfMemImage::SymbolIterator::operator==(const SymbolIterator &rhs) const { |
315 | return this->image_ == rhs.image_ && this->index_ == rhs.index_; |
316 | } |
317 | |
318 | bool ElfMemImage::SymbolIterator::operator!=(const SymbolIterator &rhs) const { |
319 | return !(*this == rhs); |
320 | } |
321 | |
322 | ElfMemImage::SymbolIterator &ElfMemImage::SymbolIterator::operator++() { |
323 | this->Update(1); |
324 | return *this; |
325 | } |
326 | |
327 | ElfMemImage::SymbolIterator ElfMemImage::begin() const { |
328 | SymbolIterator it(this, 0); |
329 | it.Update(0); |
330 | return it; |
331 | } |
332 | |
333 | ElfMemImage::SymbolIterator ElfMemImage::end() const { |
334 | return SymbolIterator(this, GetNumSymbols()); |
335 | } |
336 | |
337 | void ElfMemImage::SymbolIterator::Update(int increment) { |
338 | const ElfMemImage *image = reinterpret_cast<const ElfMemImage *>(image_); |
339 | ABSL_RAW_CHECK(image->IsPresent() || increment == 0, "" ); |
340 | if (!image->IsPresent()) { |
341 | return; |
342 | } |
343 | index_ += increment; |
344 | if (index_ >= image->GetNumSymbols()) { |
345 | index_ = image->GetNumSymbols(); |
346 | return; |
347 | } |
348 | const ElfW(Sym) *symbol = image->GetDynsym(index_); |
349 | const ElfW(Versym) *version_symbol = image->GetVersym(index_); |
350 | ABSL_RAW_CHECK(symbol && version_symbol, "" ); |
351 | const char *const symbol_name = image->GetDynstr(symbol->st_name); |
352 | const ElfW(Versym) version_index = version_symbol[0] & VERSYM_VERSION; |
353 | const ElfW(Verdef) *version_definition = nullptr; |
354 | const char *version_name = "" ; |
355 | if (symbol->st_shndx == SHN_UNDEF) { |
356 | // Undefined symbols reference DT_VERNEED, not DT_VERDEF, and |
357 | // version_index could well be greater than verdefnum_, so calling |
358 | // GetVerdef(version_index) may trigger assertion. |
359 | } else { |
360 | version_definition = image->GetVerdef(version_index); |
361 | } |
362 | if (version_definition) { |
363 | // I am expecting 1 or 2 auxiliary entries: 1 for the version itself, |
364 | // optional 2nd if the version has a parent. |
365 | ABSL_RAW_CHECK( |
366 | version_definition->vd_cnt == 1 || version_definition->vd_cnt == 2, |
367 | "wrong number of entries" ); |
368 | const ElfW(Verdaux) *version_aux = image->GetVerdefAux(version_definition); |
369 | version_name = image->GetVerstr(version_aux->vda_name); |
370 | } |
371 | info_.name = symbol_name; |
372 | info_.version = version_name; |
373 | info_.address = image->GetSymAddr(symbol); |
374 | info_.symbol = symbol; |
375 | } |
376 | |
377 | } // namespace debugging_internal |
378 | } // namespace absl |
379 | |
380 | #endif // ABSL_HAVE_ELF_MEM_IMAGE |
381 | |