1// Copyright 2017 The Abseil Authors.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// https://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15// Allow dynamic symbol lookup in an in-memory Elf image.
16//
17
18#include "absl/debugging/internal/elf_mem_image.h"
19
20#ifdef ABSL_HAVE_ELF_MEM_IMAGE // defined in elf_mem_image.h
21
22#include <string.h>
23#include <cassert>
24#include <cstddef>
25#include "absl/base/internal/raw_logging.h"
26
27// From binutils/include/elf/common.h (this doesn't appear to be documented
28// anywhere else).
29//
30// /* This flag appears in a Versym structure. It means that the symbol
31// is hidden, and is only visible with an explicit version number.
32// This is a GNU extension. */
33// #define VERSYM_HIDDEN 0x8000
34//
35// /* This is the mask for the rest of the Versym information. */
36// #define VERSYM_VERSION 0x7fff
37
38#define VERSYM_VERSION 0x7fff
39
40namespace absl {
41namespace debugging_internal {
42
43namespace {
44
45#if __WORDSIZE == 32
46const int kElfClass = ELFCLASS32;
47int ElfBind(const ElfW(Sym) *symbol) { return ELF32_ST_BIND(symbol->st_info); }
48int ElfType(const ElfW(Sym) *symbol) { return ELF32_ST_TYPE(symbol->st_info); }
49#elif __WORDSIZE == 64
50const int kElfClass = ELFCLASS64;
51int ElfBind(const ElfW(Sym) *symbol) { return ELF64_ST_BIND(symbol->st_info); }
52int ElfType(const ElfW(Sym) *symbol) { return ELF64_ST_TYPE(symbol->st_info); }
53#else
54const int kElfClass = -1;
55int ElfBind(const ElfW(Sym) *) {
56 ABSL_RAW_LOG(FATAL, "Unexpected word size");
57 return 0;
58}
59int ElfType(const ElfW(Sym) *) {
60 ABSL_RAW_LOG(FATAL, "Unexpected word size");
61 return 0;
62}
63#endif
64
65// Extract an element from one of the ELF tables, cast it to desired type.
66// This is just a simple arithmetic and a glorified cast.
67// Callers are responsible for bounds checking.
68template <typename T>
69const T *GetTableElement(const ElfW(Ehdr) * ehdr, ElfW(Off) table_offset,
70 ElfW(Word) element_size, size_t index) {
71 return reinterpret_cast<const T*>(reinterpret_cast<const char *>(ehdr)
72 + table_offset
73 + index * element_size);
74}
75
76} // namespace
77
78// The value of this variable doesn't matter; it's used only for its
79// unique address.
80const int ElfMemImage::kInvalidBaseSentinel = 0;
81
82ElfMemImage::ElfMemImage(const void *base) {
83 ABSL_RAW_CHECK(base != kInvalidBase, "bad pointer");
84 Init(base);
85}
86
87int ElfMemImage::GetNumSymbols() const {
88 if (!hash_) {
89 return 0;
90 }
91 // See http://www.caldera.com/developers/gabi/latest/ch5.dynamic.html#hash
92 return hash_[1];
93}
94
95const ElfW(Sym) *ElfMemImage::GetDynsym(int index) const {
96 ABSL_RAW_CHECK(index < GetNumSymbols(), "index out of range");
97 return dynsym_ + index;
98}
99
100const ElfW(Versym) *ElfMemImage::GetVersym(int index) const {
101 ABSL_RAW_CHECK(index < GetNumSymbols(), "index out of range");
102 return versym_ + index;
103}
104
105const ElfW(Phdr) *ElfMemImage::GetPhdr(int index) const {
106 ABSL_RAW_CHECK(index < ehdr_->e_phnum, "index out of range");
107 return GetTableElement<ElfW(Phdr)>(ehdr_,
108 ehdr_->e_phoff,
109 ehdr_->e_phentsize,
110 index);
111}
112
113const char *ElfMemImage::GetDynstr(ElfW(Word) offset) const {
114 ABSL_RAW_CHECK(offset < strsize_, "offset out of range");
115 return dynstr_ + offset;
116}
117
118const void *ElfMemImage::GetSymAddr(const ElfW(Sym) *sym) const {
119 if (sym->st_shndx == SHN_UNDEF || sym->st_shndx >= SHN_LORESERVE) {
120 // Symbol corresponds to "special" (e.g. SHN_ABS) section.
121 return reinterpret_cast<const void *>(sym->st_value);
122 }
123 ABSL_RAW_CHECK(link_base_ < sym->st_value, "symbol out of range");
124 return GetTableElement<char>(ehdr_, 0, 1, sym->st_value - link_base_);
125}
126
127const ElfW(Verdef) *ElfMemImage::GetVerdef(int index) const {
128 ABSL_RAW_CHECK(0 <= index && static_cast<size_t>(index) <= verdefnum_,
129 "index out of range");
130 const ElfW(Verdef) *version_definition = verdef_;
131 while (version_definition->vd_ndx < index && version_definition->vd_next) {
132 const char *const version_definition_as_char =
133 reinterpret_cast<const char *>(version_definition);
134 version_definition =
135 reinterpret_cast<const ElfW(Verdef) *>(version_definition_as_char +
136 version_definition->vd_next);
137 }
138 return version_definition->vd_ndx == index ? version_definition : nullptr;
139}
140
141const ElfW(Verdaux) *ElfMemImage::GetVerdefAux(
142 const ElfW(Verdef) *verdef) const {
143 return reinterpret_cast<const ElfW(Verdaux) *>(verdef+1);
144}
145
146const char *ElfMemImage::GetVerstr(ElfW(Word) offset) const {
147 ABSL_RAW_CHECK(offset < strsize_, "offset out of range");
148 return dynstr_ + offset;
149}
150
151void ElfMemImage::Init(const void *base) {
152 ehdr_ = nullptr;
153 dynsym_ = nullptr;
154 dynstr_ = nullptr;
155 versym_ = nullptr;
156 verdef_ = nullptr;
157 hash_ = nullptr;
158 strsize_ = 0;
159 verdefnum_ = 0;
160 link_base_ = ~0L; // Sentinel: PT_LOAD .p_vaddr can't possibly be this.
161 if (!base) {
162 return;
163 }
164 const char *const base_as_char = reinterpret_cast<const char *>(base);
165 if (base_as_char[EI_MAG0] != ELFMAG0 || base_as_char[EI_MAG1] != ELFMAG1 ||
166 base_as_char[EI_MAG2] != ELFMAG2 || base_as_char[EI_MAG3] != ELFMAG3) {
167 assert(false);
168 return;
169 }
170 int elf_class = base_as_char[EI_CLASS];
171 if (elf_class != kElfClass) {
172 assert(false);
173 return;
174 }
175 switch (base_as_char[EI_DATA]) {
176 case ELFDATA2LSB: {
177 if (__LITTLE_ENDIAN != __BYTE_ORDER) {
178 assert(false);
179 return;
180 }
181 break;
182 }
183 case ELFDATA2MSB: {
184 if (__BIG_ENDIAN != __BYTE_ORDER) {
185 assert(false);
186 return;
187 }
188 break;
189 }
190 default: {
191 assert(false);
192 return;
193 }
194 }
195
196 ehdr_ = reinterpret_cast<const ElfW(Ehdr) *>(base);
197 const ElfW(Phdr) *dynamic_program_header = nullptr;
198 for (int i = 0; i < ehdr_->e_phnum; ++i) {
199 const ElfW(Phdr) *const program_header = GetPhdr(i);
200 switch (program_header->p_type) {
201 case PT_LOAD:
202 if (!~link_base_) {
203 link_base_ = program_header->p_vaddr;
204 }
205 break;
206 case PT_DYNAMIC:
207 dynamic_program_header = program_header;
208 break;
209 }
210 }
211 if (!~link_base_ || !dynamic_program_header) {
212 assert(false);
213 // Mark this image as not present. Can not recur infinitely.
214 Init(nullptr);
215 return;
216 }
217 ptrdiff_t relocation =
218 base_as_char - reinterpret_cast<const char *>(link_base_);
219 ElfW(Dyn) *dynamic_entry =
220 reinterpret_cast<ElfW(Dyn) *>(dynamic_program_header->p_vaddr +
221 relocation);
222 for (; dynamic_entry->d_tag != DT_NULL; ++dynamic_entry) {
223 const ElfW(Xword) value = dynamic_entry->d_un.d_val + relocation;
224 switch (dynamic_entry->d_tag) {
225 case DT_HASH:
226 hash_ = reinterpret_cast<ElfW(Word) *>(value);
227 break;
228 case DT_SYMTAB:
229 dynsym_ = reinterpret_cast<ElfW(Sym) *>(value);
230 break;
231 case DT_STRTAB:
232 dynstr_ = reinterpret_cast<const char *>(value);
233 break;
234 case DT_VERSYM:
235 versym_ = reinterpret_cast<ElfW(Versym) *>(value);
236 break;
237 case DT_VERDEF:
238 verdef_ = reinterpret_cast<ElfW(Verdef) *>(value);
239 break;
240 case DT_VERDEFNUM:
241 verdefnum_ = dynamic_entry->d_un.d_val;
242 break;
243 case DT_STRSZ:
244 strsize_ = dynamic_entry->d_un.d_val;
245 break;
246 default:
247 // Unrecognized entries explicitly ignored.
248 break;
249 }
250 }
251 if (!hash_ || !dynsym_ || !dynstr_ || !versym_ ||
252 !verdef_ || !verdefnum_ || !strsize_) {
253 assert(false); // invalid VDSO
254 // Mark this image as not present. Can not recur infinitely.
255 Init(nullptr);
256 return;
257 }
258}
259
260bool ElfMemImage::LookupSymbol(const char *name,
261 const char *version,
262 int type,
263 SymbolInfo *info_out) const {
264 for (const SymbolInfo& info : *this) {
265 if (strcmp(info.name, name) == 0 && strcmp(info.version, version) == 0 &&
266 ElfType(info.symbol) == type) {
267 if (info_out) {
268 *info_out = info;
269 }
270 return true;
271 }
272 }
273 return false;
274}
275
276bool ElfMemImage::LookupSymbolByAddress(const void *address,
277 SymbolInfo *info_out) const {
278 for (const SymbolInfo& info : *this) {
279 const char *const symbol_start =
280 reinterpret_cast<const char *>(info.address);
281 const char *const symbol_end = symbol_start + info.symbol->st_size;
282 if (symbol_start <= address && address < symbol_end) {
283 if (info_out) {
284 // Client wants to know details for that symbol (the usual case).
285 if (ElfBind(info.symbol) == STB_GLOBAL) {
286 // Strong symbol; just return it.
287 *info_out = info;
288 return true;
289 } else {
290 // Weak or local. Record it, but keep looking for a strong one.
291 *info_out = info;
292 }
293 } else {
294 // Client only cares if there is an overlapping symbol.
295 return true;
296 }
297 }
298 }
299 return false;
300}
301
302ElfMemImage::SymbolIterator::SymbolIterator(const void *const image, int index)
303 : index_(index), image_(image) {
304}
305
306const ElfMemImage::SymbolInfo *ElfMemImage::SymbolIterator::operator->() const {
307 return &info_;
308}
309
310const ElfMemImage::SymbolInfo& ElfMemImage::SymbolIterator::operator*() const {
311 return info_;
312}
313
314bool ElfMemImage::SymbolIterator::operator==(const SymbolIterator &rhs) const {
315 return this->image_ == rhs.image_ && this->index_ == rhs.index_;
316}
317
318bool ElfMemImage::SymbolIterator::operator!=(const SymbolIterator &rhs) const {
319 return !(*this == rhs);
320}
321
322ElfMemImage::SymbolIterator &ElfMemImage::SymbolIterator::operator++() {
323 this->Update(1);
324 return *this;
325}
326
327ElfMemImage::SymbolIterator ElfMemImage::begin() const {
328 SymbolIterator it(this, 0);
329 it.Update(0);
330 return it;
331}
332
333ElfMemImage::SymbolIterator ElfMemImage::end() const {
334 return SymbolIterator(this, GetNumSymbols());
335}
336
337void ElfMemImage::SymbolIterator::Update(int increment) {
338 const ElfMemImage *image = reinterpret_cast<const ElfMemImage *>(image_);
339 ABSL_RAW_CHECK(image->IsPresent() || increment == 0, "");
340 if (!image->IsPresent()) {
341 return;
342 }
343 index_ += increment;
344 if (index_ >= image->GetNumSymbols()) {
345 index_ = image->GetNumSymbols();
346 return;
347 }
348 const ElfW(Sym) *symbol = image->GetDynsym(index_);
349 const ElfW(Versym) *version_symbol = image->GetVersym(index_);
350 ABSL_RAW_CHECK(symbol && version_symbol, "");
351 const char *const symbol_name = image->GetDynstr(symbol->st_name);
352 const ElfW(Versym) version_index = version_symbol[0] & VERSYM_VERSION;
353 const ElfW(Verdef) *version_definition = nullptr;
354 const char *version_name = "";
355 if (symbol->st_shndx == SHN_UNDEF) {
356 // Undefined symbols reference DT_VERNEED, not DT_VERDEF, and
357 // version_index could well be greater than verdefnum_, so calling
358 // GetVerdef(version_index) may trigger assertion.
359 } else {
360 version_definition = image->GetVerdef(version_index);
361 }
362 if (version_definition) {
363 // I am expecting 1 or 2 auxiliary entries: 1 for the version itself,
364 // optional 2nd if the version has a parent.
365 ABSL_RAW_CHECK(
366 version_definition->vd_cnt == 1 || version_definition->vd_cnt == 2,
367 "wrong number of entries");
368 const ElfW(Verdaux) *version_aux = image->GetVerdefAux(version_definition);
369 version_name = image->GetVerstr(version_aux->vda_name);
370 }
371 info_.name = symbol_name;
372 info_.version = version_name;
373 info_.address = image->GetSymAddr(symbol);
374 info_.symbol = symbol;
375}
376
377} // namespace debugging_internal
378} // namespace absl
379
380#endif // ABSL_HAVE_ELF_MEM_IMAGE
381