1/*
2 * Copyright 2012-present Facebook, Inc.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16#include <folly/experimental/symbolizer/Elf.h>
17
18#include <fcntl.h>
19#include <folly/portability/SysMman.h>
20#include <sys/stat.h>
21#include <sys/types.h>
22
23#include <cstring>
24#include <string>
25
26#include <glog/logging.h>
27
28#include <folly/Conv.h>
29#include <folly/Exception.h>
30#include <folly/ScopeGuard.h>
31
// Fallback definition for builds whose headers do not already provide
// STT_GNU_IFUNC. The symbol lookups in this file pass it, together with
// STT_OBJECT and STT_FUNC, as one of the accepted symbol types.
#if !defined(STT_GNU_IFUNC)
#define STT_GNU_IFUNC 10
#endif
35
36namespace folly {
37namespace symbolizer {
38
39ElfFile::ElfFile() noexcept
40 : fd_(-1),
41 file_(static_cast<char*>(MAP_FAILED)),
42 length_(0),
43 baseAddress_(0) {}
44
45ElfFile::ElfFile(const char* name, bool readOnly)
46 : fd_(-1),
47 file_(static_cast<char*>(MAP_FAILED)),
48 length_(0),
49 baseAddress_(0) {
50 open(name, readOnly);
51}
52
53void ElfFile::open(const char* name, bool readOnly) {
54 const char* msg = "";
55 int r = openNoThrow(name, readOnly, &msg);
56 if (r == kSystemError) {
57 throwSystemError(msg);
58 } else {
59 CHECK_EQ(r, kSuccess) << msg;
60 }
61}
62
63int ElfFile::openNoThrow(
64 const char* name,
65 bool readOnly,
66 const char** msg) noexcept {
67 FOLLY_SAFE_CHECK(fd_ == -1, "File already open");
68 // Always close fd and unmap in case of failure along the way to avoid
69 // check failure above if we leave fd != -1 and the object is recycled
70 // like it is inside SignalSafeElfCache
71 auto guard = makeGuard([&] { reset(); });
72 strncat(filepath_, name, kFilepathMaxLen - 1);
73 fd_ = ::open(name, readOnly ? O_RDONLY : O_RDWR);
74 if (fd_ == -1) {
75 if (msg) {
76 *msg = "open";
77 }
78 return kSystemError;
79 }
80 struct stat st;
81 int r = fstat(fd_, &st);
82 if (r == -1) {
83 if (msg) {
84 *msg = "fstat";
85 }
86 return kSystemError;
87 }
88
89 length_ = st.st_size;
90 int prot = PROT_READ;
91 if (!readOnly) {
92 prot |= PROT_WRITE;
93 }
94 file_ = static_cast<char*>(mmap(nullptr, length_, prot, MAP_SHARED, fd_, 0));
95 if (file_ == MAP_FAILED) {
96 if (msg) {
97 *msg = "mmap";
98 }
99 return kSystemError;
100 }
101 if (!init(msg)) {
102 reset();
103 errno = EINVAL;
104 return kInvalidElfFile;
105 }
106 guard.dismiss();
107 return kSuccess;
108}
109
110int ElfFile::openAndFollow(
111 const char* name,
112 bool readOnly,
113 const char** msg) noexcept {
114 auto result = openNoThrow(name, readOnly, msg);
115 if (!readOnly || result != kSuccess) {
116 return result;
117 }
118
119 /* NOTE .gnu_debuglink specifies only the name of the debugging info file
120 * (with no directory components). GDB checks 3 different directories, but
121 * ElfFile only supports the first version:
122 * - dirname(name)
123 * - dirname(name) + /.debug/
124 * - X/dirname(name)/ - where X is set in gdb's `debug-file-directory`.
125 */
126 auto dirend = strrchr(name, '/');
127 // include ending '/' if any.
128 auto dirlen = dirend != nullptr ? dirend + 1 - name : 0;
129
130 auto debuginfo = getSectionByName(".gnu_debuglink");
131 if (!debuginfo) {
132 return result;
133 }
134
135 // The section starts with the filename, with any leading directory
136 // components removed, followed by a zero byte.
137 auto debugFileName = getSectionBody(*debuginfo);
138 auto debugFileLen = strlen(debugFileName.begin());
139 if (dirlen + debugFileLen >= PATH_MAX) {
140 return result;
141 }
142
143 char linkname[PATH_MAX];
144 memcpy(linkname, name, dirlen);
145 memcpy(linkname + dirlen, debugFileName.begin(), debugFileLen + 1);
146 reset();
147 result = openNoThrow(linkname, readOnly, msg);
148 if (result == kSuccess) {
149 return result;
150 }
151 return openNoThrow(name, readOnly, msg);
152}
153
154ElfFile::~ElfFile() {
155 reset();
156}
157
158ElfFile::ElfFile(ElfFile&& other) noexcept
159 : fd_(other.fd_),
160 file_(other.file_),
161 length_(other.length_),
162 baseAddress_(other.baseAddress_) {
163 // copy other.filepath_, leaving filepath_ zero-terminated, always.
164 strncat(filepath_, other.filepath_, kFilepathMaxLen - 1);
165 other.filepath_[0] = 0;
166 other.fd_ = -1;
167 other.file_ = static_cast<char*>(MAP_FAILED);
168 other.length_ = 0;
169 other.baseAddress_ = 0;
170}
171
172ElfFile& ElfFile::operator=(ElfFile&& other) {
173 assert(this != &other);
174 reset();
175
176 // copy other.filepath_, leaving filepath_ zero-terminated, always.
177 strncat(filepath_, other.filepath_, kFilepathMaxLen - 1);
178 fd_ = other.fd_;
179 file_ = other.file_;
180 length_ = other.length_;
181 baseAddress_ = other.baseAddress_;
182
183 other.filepath_[0] = 0;
184 other.fd_ = -1;
185 other.file_ = static_cast<char*>(MAP_FAILED);
186 other.length_ = 0;
187 other.baseAddress_ = 0;
188
189 return *this;
190}
191
192void ElfFile::reset() {
193 filepath_[0] = 0;
194
195 if (file_ != MAP_FAILED) {
196 munmap(file_, length_);
197 file_ = static_cast<char*>(MAP_FAILED);
198 }
199
200 if (fd_ != -1) {
201 close(fd_);
202 fd_ = -1;
203 }
204}
205
206bool ElfFile::init(const char** msg) {
207 if (length_ < 4) {
208 if (msg) {
209 *msg = "not an ELF file (too short)";
210 }
211 return false;
212 }
213
214 std::array<char, 5> elfMagBuf = {{0, 0, 0, 0, 0}};
215 if (::lseek(fd_, 0, SEEK_SET) != 0 || ::read(fd_, elfMagBuf.data(), 4) != 4) {
216 if (msg) {
217 *msg = "unable to read ELF file for magic number";
218 }
219 return false;
220 }
221 if (std::strncmp(elfMagBuf.data(), ELFMAG, sizeof(ELFMAG)) != 0) {
222 if (msg) {
223 *msg = "invalid ELF magic";
224 }
225 return false;
226 }
227 if (::lseek(fd_, 0, SEEK_SET) != 0) {
228 if (msg) {
229 *msg = "unable to reset file descriptor after reading ELF magic number";
230 }
231 return false;
232 }
233
234 auto& elfHeader = this->elfHeader();
235
236#define EXPECTED_CLASS P1(ELFCLASS, __ELF_NATIVE_CLASS)
237#define P1(a, b) P2(a, b)
238#define P2(a, b) a##b
239 // Validate ELF class (32/64 bits)
240 if (elfHeader.e_ident[EI_CLASS] != EXPECTED_CLASS) {
241 if (msg) {
242 *msg = "invalid ELF class";
243 }
244 return false;
245 }
246#undef P1
247#undef P2
248#undef EXPECTED_CLASS
249
250 // Validate ELF data encoding (LSB/MSB)
251 static constexpr auto kExpectedEncoding =
252 kIsLittleEndian ? ELFDATA2LSB : ELFDATA2MSB;
253 if (elfHeader.e_ident[EI_DATA] != kExpectedEncoding) {
254 if (msg) {
255 *msg = "invalid ELF encoding";
256 }
257 return false;
258 }
259
260 // Validate ELF version (1)
261 if (elfHeader.e_ident[EI_VERSION] != EV_CURRENT ||
262 elfHeader.e_version != EV_CURRENT) {
263 if (msg) {
264 *msg = "invalid ELF version";
265 }
266 return false;
267 }
268
269 // We only support executable and shared object files
270 if (elfHeader.e_type != ET_EXEC && elfHeader.e_type != ET_DYN) {
271 if (msg) {
272 *msg = "invalid ELF file type";
273 }
274 return false;
275 }
276
277 if (elfHeader.e_phnum == 0) {
278 if (msg) {
279 *msg = "no program header!";
280 }
281 return false;
282 }
283
284 if (elfHeader.e_phentsize != sizeof(ElfPhdr)) {
285 if (msg) {
286 *msg = "invalid program header entry size";
287 }
288 return false;
289 }
290
291 if (elfHeader.e_shentsize != sizeof(ElfShdr)) {
292 if (msg) {
293 *msg = "invalid section header entry size";
294 }
295 }
296
297 // Program headers are sorted by load address, so the first PT_LOAD
298 // header gives us the base address.
299 const ElfPhdr* programHeader =
300 iterateProgramHeaders([](auto& h) { return h.p_type == PT_LOAD; });
301
302 if (!programHeader) {
303 if (msg) {
304 *msg = "could not find base address";
305 }
306 return false;
307 }
308 baseAddress_ = programHeader->p_vaddr;
309
310 return true;
311}
312
313const ElfShdr* ElfFile::getSectionByIndex(size_t idx) const {
314 FOLLY_SAFE_CHECK(idx < elfHeader().e_shnum, "invalid section index");
315 return &at<ElfShdr>(elfHeader().e_shoff + idx * sizeof(ElfShdr));
316}
317
318folly::StringPiece ElfFile::getSectionBody(const ElfShdr& section) const {
319 return folly::StringPiece(file_ + section.sh_offset, section.sh_size);
320}
321
322void ElfFile::validateStringTable(const ElfShdr& stringTable) const {
323 FOLLY_SAFE_CHECK(
324 stringTable.sh_type == SHT_STRTAB, "invalid type for string table");
325
326 const char* start = file_ + stringTable.sh_offset;
327 // First and last bytes must be 0
328 FOLLY_SAFE_CHECK(
329 stringTable.sh_size == 0 ||
330 (start[0] == '\0' && start[stringTable.sh_size - 1] == '\0'),
331 "invalid string table");
332}
333
334const char* ElfFile::getString(const ElfShdr& stringTable, size_t offset)
335 const {
336 validateStringTable(stringTable);
337 FOLLY_SAFE_CHECK(
338 offset < stringTable.sh_size, "invalid offset in string table");
339
340 return file_ + stringTable.sh_offset + offset;
341}
342
343const char* ElfFile::getSectionName(const ElfShdr& section) const {
344 if (elfHeader().e_shstrndx == SHN_UNDEF) {
345 return nullptr; // no section name string table
346 }
347
348 const ElfShdr& sectionNames = *getSectionByIndex(elfHeader().e_shstrndx);
349 return getString(sectionNames, section.sh_name);
350}
351
352const ElfShdr* ElfFile::getSectionByName(const char* name) const {
353 if (elfHeader().e_shstrndx == SHN_UNDEF) {
354 return nullptr; // no section name string table
355 }
356
357 const ElfShdr& sectionNames = *getSectionByIndex(elfHeader().e_shstrndx);
358 const char* start = file_ + sectionNames.sh_offset;
359
360 // Find section with the appropriate sh_name offset
361 const ElfShdr* foundSection = iterateSections([&](const ElfShdr& sh) {
362 if (sh.sh_name >= sectionNames.sh_size) {
363 return false;
364 }
365 return !strcmp(start + sh.sh_name, name);
366 });
367 return foundSection;
368}
369
370ElfFile::Symbol ElfFile::getDefinitionByAddress(uintptr_t address) const {
371 Symbol foundSymbol{nullptr, nullptr};
372
373 auto findSection = [&](const ElfShdr& section) {
374 auto findSymbols = [&](const ElfSym& sym) {
375 if (sym.st_shndx == SHN_UNDEF) {
376 return false; // not a definition
377 }
378 if (address >= sym.st_value && address < sym.st_value + sym.st_size) {
379 foundSymbol.first = &section;
380 foundSymbol.second = &sym;
381 return true;
382 }
383
384 return false;
385 };
386
387 return iterateSymbolsWithTypes(
388 section, {STT_OBJECT, STT_FUNC, STT_GNU_IFUNC}, findSymbols);
389 };
390
391 // Try the .dynsym section first if it exists, it's smaller.
392 (iterateSectionsWithType(SHT_DYNSYM, findSection) ||
393 iterateSectionsWithType(SHT_SYMTAB, findSection));
394
395 return foundSymbol;
396}
397
398ElfFile::Symbol ElfFile::getSymbolByName(const char* name) const {
399 Symbol foundSymbol{nullptr, nullptr};
400
401 auto findSection = [&](const ElfShdr& section) -> bool {
402 // This section has no string table associated w/ its symbols; hence we
403 // can't get names for them
404 if (section.sh_link == SHN_UNDEF) {
405 return false;
406 }
407
408 auto findSymbols = [&](const ElfSym& sym) -> bool {
409 if (sym.st_shndx == SHN_UNDEF) {
410 return false; // not a definition
411 }
412 if (sym.st_name == 0) {
413 return false; // no name for this symbol
414 }
415 const char* sym_name =
416 getString(*getSectionByIndex(section.sh_link), sym.st_name);
417 if (strcmp(sym_name, name) == 0) {
418 foundSymbol.first = &section;
419 foundSymbol.second = &sym;
420 return true;
421 }
422
423 return false;
424 };
425
426 return iterateSymbolsWithTypes(
427 section, {STT_OBJECT, STT_FUNC, STT_GNU_IFUNC}, findSymbols);
428 };
429
430 // Try the .dynsym section first if it exists, it's smaller.
431 iterateSectionsWithType(SHT_DYNSYM, findSection) ||
432 iterateSectionsWithType(SHT_SYMTAB, findSection);
433
434 return foundSymbol;
435}
436
437const ElfShdr* ElfFile::getSectionContainingAddress(ElfAddr addr) const {
438 return iterateSections([&](const ElfShdr& sh) -> bool {
439 return (addr >= sh.sh_addr) && (addr < (sh.sh_addr + sh.sh_size));
440 });
441}
442
443const char* ElfFile::getSymbolName(Symbol symbol) const {
444 if (!symbol.first || !symbol.second) {
445 return nullptr;
446 }
447
448 if (symbol.second->st_name == 0) {
449 return nullptr; // symbol has no name
450 }
451
452 if (symbol.first->sh_link == SHN_UNDEF) {
453 return nullptr; // symbol table has no strings
454 }
455
456 return getString(
457 *getSectionByIndex(symbol.first->sh_link), symbol.second->st_name);
458}
459
460} // namespace symbolizer
461} // namespace folly
462