1 | /* |
2 | * Copyright 2012-present Facebook, Inc. |
3 | * |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | * you may not use this file except in compliance with the License. |
6 | * You may obtain a copy of the License at |
7 | * |
8 | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | * |
10 | * Unless required by applicable law or agreed to in writing, software |
11 | * distributed under the License is distributed on an "AS IS" BASIS, |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | * See the License for the specific language governing permissions and |
14 | * limitations under the License. |
15 | */ |
16 | |
17 | // ELF file parser |
18 | |
19 | #pragma once |
20 | #define FOLLY_EXPERIMENTAL_SYMBOLIZER_ELF_H_ |
21 | |
22 | #include <elf.h> |
23 | #include <link.h> // For ElfW() |
24 | |
25 | #include <cstdio> |
26 | #include <initializer_list> |
27 | #include <stdexcept> |
28 | #include <system_error> |
29 | |
30 | #include <folly/Conv.h> |
31 | #include <folly/Likely.h> |
32 | #include <folly/Range.h> |
33 | #include <folly/lang/SafeAssert.h> |
34 | |
35 | namespace folly { |
36 | namespace symbolizer { |
37 | |
// Native-width ELF types. ElfW(x), provided by <link.h>, names the Elf32_x or
// Elf64_x type that matches the word size of the platform we are built for.

// Structures found in an ELF file.
using ElfEhdr = ElfW(Ehdr); // ELF (file) header
using ElfPhdr = ElfW(Phdr); // program header
using ElfShdr = ElfW(Shdr); // section header
using ElfSym = ElfW(Sym); // symbol table entry

// Scalar types used by the structures above.
using ElfAddr = ElfW(Addr); // virtual address
using ElfOff = ElfW(Off); // offset into the file
44 | |
45 | /** |
46 | * ELF file parser. |
47 | * |
48 | * We handle native files only (32-bit files on a 32-bit platform, 64-bit files |
49 | * on a 64-bit platform), and only executables (ET_EXEC) and shared objects |
50 | * (ET_DYN). |
51 | */ |
52 | class ElfFile { |
53 | public: |
54 | ElfFile() noexcept; |
55 | |
56 | // Note: may throw, call openNoThrow() explicitly if you don't want to throw |
57 | explicit ElfFile(const char* name, bool readOnly = true); |
58 | |
59 | // Open the ELF file. |
60 | // Returns 0 on success, kSystemError (guaranteed to be -1) (and sets errno) |
61 | // on IO error, kInvalidElfFile (and sets errno to EINVAL) for an invalid |
62 | // Elf file. On error, if msg is not nullptr, sets *msg to a static string |
63 | // indicating what failed. |
64 | enum { |
65 | kSuccess = 0, |
66 | kSystemError = -1, |
67 | kInvalidElfFile = -2, |
68 | }; |
69 | // Open the ELF file. Does not throw on error. |
70 | int openNoThrow( |
71 | const char* name, |
72 | bool readOnly = true, |
73 | const char** msg = nullptr) noexcept; |
74 | |
75 | // Like openNoThrow, but follow .gnu_debuglink if present |
76 | int openAndFollow( |
77 | const char* name, |
78 | bool readOnly = true, |
79 | const char** msg = nullptr) noexcept; |
80 | |
81 | // Open the ELF file. Throws on error. |
82 | void open(const char* name, bool readOnly = true); |
83 | |
84 | ~ElfFile(); |
85 | |
86 | ElfFile(ElfFile&& other) noexcept; |
87 | ElfFile& operator=(ElfFile&& other); |
88 | |
89 | /** Retrieve the ELF header */ |
90 | const ElfEhdr& () const { |
91 | return at<ElfEhdr>(0); |
92 | } |
93 | |
94 | /** |
95 | * Get the base address, the address where the file should be loaded if |
96 | * no relocations happened. |
97 | */ |
98 | uintptr_t getBaseAddress() const { |
99 | return baseAddress_; |
100 | } |
101 | |
102 | /** Find a section given its name */ |
103 | const ElfShdr* getSectionByName(const char* name) const; |
104 | |
105 | /** Find a section given its index in the section header table */ |
106 | const ElfShdr* getSectionByIndex(size_t idx) const; |
107 | |
108 | /** Retrieve the name of a section */ |
109 | const char* getSectionName(const ElfShdr& section) const; |
110 | |
111 | /** Get the actual section body */ |
112 | folly::StringPiece getSectionBody(const ElfShdr& section) const; |
113 | |
114 | /** Retrieve a string from a string table section */ |
115 | const char* getString(const ElfShdr& stringTable, size_t offset) const; |
116 | |
117 | /** |
118 | * Iterate over all strings in a string table section for as long as |
119 | * fn(str) returns false. |
120 | * Returns the current ("found") string when fn returned true, or nullptr |
121 | * if fn returned false for all strings in the table. |
122 | */ |
123 | template <class Fn> |
124 | const char* iterateStrings(const ElfShdr& stringTable, Fn fn) const; |
125 | |
126 | /** |
127 | * Iterate over program headers as long as fn(section) returns false. |
128 | * Returns a pointer to the current ("found") section when fn returned |
129 | * true, or nullptr if fn returned false for all sections. |
130 | */ |
131 | template <class Fn> |
132 | const ElfPhdr* (Fn fn) const; |
133 | |
134 | /** |
135 | * Iterate over all sections for as long as fn(section) returns false. |
136 | * Returns a pointer to the current ("found") section when fn returned |
137 | * true, or nullptr if fn returned false for all sections. |
138 | */ |
139 | template <class Fn> |
140 | const ElfShdr* iterateSections(Fn fn) const; |
141 | |
142 | /** |
143 | * Iterate over all sections with a given type. Similar to |
144 | * iterateSections(), but filtered only for sections with the given type. |
145 | */ |
146 | template <class Fn> |
147 | const ElfShdr* iterateSectionsWithType(uint32_t type, Fn fn) const; |
148 | |
149 | /** |
150 | * Iterate over all sections with a given types. Similar to |
151 | * iterateSectionWithTypes(), but filtered on multiple types. |
152 | */ |
153 | template <class Fn> |
154 | const ElfShdr* iterateSectionsWithTypes( |
155 | std::initializer_list<uint32_t> types, |
156 | Fn fn) const; |
157 | |
158 | /** |
159 | * Iterate over all symbols witin a given section. |
160 | * |
161 | * Returns a pointer to the current ("found") symbol when fn returned true, |
162 | * or nullptr if fn returned false for all symbols. |
163 | */ |
164 | template <class Fn> |
165 | const ElfSym* iterateSymbols(const ElfShdr& section, Fn fn) const; |
166 | template <class Fn> |
167 | const ElfSym* |
168 | iterateSymbolsWithType(const ElfShdr& section, uint32_t type, Fn fn) const; |
169 | template <class Fn> |
170 | const ElfSym* iterateSymbolsWithTypes( |
171 | const ElfShdr& section, |
172 | std::initializer_list<uint32_t> types, |
173 | Fn fn) const; |
174 | |
175 | /** |
176 | * Find symbol definition by address. |
177 | * Note that this is the file virtual address, so you need to undo |
178 | * any relocation that might have happened. |
179 | * |
180 | * Returns {nullptr, nullptr} if not found. |
181 | */ |
182 | typedef std::pair<const ElfShdr*, const ElfSym*> Symbol; |
183 | Symbol getDefinitionByAddress(uintptr_t address) const; |
184 | |
185 | /** |
186 | * Find symbol definition by name. |
187 | * |
188 | * If a symbol with this name cannot be found, a <nullptr, nullptr> Symbol |
189 | * will be returned. This is O(N) in the number of symbols in the file. |
190 | * |
191 | * Returns {nullptr, nullptr} if not found. |
192 | */ |
193 | Symbol getSymbolByName(const char* name) const; |
194 | |
195 | /** |
196 | * Get the value of a symbol. |
197 | */ |
198 | template <class T> |
199 | const T& getSymbolValue(const ElfSym* symbol) const { |
200 | const ElfShdr* section = getSectionByIndex(symbol->st_shndx); |
201 | FOLLY_SAFE_CHECK(section, "Symbol's section index is invalid" ); |
202 | |
203 | return valueAt<T>(*section, symbol->st_value); |
204 | } |
205 | |
206 | /** |
207 | * Get the value of the object stored at the given address. |
208 | * |
209 | * This is the function that you want to use in conjunction with |
210 | * getSymbolValue() to follow pointers. For example, to get the value of |
211 | * a char* symbol, you'd do something like this: |
212 | * |
213 | * auto sym = getSymbolByName("someGlobalValue"); |
214 | * auto addr = getSymbolValue<ElfAddr>(sym.second); |
215 | * const char* str = &getSymbolValue<const char>(addr); |
216 | */ |
217 | template <class T> |
218 | const T& getAddressValue(const ElfAddr addr) const { |
219 | const ElfShdr* section = getSectionContainingAddress(addr); |
220 | FOLLY_SAFE_CHECK(section, "Address does not refer to existing section" ); |
221 | |
222 | return valueAt<T>(*section, addr); |
223 | } |
224 | |
225 | /** |
226 | * Retrieve symbol name. |
227 | */ |
228 | const char* getSymbolName(Symbol symbol) const; |
229 | |
230 | /** Find the section containing the given address */ |
231 | const ElfShdr* getSectionContainingAddress(ElfAddr addr) const; |
232 | |
233 | private: |
234 | bool init(const char** msg); |
235 | void reset(); |
236 | ElfFile(const ElfFile&) = delete; |
237 | ElfFile& operator=(const ElfFile&) = delete; |
238 | |
239 | void validateStringTable(const ElfShdr& stringTable) const; |
240 | |
241 | template <class T> |
242 | const typename std::enable_if<std::is_pod<T>::value, T>::type& at( |
243 | ElfOff offset) const { |
244 | if (offset + sizeof(T) > length_) { |
245 | char msg[kFilepathMaxLen + 128]; |
246 | snprintf( |
247 | msg, |
248 | sizeof(msg), |
249 | "Offset (%zu + %zu) is not contained within our mmapped" |
250 | " file (%s) of length %zu" , |
251 | offset, |
252 | sizeof(T), |
253 | filepath_, |
254 | length_); |
255 | FOLLY_SAFE_CHECK(offset + sizeof(T) <= length_, msg); |
256 | } |
257 | |
258 | return *reinterpret_cast<T*>(file_ + offset); |
259 | } |
260 | |
261 | template <class T> |
262 | const T& valueAt(const ElfShdr& section, const ElfAddr addr) const { |
263 | // For exectuables and shared objects, st_value holds a virtual address |
264 | // that refers to the memory owned by sections. Since we didn't map the |
265 | // sections into the addresses that they're expecting (sh_addr), but |
266 | // instead just mmapped the entire file directly, we need to translate |
267 | // between addresses and offsets into the file. |
268 | // |
269 | // TODO: For other file types, st_value holds a file offset directly. Since |
270 | // I don't have a use-case for that right now, just assert that |
271 | // nobody wants this. We can always add it later. |
272 | FOLLY_SAFE_CHECK( |
273 | elfHeader().e_type == ET_EXEC || elfHeader().e_type == ET_DYN, |
274 | "Only exectuables and shared objects are supported" ); |
275 | FOLLY_SAFE_CHECK( |
276 | addr >= section.sh_addr && |
277 | (addr + sizeof(T)) <= (section.sh_addr + section.sh_size), |
278 | "Address is not contained within the provided segment" ); |
279 | |
280 | return at<T>(section.sh_offset + (addr - section.sh_addr)); |
281 | } |
282 | |
283 | static constexpr size_t kFilepathMaxLen = 512; |
284 | char filepath_[kFilepathMaxLen] = {}; |
285 | int fd_; |
286 | char* file_; // mmap() location |
287 | size_t length_; // mmap() length |
288 | |
289 | uintptr_t baseAddress_; |
290 | }; |
291 | |
292 | } // namespace symbolizer |
293 | } // namespace folly |
294 | |
295 | #include <folly/experimental/symbolizer/Elf-inl.h> |
296 | |