1// Copyright (c) 2013 Austin T. Clements. All rights reserved.
2// Use of this source code is governed by an MIT license
3// that can be found in the LICENSE file.
4
5#ifndef _ELFPP_HH_
6#define _ELFPP_HH_
7
8#include "common.hh"
9#include "data.hh"
10
11#include <cstddef>
12#include <memory>
13#include <stdexcept>
14#include <vector>
15
16ELFPP_BEGIN_NAMESPACE
17
// Forward declarations for the classes defined later in this header,
// listed in the order in which they are defined.
class elf;
class loader;
class segment;
class section;
class strtab;
class symtab;
24// XXX Audit for binary compatibility
25
26// XXX Segments, other section types
27
/**
 * Thrown to report malformed ELF data.
 *
 * This is a std::runtime_error; what() returns the message supplied
 * at construction.
 */
class format_error : public std::runtime_error
{
public:
        // Inherit the base class's explicit constructors taking a
        // const std::string & or a const char * message.
        using std::runtime_error::runtime_error;
};
39
40/**
41 * An ELF file.
42 *
43 * This class is internally reference counted and efficiently
44 * copyable.
45 *
46 * Raw pointers to ELF data returned by any method of this object or
47 * any object derived from this object point directly into loaded
48 * section data. Hence, callers must ensure that the loader passed to
49 * this file remains live as long as any such pointer is in use.
50 * Keeping any object that can return such a pointer live is
51 * sufficieint to keep the loader live.
52 */
53class elf
54{
55public:
56 /**
57 * Construct an ELF file that is backed by data read from the
58 * given loader.
59 */
60 explicit elf(const std::shared_ptr<loader> &l);
61
62 /**
63 * Construct an ELF file that is initially not valid. Calling
64 * methods other than operator= and valid on this results in
65 * undefined behavior.
66 */
67 elf() = default;
68 elf(const elf &o) = default;
69 elf(elf &&o) = default;
70
71 elf& operator=(const elf &o) = default;
72
73 bool valid() const
74 {
75 return !!m;
76 }
77
78 /**
79 * Return the ELF file header in canonical form (ELF64 in
80 * native byte order).
81 */
82 const Ehdr<> &get_hdr() const;
83
84 /**
85 * Return the loader used by this file.
86 */
87 std::shared_ptr<loader> get_loader() const;
88
89 /**
90 * Return the segments in this file.
91 */
92 const std::vector<segment> &segments() const;
93
94 /**
95 * Return the segment at the given index. If no such segment
96 * is found, return an invalid segment.
97 */
98 const segment &get_segment(unsigned index) const;
99
100 /**
101 * Return the sections in this file.
102 */
103 const std::vector<section> &sections() const;
104
105 /**
106 * Return the section with the specified name. If no such
107 * section is found, return an invalid section.
108 */
109 const section &get_section(const std::string &name) const;
110
111 /**
112 * Return the section at the given index. If no such section
113 * is found, return an invalid section.
114 */
115 const section &get_section(unsigned index) const;
116
117private:
118 struct impl;
119 std::shared_ptr<impl> m;
120};
121
/**
 * Abstract interface through which pieces of an ELF file are brought
 * into memory.
 */
class loader
{
public:
        // Virtual so that concrete loaders are destroyed correctly
        // through a pointer to this interface.
        virtual ~loader() = default;

        /**
         * Load size bytes of the file starting at the given offset
         * and return a pointer to the first of them.  The returned
         * memory must stay valid and unchanged until this loader is
         * destroyed.  An implementation that cannot satisfy the
         * whole request for any reason (a premature EOF included)
         * must throw an exception.
         */
        virtual const void *load(off_t offset, size_t size) = 0;
};
139
140/**
141 * An mmap-based loader that maps requested sections on demand. This
142 * will close fd when done, so the caller should dup the file
143 * descriptor if it intends to continue using it.
144 */
145std::shared_ptr<loader> create_mmap_loader(int fd);
146
/**
 * Thrown when a section is requested as a type it does not have.
 *
 * This is a std::logic_error; what() returns the message supplied at
 * construction.
 */
class section_type_mismatch : public std::logic_error
{
public:
        // Inherit the base class's explicit constructors taking a
        // const std::string & or a const char * message.
        using std::logic_error::logic_error;
};
158
159/**
160 * An ELF segment.
161 *
162 * This class is internally reference counted and efficiently
163 * copyable.
164 */
165class segment
166{
167public:
168 /**
169 * Construct a segment that is initially not valid. Calling
170 * methods other than operator= and valid on this results in
171 * undefined behavior.
172 */
173 segment() { }
174
175 segment(const elf &f, const void *hdr);
176 segment(const segment &o) = default;
177 segment(segment &&o) = default;
178
179 /**
180 * Return true if this segment is valid and corresponds to a
181 * segment in the ELF file.
182 */
183 bool valid() const
184 {
185 return !!m;
186 }
187
188 /**
189 * Return the ELF section header in canonical form (ELF64 in
190 * native byte order).
191 */
192 const Phdr<> &get_hdr() const;
193
194 /**
195 * Return this segment's data. The returned buffer will
196 * be file_size() bytes long.
197 */
198 const void *data() const;
199
200 /**
201 * Return the on disk size of this segment in bytes.
202 */
203 size_t file_size() const;
204
205 /**
206 * Return the in-memory size of this segment in bytes.
207 * Bytes between file_size() and mem_size() are implicity zeroes.
208 */
209 size_t mem_size() const;
210
211private:
212 struct impl;
213 std::shared_ptr<impl> m;
214};
215
216/**
217 * An ELF section.
218 *
219 * This class is internally reference counted and efficiently
220 * copyable.
221 */
222class section
223{
224public:
225 /**
226 * Construct a section that is initially not valid. Calling
227 * methods other than operator= and valid on this results in
228 * undefined behavior.
229 */
230 section() { }
231
232 section(const elf &f, const void *hdr);
233 section(const section &o) = default;
234 section(section &&o) = default;
235
236 /**
237 * Return true if this section is valid and corresponds to a
238 * section in the ELF file.
239 */
240 bool valid() const
241 {
242 return !!m;
243 }
244
245 /**
246 * Return the ELF section header in canonical form (ELF64 in
247 * native byte order).
248 */
249 const Shdr<> &get_hdr() const;
250
251 /**
252 * Return this section's name.
253 */
254 const char *get_name(size_t *len_out) const;
255 /**
256 * Return this section's name. The returned string copies its
257 * data, so loader liveness requirements don't apply.
258 */
259 std::string get_name() const;
260
261 /**
262 * Return this section's data. If this is a NOBITS section,
263 * return nullptr.
264 */
265 const void *data() const;
266 /**
267 * Return the size of this section in bytes.
268 */
269 size_t size() const;
270
271 /**
272 * Return this section as a strtab. Throws
273 * section_type_mismatch if this section is not a string
274 * table.
275 */
276 strtab as_strtab() const;
277
278 /**
279 * Return this section as a symtab. Throws
280 * section_type_mismatch if this section is not a symbol
281 * table.
282 */
283 symtab as_symtab() const;
284
285private:
286 struct impl;
287 std::shared_ptr<impl> m;
288};
289
290/**
291 * A string table.
292 *
293 * This class is internally reference counted and efficiently
294 * copyable.
295 */
296class strtab
297{
298public:
299 /**
300 * Construct a strtab that is initially not valid. Calling
301 * methods other than operator= and valid on this results in
302 * undefined behavior.
303 */
304 strtab() = default;
305 strtab(elf f, const void *data, size_t size);
306
307 bool valid() const
308 {
309 return !!m;
310 }
311
312 /**
313 * Return the string at the given offset in this string table.
314 * If the offset is out of bounds, throws std::range_error.
315 * This is very efficient since the returned pointer points
316 * directly into the loaded section, though this still
317 * verifies that the returned string is NUL-terminated.
318 */
319 const char *get(Elf64::Off offset, size_t *len_out) const;
320 /**
321 * Return the string at the given offset in this string table.
322 */
323 std::string get(Elf64::Off offset) const;
324
325private:
326 struct impl;
327 std::shared_ptr<impl> m;
328};
329
330/**
331 * A symbol from a symbol table.
332 */
333class sym
334{
335 const strtab strs;
336 Sym<> data;
337
338public:
339 sym(elf f, const void *data, strtab strs);
340
341 /**
342 * Return this symbol's raw data.
343 */
344 const Sym<> &get_data() const
345 {
346 return data;
347 }
348
349 /**
350 * Return this symbol's name.
351 *
352 * This returns a pointer into the string table and, as such,
353 * is very efficient. If len_out is non-nullptr, *len_out
354 * will be set the length of the returned string.
355 */
356 const char *get_name(size_t *len_out) const;
357
358 /**
359 * Return this symbol's name as a string.
360 */
361 std::string get_name() const;
362};
363
364/**
365 * A symbol table.
366 *
367 * This class is internally reference counted and efficiently
368 * copyable.
369 */
370class symtab
371{
372public:
373 /**
374 * Construct a symtab that is initially not valid. Calling
375 * methods other than operator= and valid on this results in
376 * undefined behavior.
377 */
378 symtab() = default;
379 symtab(elf f, const void *data, size_t size, strtab strs);
380
381 bool valid() const
382 {
383 return !!m;
384 }
385
386 class iterator
387 {
388 const elf f;
389 const strtab strs;
390 const char *pos;
391 size_t stride;
392
393 iterator(const symtab &tab, const char *pos);
394 friend class symtab;
395
396 public:
397 sym operator*() const
398 {
399 return sym(f, pos, strs);
400 }
401
402 iterator& operator++()
403 {
404 return *this += 1;
405 }
406
407 iterator operator++(int)
408 {
409 iterator cur(*this);
410 *this += 1;
411 return cur;
412 }
413
414 iterator& operator+=(std::ptrdiff_t x)
415 {
416 pos += x * stride;
417 return *this;
418 }
419
420 iterator& operator-=(std::ptrdiff_t x)
421 {
422 pos -= x * stride;
423 return *this;
424 }
425
426 bool operator==(iterator &o) const
427 {
428 return pos == o.pos;
429 }
430
431 bool operator!=(iterator &o) const
432 {
433 return pos != o.pos;
434 }
435 };
436
437 /**
438 * Return an iterator to the first symbol.
439 */
440 iterator begin() const;
441
442 /**
443 * Return an iterator just past the last symbol.
444 */
445 iterator end() const;
446
447private:
448 struct impl;
449 std::shared_ptr<impl> m;
450};
451
452ELFPP_END_NAMESPACE
453
454#endif
455