| 1 | // Copyright (c) 2013 Austin T. Clements. All rights reserved. |
| 2 | // Use of this source code is governed by an MIT license |
| 3 | // that can be found in the LICENSE file. |
| 4 | |
| 5 | #ifndef _ELFPP_HH_ |
| 6 | #define _ELFPP_HH_ |
| 7 | |
| 8 | #include "common.hh" |
| 9 | #include "data.hh" |
| 10 | |
| 11 | #include <cstddef> |
| 12 | #include <memory> |
| 13 | #include <stdexcept> |
| 14 | #include <vector> |
| 15 | |
| 16 | ELFPP_BEGIN_NAMESPACE |
| 17 | |
// Forward declarations of the classes declared in this header, so
// that they can refer to one another regardless of definition order.
class elf;
class loader;
class section;
class segment;
class strtab;
class symtab;
| 24 | // XXX Audit for binary compatibility |
| 25 | |
| 26 | // XXX Segments, other section types |
| 27 | |
/**
 * Exception type used to report malformed ELF data.
 *
 * The message given at construction is forwarded unchanged to
 * std::runtime_error and is available through what().
 */
class format_error : public std::runtime_error
{
public:
        /** Construct from a C string message. */
        explicit format_error(const char *what_arg)
                : std::runtime_error(what_arg)
        {
        }

        /** Construct from a std::string message. */
        explicit format_error(const std::string &what_arg)
                : std::runtime_error(what_arg)
        {
        }
};
| 39 | |
| 40 | /** |
| 41 | * An ELF file. |
| 42 | * |
| 43 | * This class is internally reference counted and efficiently |
| 44 | * copyable. |
| 45 | * |
| 46 | * Raw pointers to ELF data returned by any method of this object or |
| 47 | * any object derived from this object point directly into loaded |
| 48 | * section data. Hence, callers must ensure that the loader passed to |
| 49 | * this file remains live as long as any such pointer is in use. |
| 50 | * Keeping any object that can return such a pointer live is |
| 51 | * sufficieint to keep the loader live. |
| 52 | */ |
| 53 | class elf |
| 54 | { |
| 55 | public: |
| 56 | /** |
| 57 | * Construct an ELF file that is backed by data read from the |
| 58 | * given loader. |
| 59 | */ |
| 60 | explicit elf(const std::shared_ptr<loader> &l); |
| 61 | |
| 62 | /** |
| 63 | * Construct an ELF file that is initially not valid. Calling |
| 64 | * methods other than operator= and valid on this results in |
| 65 | * undefined behavior. |
| 66 | */ |
| 67 | elf() = default; |
| 68 | elf(const elf &o) = default; |
| 69 | elf(elf &&o) = default; |
| 70 | |
| 71 | elf& operator=(const elf &o) = default; |
| 72 | |
| 73 | bool valid() const |
| 74 | { |
| 75 | return !!m; |
| 76 | } |
| 77 | |
| 78 | /** |
| 79 | * Return the ELF file header in canonical form (ELF64 in |
| 80 | * native byte order). |
| 81 | */ |
| 82 | const Ehdr<> &get_hdr() const; |
| 83 | |
| 84 | /** |
| 85 | * Return the loader used by this file. |
| 86 | */ |
| 87 | std::shared_ptr<loader> get_loader() const; |
| 88 | |
| 89 | /** |
| 90 | * Return the segments in this file. |
| 91 | */ |
| 92 | const std::vector<segment> &segments() const; |
| 93 | |
| 94 | /** |
| 95 | * Return the segment at the given index. If no such segment |
| 96 | * is found, return an invalid segment. |
| 97 | */ |
| 98 | const segment &get_segment(unsigned index) const; |
| 99 | |
| 100 | /** |
| 101 | * Return the sections in this file. |
| 102 | */ |
| 103 | const std::vector<section> §ions() const; |
| 104 | |
| 105 | /** |
| 106 | * Return the section with the specified name. If no such |
| 107 | * section is found, return an invalid section. |
| 108 | */ |
| 109 | const section &get_section(const std::string &name) const; |
| 110 | |
| 111 | /** |
| 112 | * Return the section at the given index. If no such section |
| 113 | * is found, return an invalid section. |
| 114 | */ |
| 115 | const section &get_section(unsigned index) const; |
| 116 | |
| 117 | private: |
| 118 | struct impl; |
| 119 | std::shared_ptr<impl> m; |
| 120 | }; |
| 121 | |
/**
 * Abstract interface through which an ELF file obtains the bytes of
 * the underlying file.
 */
class loader
{
public:
        virtual ~loader() = default;

        /**
         * Bring size bytes of the file, starting at offset, into
         * memory and return a pointer to the first of them.
         *
         * Contract for implementations: the returned memory must
         * stay valid and unmodified until the loader itself is
         * destroyed, and if the complete request cannot be satisfied
         * for any reason (a premature EOF included) an exception
         * must be thrown.
         */
        virtual const void *load(off_t offset, size_t size) = 0;
};
| 139 | |
| 140 | /** |
| 141 | * An mmap-based loader that maps requested sections on demand. This |
| 142 | * will close fd when done, so the caller should dup the file |
| 143 | * descriptor if it intends to continue using it. |
| 144 | */ |
| 145 | std::shared_ptr<loader> create_mmap_loader(int fd); |
| 146 | |
/**
 * Exception type used to report that a section is not of the type
 * the caller asked for.
 *
 * The message given at construction is forwarded unchanged to
 * std::logic_error and is available through what().
 */
class section_type_mismatch : public std::logic_error
{
public:
        /** Construct from a C string message. */
        explicit section_type_mismatch(const char *what_arg)
                : std::logic_error(what_arg)
        {
        }

        /** Construct from a std::string message. */
        explicit section_type_mismatch(const std::string &what_arg)
                : std::logic_error(what_arg)
        {
        }
};
| 158 | |
| 159 | /** |
| 160 | * An ELF segment. |
| 161 | * |
| 162 | * This class is internally reference counted and efficiently |
| 163 | * copyable. |
| 164 | */ |
| 165 | class segment |
| 166 | { |
| 167 | public: |
| 168 | /** |
| 169 | * Construct a segment that is initially not valid. Calling |
| 170 | * methods other than operator= and valid on this results in |
| 171 | * undefined behavior. |
| 172 | */ |
| 173 | segment() { } |
| 174 | |
| 175 | segment(const elf &f, const void *hdr); |
| 176 | segment(const segment &o) = default; |
| 177 | segment(segment &&o) = default; |
| 178 | |
| 179 | /** |
| 180 | * Return true if this segment is valid and corresponds to a |
| 181 | * segment in the ELF file. |
| 182 | */ |
| 183 | bool valid() const |
| 184 | { |
| 185 | return !!m; |
| 186 | } |
| 187 | |
| 188 | /** |
| 189 | * Return the ELF section header in canonical form (ELF64 in |
| 190 | * native byte order). |
| 191 | */ |
| 192 | const Phdr<> &get_hdr() const; |
| 193 | |
| 194 | /** |
| 195 | * Return this segment's data. The returned buffer will |
| 196 | * be file_size() bytes long. |
| 197 | */ |
| 198 | const void *data() const; |
| 199 | |
| 200 | /** |
| 201 | * Return the on disk size of this segment in bytes. |
| 202 | */ |
| 203 | size_t file_size() const; |
| 204 | |
| 205 | /** |
| 206 | * Return the in-memory size of this segment in bytes. |
| 207 | * Bytes between file_size() and mem_size() are implicity zeroes. |
| 208 | */ |
| 209 | size_t mem_size() const; |
| 210 | |
| 211 | private: |
| 212 | struct impl; |
| 213 | std::shared_ptr<impl> m; |
| 214 | }; |
| 215 | |
| 216 | /** |
| 217 | * An ELF section. |
| 218 | * |
| 219 | * This class is internally reference counted and efficiently |
| 220 | * copyable. |
| 221 | */ |
| 222 | class section |
| 223 | { |
| 224 | public: |
| 225 | /** |
| 226 | * Construct a section that is initially not valid. Calling |
| 227 | * methods other than operator= and valid on this results in |
| 228 | * undefined behavior. |
| 229 | */ |
| 230 | section() { } |
| 231 | |
| 232 | section(const elf &f, const void *hdr); |
| 233 | section(const section &o) = default; |
| 234 | section(section &&o) = default; |
| 235 | |
| 236 | /** |
| 237 | * Return true if this section is valid and corresponds to a |
| 238 | * section in the ELF file. |
| 239 | */ |
| 240 | bool valid() const |
| 241 | { |
| 242 | return !!m; |
| 243 | } |
| 244 | |
| 245 | /** |
| 246 | * Return the ELF section header in canonical form (ELF64 in |
| 247 | * native byte order). |
| 248 | */ |
| 249 | const Shdr<> &get_hdr() const; |
| 250 | |
| 251 | /** |
| 252 | * Return this section's name. |
| 253 | */ |
| 254 | const char *get_name(size_t *len_out) const; |
| 255 | /** |
| 256 | * Return this section's name. The returned string copies its |
| 257 | * data, so loader liveness requirements don't apply. |
| 258 | */ |
| 259 | std::string get_name() const; |
| 260 | |
| 261 | /** |
| 262 | * Return this section's data. If this is a NOBITS section, |
| 263 | * return nullptr. |
| 264 | */ |
| 265 | const void *data() const; |
| 266 | /** |
| 267 | * Return the size of this section in bytes. |
| 268 | */ |
| 269 | size_t size() const; |
| 270 | |
| 271 | /** |
| 272 | * Return this section as a strtab. Throws |
| 273 | * section_type_mismatch if this section is not a string |
| 274 | * table. |
| 275 | */ |
| 276 | strtab as_strtab() const; |
| 277 | |
| 278 | /** |
| 279 | * Return this section as a symtab. Throws |
| 280 | * section_type_mismatch if this section is not a symbol |
| 281 | * table. |
| 282 | */ |
| 283 | symtab as_symtab() const; |
| 284 | |
| 285 | private: |
| 286 | struct impl; |
| 287 | std::shared_ptr<impl> m; |
| 288 | }; |
| 289 | |
| 290 | /** |
| 291 | * A string table. |
| 292 | * |
| 293 | * This class is internally reference counted and efficiently |
| 294 | * copyable. |
| 295 | */ |
| 296 | class strtab |
| 297 | { |
| 298 | public: |
| 299 | /** |
| 300 | * Construct a strtab that is initially not valid. Calling |
| 301 | * methods other than operator= and valid on this results in |
| 302 | * undefined behavior. |
| 303 | */ |
| 304 | strtab() = default; |
| 305 | strtab(elf f, const void *data, size_t size); |
| 306 | |
| 307 | bool valid() const |
| 308 | { |
| 309 | return !!m; |
| 310 | } |
| 311 | |
| 312 | /** |
| 313 | * Return the string at the given offset in this string table. |
| 314 | * If the offset is out of bounds, throws std::range_error. |
| 315 | * This is very efficient since the returned pointer points |
| 316 | * directly into the loaded section, though this still |
| 317 | * verifies that the returned string is NUL-terminated. |
| 318 | */ |
| 319 | const char *get(Elf64::Off offset, size_t *len_out) const; |
| 320 | /** |
| 321 | * Return the string at the given offset in this string table. |
| 322 | */ |
| 323 | std::string get(Elf64::Off offset) const; |
| 324 | |
| 325 | private: |
| 326 | struct impl; |
| 327 | std::shared_ptr<impl> m; |
| 328 | }; |
| 329 | |
| 330 | /** |
| 331 | * A symbol from a symbol table. |
| 332 | */ |
| 333 | class sym |
| 334 | { |
| 335 | const strtab strs; |
| 336 | Sym<> data; |
| 337 | |
| 338 | public: |
| 339 | sym(elf f, const void *data, strtab strs); |
| 340 | |
| 341 | /** |
| 342 | * Return this symbol's raw data. |
| 343 | */ |
| 344 | const Sym<> &get_data() const |
| 345 | { |
| 346 | return data; |
| 347 | } |
| 348 | |
| 349 | /** |
| 350 | * Return this symbol's name. |
| 351 | * |
| 352 | * This returns a pointer into the string table and, as such, |
| 353 | * is very efficient. If len_out is non-nullptr, *len_out |
| 354 | * will be set the length of the returned string. |
| 355 | */ |
| 356 | const char *get_name(size_t *len_out) const; |
| 357 | |
| 358 | /** |
| 359 | * Return this symbol's name as a string. |
| 360 | */ |
| 361 | std::string get_name() const; |
| 362 | }; |
| 363 | |
| 364 | /** |
| 365 | * A symbol table. |
| 366 | * |
| 367 | * This class is internally reference counted and efficiently |
| 368 | * copyable. |
| 369 | */ |
| 370 | class symtab |
| 371 | { |
| 372 | public: |
| 373 | /** |
| 374 | * Construct a symtab that is initially not valid. Calling |
| 375 | * methods other than operator= and valid on this results in |
| 376 | * undefined behavior. |
| 377 | */ |
| 378 | symtab() = default; |
| 379 | symtab(elf f, const void *data, size_t size, strtab strs); |
| 380 | |
| 381 | bool valid() const |
| 382 | { |
| 383 | return !!m; |
| 384 | } |
| 385 | |
| 386 | class iterator |
| 387 | { |
| 388 | const elf f; |
| 389 | const strtab strs; |
| 390 | const char *pos; |
| 391 | size_t stride; |
| 392 | |
| 393 | iterator(const symtab &tab, const char *pos); |
| 394 | friend class symtab; |
| 395 | |
| 396 | public: |
| 397 | sym operator*() const |
| 398 | { |
| 399 | return sym(f, pos, strs); |
| 400 | } |
| 401 | |
| 402 | iterator& operator++() |
| 403 | { |
| 404 | return *this += 1; |
| 405 | } |
| 406 | |
| 407 | iterator operator++(int) |
| 408 | { |
| 409 | iterator cur(*this); |
| 410 | *this += 1; |
| 411 | return cur; |
| 412 | } |
| 413 | |
| 414 | iterator& operator+=(std::ptrdiff_t x) |
| 415 | { |
| 416 | pos += x * stride; |
| 417 | return *this; |
| 418 | } |
| 419 | |
| 420 | iterator& operator-=(std::ptrdiff_t x) |
| 421 | { |
| 422 | pos -= x * stride; |
| 423 | return *this; |
| 424 | } |
| 425 | |
| 426 | bool operator==(iterator &o) const |
| 427 | { |
| 428 | return pos == o.pos; |
| 429 | } |
| 430 | |
| 431 | bool operator!=(iterator &o) const |
| 432 | { |
| 433 | return pos != o.pos; |
| 434 | } |
| 435 | }; |
| 436 | |
| 437 | /** |
| 438 | * Return an iterator to the first symbol. |
| 439 | */ |
| 440 | iterator begin() const; |
| 441 | |
| 442 | /** |
| 443 | * Return an iterator just past the last symbol. |
| 444 | */ |
| 445 | iterator end() const; |
| 446 | |
| 447 | private: |
| 448 | struct impl; |
| 449 | std::shared_ptr<impl> m; |
| 450 | }; |
| 451 | |
| 452 | ELFPP_END_NAMESPACE |
| 453 | |
| 454 | #endif |
| 455 | |