1// Copyright (c) 2013 Austin T. Clements. All rights reserved.
2// Use of this source code is governed by an MIT license
3// that can be found in the LICENSE file.
4
5#ifndef _DWARFPP_HH_
6#define _DWARFPP_HH_
7
8#ifndef DWARFPP_BEGIN_NAMESPACE
9#define DWARFPP_BEGIN_NAMESPACE namespace dwarf {
10#define DWARFPP_END_NAMESPACE }
11#endif
12
13#include "data.hh"
14#include "small_vector.hh"
15
16#include <initializer_list>
17#include <map>
18#include <memory>
19#include <stdexcept>
20#include <string>
21#include <vector>
22
23DWARFPP_BEGIN_NAMESPACE
24
// Forward declarations of the public classes (alphabetical).
class compilation_unit;
class die;
class dwarf;
class expr;
class expr_context;
class expr_result;
class line_table;
class loader;
class rangelist;
class type_unit;
class value;

// Forward declarations of internal types (alphabetical).
struct abbrev_entry;
struct cursor;
struct section;
42
43// XXX Audit for binary-compatibility
44
45// XXX Might be able to reduce private coupling by making class
46// section public (and clean it up and maybe rename it slice) and
47// provide methods to get the backing data of things.
48//
49// XXX Make slice generic, without formatting information? Still want
50// lightweight cursors, so maybe the cursor methods that need the
51// format should take a const reference to a format stored in the
52// compilation unit?
53
54// XXX operator==/!= and hash functions
55
56// XXX Indicate DWARF4 in all spec references
57
58// XXX Big missing support: .debug_aranges, .debug_frame, loclists,
59// macros
60
61//////////////////////////////////////////////////////////////////
62// DWARF files
63//
64
/**
 * Exception type used to report malformed DWARF data.
 */
class format_error : public std::runtime_error
{
public:
    // Expose the (explicit) std::string and const char *
    // constructors of std::runtime_error unchanged.
    using std::runtime_error::runtime_error;
};
76
/**
 * The kinds of DWARF sections.  Each enumerator is named after the
 * corresponding ELF section (for example, info for .debug_info),
 * though DWARF can be embedded in other container formats as well.
 */
enum class section_type
{
    abbrev = 0,     // .debug_abbrev
    aranges,        // .debug_aranges
    frame,          // .debug_frame
    info,           // .debug_info
    line,           // .debug_line
    loc,            // .debug_loc
    macinfo,        // .debug_macinfo
    pubnames,       // .debug_pubnames
    pubtypes,       // .debug_pubtypes
    ranges,         // .debug_ranges
    str,            // .debug_str
    types           // .debug_types
};
96
97std::string
98to_string(section_type v);
99
100/**
101 * A DWARF file. This class is internally reference counted and can
102 * be efficiently copied.
103 *
104 * Objects retrieved from this object may depend on it; the caller is
105 * responsible for keeping this object live as long as any retrieved
106 * object may be in use.
107 */
108class dwarf
109{
110public:
111 /**
112 * Construct a DWARF file that is backed by sections read from
113 * the given loader.
114 */
115 explicit dwarf(const std::shared_ptr<loader> &l);
116
117 /**
118 * Construct a DWARF file that is initially not valid.
119 */
120 dwarf() = default;
121 dwarf(const dwarf&) = default;
122 dwarf(dwarf&&) = default;
123 ~dwarf();
124
125 dwarf& operator=(const dwarf &o) = default;
126 dwarf& operator=(dwarf &&o) = default;
127
128 bool operator==(const dwarf &o) const
129 {
130 return m == o.m;
131 }
132
133 bool operator!=(const dwarf &o) const
134 {
135 return m != o.m;
136 }
137
138 /**
139 * Return true if this object represents a DWARF file.
140 * Default constructed dwarf objects are not valid.
141 */
142 bool valid() const
143 {
144 return !!m;
145 }
146
147 // XXX This allows the compilation units to be modified and
148 // ties us to a vector. Probably should return an opaque
149 // iterable collection over const references.
150 /**
151 * Return the list of compilation units in this DWARF file.
152 */
153 const std::vector<compilation_unit> &compilation_units() const;
154
155 /**
156 * Return the type unit with the given signature. If the
157 * signature does not correspond to a type unit, throws
158 * out_of_range.
159 */
160 const type_unit &get_type_unit(uint64_t type_signature) const;
161
162 /**
163 * \internal Retrieve the specified section from this file.
164 * If the section does not exist, throws format_error.
165 */
166 std::shared_ptr<section> get_section(section_type type) const;
167
168private:
169 struct impl;
170 std::shared_ptr<impl> m;
171};
172
173/**
174 * An interface for lazily loading DWARF sections.
175 */
176class loader
177{
178public:
179 virtual ~loader() { }
180
181 /**
182 * Load the requested DWARF section into memory and return a
183 * pointer to the beginning of it. This memory must remain
184 * valid and unchanged until the loader is destroyed. If the
185 * requested section does not exist, this should return
186 * nullptr. If the section exists but cannot be loaded for
187 * any reason, this should throw an exception.
188 */
189 virtual const void *load(section_type section, size_t *size_out) = 0;
190};
191
192/**
193 * The base class for a compilation unit or type unit within a DWARF
194 * file. A unit consists of a rooted tree of DIEs, plus additional
195 * metadata that depends on the type of unit.
196 */
197class unit
198{
199public:
200 virtual ~unit() = 0;
201
202 bool operator==(const unit &o) const
203 {
204 return m == o.m;
205 }
206
207 bool operator!=(const unit &o) const
208 {
209 return m != o.m;
210 }
211
212 /**
213 * Return true if this object is valid. Default constructed
214 * unit objects are not valid.
215 */
216 bool valid() const
217 {
218 return !!m;
219 }
220
221 /**
222 * Return the dwarf file this unit is in.
223 */
224 const dwarf &get_dwarf() const;
225
226 /**
227 * Return the byte offset of this unit's header in its
228 * section (.debug_info or .debug_types).
229 */
230 section_offset get_section_offset() const;
231
232 /**
233 * Return the root DIE of this unit. For a compilation unit,
234 * this should be a DW_TAG::compilation_unit or
235 * DW_TAG::partial_unit.
236 */
237 const die &root() const;
238
239 /**
240 * \internal Return the data for this unit.
241 */
242 const std::shared_ptr<section> &data() const;
243
244 /**
245 * \internal Return the abbrev for the specified abbrev
246 * code.
247 */
248 const abbrev_entry &get_abbrev(std::uint64_t acode) const;
249
250protected:
251 friend struct ::std::hash<unit>;
252 struct impl;
253 std::shared_ptr<impl> m;
254};
255
256/**
257 * A compilation unit within a DWARF file. Most of the information
258 * in a DWARF file is divided up by compilation unit. This class is
259 * internally reference counted and can be efficiently copied.
260 */
261class compilation_unit : public unit
262{
263public:
264 compilation_unit() = default;
265 compilation_unit(const compilation_unit &o) = default;
266 compilation_unit(compilation_unit &&o) = default;
267
268 compilation_unit& operator=(const compilation_unit &o) = default;
269 compilation_unit& operator=(compilation_unit &&o) = default;
270
271 /**
272 * \internal Construct a compilation unit whose header begins
273 * offset bytes into the .debug_info section of file.
274 */
275 compilation_unit(const dwarf &file, section_offset offset);
276
277 /**
278 * Return the line number table of this compilation unit.
279 * Returns an invalid line table if this unit has no line
280 * table.
281 */
282 const line_table &get_line_table() const;
283};
284
285/**
286 * A type unit. Type units allow complex type information to be
287 * shared between compilation units.
288 */
289class type_unit : public unit
290{
291public:
292 type_unit() = default;
293 type_unit(const type_unit &o) = default;
294 type_unit(type_unit &&o) = default;
295
296 type_unit &operator=(const type_unit &o) = default;
297 type_unit &operator=(type_unit &&o) = default;
298
299 /**
300 * \internal Construct a type unit whose header begins offset
301 * bytes into the .debug_types section of file.
302 */
303 type_unit(const dwarf &file, section_offset offset);
304
305 /**
306 * Return the 64-bit unique signature that identifies this
307 * type unit. This is how DIEs from other units refer to type
308 * described by this unit.
309 */
310 uint64_t get_type_signature() const;
311
312 // XXX Can a type unit contain more than one top-level DIE?
313 // The description of type_offset makes it sound like it
314 // might.
315
316 /**
317 * Return the DIE of the type described by this type unit.
318 * This may not be the root DIE of this unit if the type is
319 * nested in namespaces or other structures.
320 */
321 const die &type() const;
322};
323
324//////////////////////////////////////////////////////////////////
325// Debugging information entries (DIEs)
326//
327
328/**
329 * A Debugging Information Entry, or DIE. The basic unit of
330 * information in a DWARF file.
331 */
332class die
333{
334 // XXX Make this class better for use in maps. Currently dies
335 // are fairly big and expensive to copy, but most of that
336 // information can be constructed lazily. This is also bad
337 // for use in caches since it will keep the DWARF file alive.
338 // OTOH, maybe caches need eviction anyway.
339public:
340 DW_TAG tag;
341
342 die() : cu(nullptr), abbrev(nullptr) { }
343 die(const die &o) = default;
344 die(die &&o) = default;
345
346 die& operator=(const die &o) = default;
347 die& operator=(die &&o) = default;
348
349 /**
350 * Return true if this object represents a DIE in a DWARF
351 * file. Default constructed objects are not valid and some
352 * methods return invalid DIEs to indicate failures.
353 */
354 bool valid() const
355 {
356 return abbrev != nullptr;
357 }
358
359 /**
360 * Return the unit containing this DIE.
361 */
362 const unit &get_unit() const;
363
364 /**
365 * Return this DIE's byte offset within its compilation unit.
366 */
367 section_offset get_unit_offset() const
368 {
369 return offset;
370 }
371
372 /**
373 * Return this DIE's byte offset within its section.
374 */
375 section_offset get_section_offset() const;
376
377 /**
378 * Return true if this DIE has the requested attribute.
379 */
380 bool has(DW_AT attr) const;
381
382 /**
383 * Return the value of attr. Throws out_of_range if this DIE
384 * does not have the specified attribute. It is generally
385 * better to use the type-safe attribute getters (the global
386 * functions beginning with at_*) when possible.
387 */
388 value operator[](DW_AT attr) const;
389
390 /**
391 * Return the value of attr after resolving specification and
392 * abstract origin references. If the attribute cannot be
393 * resolved, returns an invalid value. Declaration DIEs can
394 * "complete" a previous non-defining declaration DIE and
395 * similarly inherit the non-defining declaration's attributes
396 * (DWARF4 section 2.13) Likewise, any DIE that is a child of
397 * a concrete inlined instance can specify another DIE as its
398 * "abstract origin" and the original DIE will inherit the
399 * attributes of its abstract origin (DWARF4 section 3.3.8.2).
400 */
401 value resolve(DW_AT attr) const;
402
403 class iterator;
404
405 /**
406 * Return an iterator over the children of this DIE. Note
407 * that the DIEs returned by this iterator are temporary, so
408 * if you need to store a DIE for more than one loop
409 * iteration, you must copy it.
410 */
411 iterator begin() const;
412 iterator end() const;
413
414 /**
415 * Return a vector of the attributes of this DIE.
416 */
417 const std::vector<std::pair<DW_AT, value> > attributes() const;
418
419 bool operator==(const die &o) const;
420 bool operator!=(const die &o) const;
421
422private:
423 friend class unit;
424 friend class type_unit;
425 friend class value;
426 // XXX If we can get the CU, we don't need this
427 friend struct ::std::hash<die>;
428
429 const unit *cu;
430 // The abbrev of this DIE. By convention, if this DIE
431 // represents a sibling list terminator, this is null. This
432 // object is kept live by the CU.
433 const abbrev_entry *abbrev;
434 // The beginning of this DIE, relative to the CU.
435 section_offset offset;
436 // Offsets of attributes, relative to cu's subsection. The
437 // vast majority of DIEs tend to have six or fewer attributes,
438 // so we reserve space in the DIE itself for six attributes.
439 small_vector<section_offset, 6> attrs;
440 // The offset of the next DIE, relative to cu'd subsection.
441 // This is set even for sibling list terminators.
442 section_offset next;
443
444 die(const unit *cu);
445
446 /**
447 * Read this DIE from the given offset in cu.
448 */
449 void read(section_offset off);
450};
451
452/**
453 * An iterator over a sequence of sibling DIEs.
454 */
455class die::iterator
456{
457public:
458 iterator() = default;
459 iterator(const iterator &o) = default;
460 iterator(iterator &&o) = default;
461
462 iterator& operator=(const iterator &o) = default;
463 iterator& operator=(iterator &&o) = default;
464
465 const die &operator*() const
466 {
467 return d;
468 }
469
470 const die *operator->() const
471 {
472 return &d;
473 }
474
475 // XXX Make this less confusing by implementing operator== instead
476 bool operator!=(const iterator &o) const
477 {
478 // Quick test of abbrevs. In particular, this weeds
479 // out non-end against end, which is a common
480 // comparison while iterating, though it also weeds
481 // out many other things.
482 if (d.abbrev != o.d.abbrev)
483 return true;
484
485 // Same, possibly NULL abbrev. If abbrev is NULL,
486 // then next's are uncomparable, so we need to stop
487 // now. We consider all ends to be the same, without
488 // comparing cu's.
489 if (d.abbrev == nullptr)
490 return false;
491
492 // Comparing two non-end abbrevs.
493 return d.next != o.d.next || d.cu != o.d.cu;
494 }
495
496 iterator &operator++();
497
498private:
499 friend class die;
500
501 iterator(const unit *cu, section_offset off);
502
503 die d;
504};
505
506inline die::iterator
507die::end() const
508{
509 return iterator();
510}
511
/**
 * Exception type reporting that a value does not have the requested
 * type.
 */
class value_type_mismatch : public std::logic_error
{
public:
    // Expose the (explicit) std::string and const char *
    // constructors of std::logic_error unchanged.
    using std::logic_error::logic_error;
};
523
524/**
525 * The value of a DIE attribute.
526 *
527 * This is logically a union of many different types. Each type has a
528 * corresponding as_* methods that will return the value as that type
529 * or throw value_type_mismatch if the attribute is not of the
530 * requested type.
531 *
532 * Values of "constant" type are somewhat ambiguous and
533 * context-dependent. Constant forms with specified signed-ness have
534 * type "uconstant" or "sconstant", while other constant forms have
535 * type "constant". If the value's type is "constant", it can be
536 * retrieved using either as_uconstant or as_sconstant.
537 *
538 * Some other types can also be coerced. These are documented on the
539 * individual as_* methods.
540 *
541 * There is no as_line; while there is an attribute for line tables,
542 * line tables are really associated with compilation units (and
543 * require additional context from the compilation unit). Use
544 * compilation_unit::get_line_table instead.
545 */
546class value
547{
548public:
549 enum class type
550 {
551 invalid,
552 address,
553 block,
554 constant,
555 uconstant,
556 sconstant,
557 exprloc,
558 flag,
559 line,
560 loclist,
561 mac,
562 rangelist,
563 reference,
564 string
565 };
566
567 /**
568 * Construct a value with type `type::invalid`.
569 */
570 value() : cu(nullptr), typ(type::invalid) { }
571
572 value(const value &o) = default;
573 value(value &&o) = default;
574
575 value& operator=(const value &o) = default;
576 value& operator=(value &&o) = default;
577
578 /**
579 * Return true if this object represents a valid value.
580 * Default constructed line tables are not valid.
581 */
582 bool valid() const
583 {
584 return typ != type::invalid;
585 }
586
587 /**
588 * Return this value's byte offset within its compilation
589 * unit.
590 */
591 section_offset get_unit_offset() const
592 {
593 return offset;
594 }
595
596 /**
597 * Return this value's byte offset within its section.
598 */
599 section_offset get_section_offset() const;
600
601 type get_type() const
602 {
603 return typ;
604 }
605
606 /**
607 * Return this value's attribute encoding. This automatically
608 * resolves indirect encodings, so this will never return
609 * DW_FORM::indirect. Note that the mapping from forms to
610 * types is non-trivial and often depends on the attribute
611 * (especially prior to DWARF 4).
612 */
613 DW_FORM get_form() const
614 {
615 return form;
616 }
617
618 /**
619 * Return this value as a target machine address.
620 */
621 taddr as_address() const;
622
623 /**
624 * Return this value as a block. The returned pointer points
625 * directly into the section data, so the caller must ensure
626 * that remains valid as long as the data is in use.
627 * *size_out is set to the length of the returned block, in
628 * bytes.
629 *
630 * This automatically coerces "exprloc" type values by
631 * returning the raw bytes of the encoded expression.
632 */
633 const void *as_block(size_t *size_out) const;
634
635 /**
636 * Return this value as an unsigned constant. This
637 * automatically coerces "constant" type values by
638 * interpreting their bytes as unsigned.
639 */
640 uint64_t as_uconstant() const;
641
642 /**
643 * Return this value as a signed constant. This automatically
644 * coerces "constant" type values by interpreting their bytes
645 * as twos-complement signed values.
646 */
647 int64_t as_sconstant() const;
648
649 /**
650 * Return this value as an expression. This automatically
651 * coerces "block" type values by interpreting the bytes in
652 * the block as an expression (prior to DWARF 4, exprlocs were
653 * always encoded as blocks, though the library automatically
654 * distinguishes these types based on context).
655 */
656 expr as_exprloc() const;
657
658 /**
659 * Return this value as a boolean flag.
660 */
661 bool as_flag() const;
662
663 // XXX loclistptr, macptr
664
665 /**
666 * Return this value as a rangelist.
667 */
668 rangelist as_rangelist() const;
669
670 /**
671 * For a reference type value, return the referenced DIE.
672 * This DIE may be in a different compilation unit or could
673 * be a DIE in a type unit.
674 */
675 die as_reference() const;
676
677 /**
678 * Return this value as a string.
679 */
680 std::string as_string() const;
681
682 /**
683 * Fill the given string buffer with the string value of this
684 * value. This is useful to minimize allocation when reading
685 * several string-type values.
686 */
687 void as_string(std::string &buf) const;
688
689 /**
690 * Return this value as a NUL-terminated character string.
691 * The returned pointer points directly into the section data,
692 * so the caller must ensure that remains valid as long as the
693 * data is in use. *size_out, if not NULL, is set to the
694 * length of the returned string without the NUL-terminator.
695 */
696 const char *as_cstr(size_t *size_out = nullptr) const;
697
698 /**
699 * Return this value as a section offset. This is applicable
700 * to lineptr, loclistptr, macptr, and rangelistptr.
701 */
702 section_offset as_sec_offset() const;
703
704private:
705 friend class die;
706
707 value(const unit *cu,
708 DW_AT name, DW_FORM form, type typ, section_offset offset);
709
710 void resolve_indirect(DW_AT name);
711
712 const unit *cu;
713 DW_FORM form;
714 type typ;
715 section_offset offset;
716};
717
718std::string
719to_string(value::type v);
720
721std::string
722to_string(const value &v);
723
724//////////////////////////////////////////////////////////////////
725// Expressions and location descriptions
726//
727
/**
 * Exception type reporting a failure during expression evaluation.
 */
class expr_error : public std::runtime_error
{
public:
    // Expose the (explicit) std::string and const char *
    // constructors of std::runtime_error unchanged.
    using std::runtime_error::runtime_error;
};
739
740/**
741 * A DWARF expression or location description.
742 */
743class expr
744{
745public:
746 /**
747 * Short-hand for evaluate(ctx, {}).
748 */
749 expr_result evaluate(expr_context *ctx) const;
750
751 /**
752 * Short-hand for evaluate(ctx, {argument}).
753 */
754 expr_result evaluate(expr_context *ctx, taddr argument) const;
755
756 /**
757 * Return the result of evaluating this expression using the
758 * specified expression context. The expression stack will be
759 * initialized with the given arguments such that the first
760 * arguments is at the top of the stack and the last argument
761 * at the bottom of the stack.
762 *
763 * Throws expr_error if there is an error evaluating the
764 * expression (such as an unknown operation, stack underflow,
765 * bounds error, etc.)
766 */
767 expr_result evaluate(expr_context *ctx, const std::initializer_list<taddr> &arguments) const;
768
769private:
770 // XXX This will need more information for some operations
771 expr(const unit *cu,
772 section_offset offset, section_length len);
773
774 friend class value;
775
776 const unit *cu;
777 section_offset offset;
778 section_length len;
779};
780
781/**
782 * An interface that provides contextual information for expression
783 * evaluation. Callers of expr::evaluate are expected to subclass
784 * this in order to provide this information to the expression
785 * evaluation engine. The default implementation throws expr_error
786 * for all methods.
787 */
788class expr_context
789{
790public:
791 virtual ~expr_context() { }
792
793 /**
794 * Return the value stored in register regnum. This is used
795 * to implement DW_OP_breg* operations.
796 */
797 virtual taddr reg(unsigned regnum)
798 {
799 (void)regnum;
800 throw expr_error("DW_OP_breg* operations not supported");
801 }
802
803 /**
804 * Implement DW_OP_deref_size.
805 */
806 virtual taddr deref_size(taddr address, unsigned size)
807 {
808 (void)address;
809 (void)size;
810 throw expr_error("DW_OP_deref_size operations not supported");
811 }
812
813 /**
814 * Implement DW_OP_xderef_size.
815 */
816 virtual taddr xderef_size(taddr address, taddr asid, unsigned size)
817 {
818 (void)address;
819 (void)asid;
820 (void)size;
821 throw expr_error("DW_OP_xderef_size operations not supported");
822 }
823
824 /**
825 * Implement DW_OP_form_tls_address.
826 */
827 virtual taddr form_tls_address(taddr address)
828 {
829 (void)address;
830 throw expr_error("DW_OP_form_tls_address operations not supported");
831 }
832};
833
/**
 * An instance of expr_context that throws expr_error for all methods.
 * This is equivalent to a default-constructed expr_context, but is
 * often more convenient to use.
 */
839extern expr_context no_expr_context;
840
841// XXX Provide methods to check type and fetch value?
842/**
843 * The result of evaluating a DWARF expression or location
844 * description.
845 */
846class expr_result
847{
848public:
849 enum class type {
850 /**
851 * value specifies the address in memory of an object.
852 * This is also the result type used for general
853 * expressions that do not refer to object locations.
854 */
855 address,
856 /**
857 * value specifies a register storing an object.
858 */
859 reg,
860 /**
861 * The object does not have a location. value is the
862 * value of the object.
863 */
864 literal,
865 /**
866 * The object does not have a location. Its value is
867 * pointed to by the 'implicit' field.
868 */
869 implicit,
870 /**
871 * The object is present in the source, but not in the
872 * object code, and hence does not have a location or
873 * a value.
874 */
875 empty,
876 };
877
878 /**
879 * For location descriptions, the type of location this result
880 * describes.
881 */
882 type location_type;
883
884 /**
885 * For general-purpose expressions, the result of expression.
886 * For address location descriptions, the address in memory of
887 * the object. For register location descriptions, the
888 * register storing the object. For literal location
889 * descriptions, the value of the object.
890 */
891 taddr value;
892
893 /**
894 * For implicit location descriptions, a pointer to a block
895 * representing the value in the memory representation of the
896 * target machine.
897 */
898 const char *implicit;
899 size_t implicit_len;
900
901 // XXX Composite locations
902};
903
904std::string
905to_string(expr_result::type v);
906
907//////////////////////////////////////////////////////////////////
908// Range lists
909//
910
911/**
912 * A DWARF range list describing a set of possibly non-contiguous
913 * addresses.
914 */
915class rangelist
916{
917public:
918 /**
919 * \internal Construct a range list whose data begins at the
920 * given offset in sec. cu_addr_size is the address size of
921 * the associated compilation unit. cu_low_pc is the
922 * DW_AT::low_pc attribute of the compilation unit containing
923 * the referring DIE or 0 (this is used as the base address of
924 * the range list).
925 */
926 rangelist(const std::shared_ptr<section> &sec, section_offset off,
927 unsigned cu_addr_size, taddr cu_low_pc);
928
929 /**
930 * Construct a range list from a sequence of {low, high}
931 * pairs.
932 */
933 rangelist(const std::initializer_list<std::pair<taddr, taddr> > &ranges);
934
935 /**
936 * Construct an empty range list.
937 */
938 rangelist() = default;
939
940 /** Copy constructor */
941 rangelist(const rangelist &o) = default;
942 /** Move constructor */
943 rangelist(rangelist &&o) = default;
944
945 rangelist& operator=(const rangelist &o) = default;
946 rangelist& operator=(rangelist &&o) = default;
947
948 class entry;
949 typedef entry value_type;
950
951 class iterator;
952
953 /**
954 * Return an iterator over the entries in this range list.
955 * The ranges returned by this iterator are temporary, so if
956 * you need to store a range for more than one loop iteration,
957 * you must copy it.
958 */
959 iterator begin() const;
960
961 /**
962 * Return an iterator to one past the last entry in this range
963 * list.
964 */
965 iterator end() const;
966
967 /**
968 * Return true if this range list contains the given address.
969 */
970 bool contains(taddr addr) const;
971
972private:
973 std::vector<taddr> synthetic;
974 std::shared_ptr<section> sec;
975 taddr base_addr;
976};
977
978/**
979 * An entry in a range list. The range spans addresses [low, high).
980 */
981class rangelist::entry
982{
983public:
984 taddr low, high;
985
986 /**
987 * Return true if addr is within this entry's bounds.
988 */
989 bool contains(taddr addr) const
990 {
991 return low <= addr && addr < high;
992 }
993};
994
995/**
996 * An iterator over a sequence of ranges in a range list.
997 */
998class rangelist::iterator
999{
1000public:
1001 /**
1002 * \internal Construct an end iterator.
1003 */
1004 iterator() : sec(nullptr), base_addr(0), pos(0) { }
1005
1006 /**
1007 * \internal Construct an iterator that reads rangelist data
1008 * from the beginning of the given section and starts with the
1009 * given base address.
1010 */
1011 iterator(const std::shared_ptr<section> &sec, taddr base_addr);
1012
1013 /** Copy constructor */
1014 iterator(const iterator &o) = default;
1015 /** Move constructor */
1016 iterator(iterator &&o) = default;
1017
1018 iterator& operator=(const iterator &o) = default;
1019 iterator& operator=(iterator &&o) = default;
1020
1021 /**
1022 * Return the current range list entry. This entry is reused
1023 * internally, so the caller should copy it if it needs to
1024 * persist past the next increment.
1025 */
1026 const rangelist::entry &operator*() const
1027 {
1028 return entry;
1029 }
1030
1031 /** Dereference operator */
1032 const rangelist::entry *operator->() const
1033 {
1034 return &entry;
1035 }
1036
1037 /** Equality operator */
1038 bool operator==(const iterator &o) const
1039 {
1040 return sec == o.sec && pos == o.pos;
1041 }
1042
1043 /** Inequality operator */
1044 bool operator!=(const iterator &o) const
1045 {
1046 return !(*this == o);
1047 }
1048
1049 /**
1050 * Increment this iterator to point to the next range list
1051 * entry.
1052 */
1053 iterator &operator++();
1054
1055private:
1056 std::shared_ptr<section> sec;
1057 taddr base_addr;
1058 section_offset pos;
1059 rangelist::entry entry;
1060};
1061
1062//////////////////////////////////////////////////////////////////
1063// Line number tables
1064//
1065
1066/**
1067 * A DWARF line number table. A line number table is a list of line
1068 * table entries, broken up into "sequences". Within a sequence,
1069 * entries are in order of increasing program counter ("address") and
1070 * an entry provides information for all program counters between the
1071 * entry's address and the address of the next entry. Each sequence
1072 * is terminated by a special entry with its
1073 * line_table::entry::end_sequence flag set. The line number table
1074 * also records the set of source files for a given compilation unit,
1075 * which can be referred to from other DIE attributes.
1076 */
1077class line_table
1078{
1079public:
1080 /**
1081 * \internal Construct a line number table whose header begins
1082 * at the given offset in sec. cu_addr_size is the address
1083 * size of the associated compilation unit. cu_comp_dir and
1084 * cu_name give the DW_AT::comp_dir and DW_AT::name attributes
1085 * of the associated compilation unit.
1086 */
1087 line_table(const std::shared_ptr<section> &sec, section_offset offset,
1088 unsigned cu_addr_size, const std::string &cu_comp_dir,
1089 const std::string &cu_name);
1090
1091 /**
1092 * Construct an invalid, empty line table.
1093 */
1094 line_table() = default;
1095
1096 /** Copy constructor */
1097 line_table(const line_table &o) = default;
1098 /** Move constructor */
1099 line_table(line_table &&o) = default;
1100
1101 line_table &operator=(const line_table &o) = default;
1102 line_table &operator=(line_table &&o) = default;
1103
1104 /**
1105 * Return true if this object represents an initialized line
1106 * table. Default constructed line tables are not valid.
1107 */
1108 bool valid() const
1109 {
1110 return !!m;
1111 }
1112
1113 class file;
1114 class entry;
1115 typedef entry value_type;
1116
1117 class iterator;
1118
1119 /**
1120 * Return an iterator to the beginning of this line number
1121 * table. If called on an invalid line table, this will
1122 * return an iterator equal to end().
1123 */
1124 iterator begin() const;
1125
1126 /**
1127 * Return an iterator to one past the last entry in this line
1128 * number table.
1129 */
1130 iterator end() const;
1131
1132 /**
1133 * Return an iterator to the line table entry containing addr
1134 * (roughly, the entry with the highest address less than or
1135 * equal to addr, but accounting for end_sequence entries).
1136 * Returns end() if there is no such entry.
1137 */
1138 iterator find_address(taddr addr) const;
1139
1140 /**
1141 * Return the index'th file in the line table. These indexes
1142 * are typically used by declaration and call coordinates. If
1143 * index is out of range, throws out_of_range.
1144 */
1145 const file *get_file(unsigned index) const;
1146
1147private:
1148 friend class iterator;
1149
1150 struct impl;
1151 std::shared_ptr<impl> m;
1152};
1153
1154/**
1155 * A source file in a line table.
1156 */
1157class line_table::file
1158{
1159public:
1160 /**
1161 * The absolute path of this source file.
1162 */
1163 std::string path;
1164
1165 /**
1166 * The last modification time of this source file in an
1167 * implementation-defined encoding or 0 if unknown.
1168 */
1169 uint64_t mtime;
1170
1171 /**
1172 * The size in bytes of this source file or 0 if unknown.
1173 */
1174 uint64_t length;
1175
1176 /**
1177 * Construct a source file object.
1178 */
1179 file(std::string path = "", uint64_t mtime = 0, uint64_t length = 0);
1180};
1181
1182/**
1183 * An entry in the line table.
1184 */
1185class line_table::entry
1186{
1187public:
1188 /**
1189 * The program counter value corresponding to a machine
1190 * instruction generated by the compiler.
1191 */
1192 taddr address;
1193
1194 /**
1195 * The index of an operation within a VLIW instruction. The
1196 * index of the first operation is 0. For non-VLIW
1197 * architectures, this will always be 0.
1198 */
1199 unsigned op_index;
1200
1201 /**
1202 * The source file containing this instruction.
1203 */
1204 const line_table::file *file;
1205
1206 /**
1207 * The index of the source file containing this instruction.
1208 */
1209 unsigned file_index;
1210
1211 /**
1212 * The source line number of this instruction, starting at 1.
1213 * This may be 0 if this instruction cannot be attributed to
1214 * any source line.
1215 */
1216 unsigned line;
1217
1218 /**
1219 * The column number within this source line, starting at 1.
1220 * The value 0 indicates that a statement begins at the "left
1221 * edge" of the line, whatever that means.
1222 */
1223 unsigned column;
1224
1225 /**
1226 * True if this instruction is a recommended breakpoint
1227 * location. Typically this is the beginning of a statement.
1228 */
1229 bool is_stmt;
1230
1231 /**
1232 * True if this instruction is the beginning of a basic block.
1233 */
1234 bool basic_block;
1235
1236 /**
1237 * True if this address is the first byte after the end of a
1238 * sequence of target machine instructions. In this case, all
1239 * other fields besides address are not meaningful.
1240 */
1241 bool end_sequence;
1242
1243 /**
1244 * True if this address is one where execution should be
1245 * suspended for an entry breakpoint of a function.
1246 */
1247 bool prologue_end;
1248
1249 /**
1250 * True if this address is one where execution should be
1251 * suspended for an exit breakpoint of a function.
1252 */
1253 bool epilogue_begin;
1254
1255 /**
1256 * The instruction set architecture of this instruction. The
1257 * meaning of this field is generally defined by an
1258 * architecture's ABI.
1259 */
1260 unsigned isa;
1261
1262 /**
1263 * A number that identifies the block containing the current
1264 * instruction if multiple blocks are associated with the same
1265 * source file, line, and column.
1266 */
1267 unsigned discriminator;
1268
1269 /**
1270 * Reset this line info object to the default initial values
1271 * for all fields. is_stmt has no default value, so the
1272 * caller must provide it.
1273 */
1274 void reset(bool is_stmt);
1275
1276 /**
1277 * Return a descriptive string of the form
1278 * "filename[:line[:column]]".
1279 */
1280 std::string get_description() const;
1281};
1282
1283/**
1284 * An iterator over the entries in a line table.
1285 */
1286class line_table::iterator
1287{
1288public:
1289 /**
1290 * \internal Construct an iterator for the given line table
1291 * starting pos bytes into the table's section.
1292 */
1293 iterator(const line_table *table, section_offset pos);
1294
1295 /** Copy constructor */
1296 iterator(const iterator &o) = default;
1297 /** Move constructor */
1298 iterator(iterator &&o) = default;
1299
1300 iterator &operator=(const iterator &o) = default;
1301 iterator &operator=(iterator &&o) = default;
1302
1303 /**
1304 * Return the current line table entry. This entry is reused
1305 * internally, so the caller should copy it if it needs to
1306 * persist past the next increment.
1307 */
1308 const line_table::entry &operator*() const
1309 {
1310 return entry;
1311 }
1312
1313 /** Dereference operator */
1314 const line_table::entry *operator->() const
1315 {
1316 return &entry;
1317 }
1318
1319 /** Equality operator */
1320 bool operator==(const iterator &o) const
1321 {
1322 return o.pos == pos && o.table == table;
1323 }
1324
1325 /** Inequality operator */
1326 bool operator!=(const iterator &o) const
1327 {
1328 return !(*this == o);
1329 }
1330
1331 /**
1332 * Increment this iterator to point to the next line table
1333 * entry.
1334 */
1335 iterator &operator++();
1336
1337 /** Post-increment operator */
1338 iterator operator++(int)
1339 {
1340 iterator tmp(*this);
1341 ++(*this);
1342 return tmp;
1343 }
1344
1345private:
1346 const line_table *table;
1347 line_table::entry entry, regs;
1348 section_offset pos;
1349
1350 /**
1351 * Process the next opcode. If the opcode "adds a row to the
1352 * table", update entry to reflect the row and return true.
1353 */
1354 bool step(cursor *cur);
1355};
1356
1357//////////////////////////////////////////////////////////////////
1358// Type-safe attribute getters
1359//
1360
1361// XXX More
1362
1363die at_abstract_origin(const die &d);
1364DW_ACCESS at_accessibility(const die &d);
1365uint64_t at_allocated(const die &d, expr_context *ctx);
1366bool at_artificial(const die &d);
1367uint64_t at_associated(const die &d, expr_context *ctx);
1368uint64_t at_bit_offset(const die &d, expr_context *ctx);
1369uint64_t at_bit_size(const die &d, expr_context *ctx);
1370uint64_t at_bit_stride(const die &d, expr_context *ctx);
1371uint64_t at_byte_size(const die &d, expr_context *ctx);
1372uint64_t at_byte_stride(const die &d, expr_context *ctx);
1373DW_CC at_calling_convention(const die &d);
1374die at_common_reference(const die &d);
1375std::string at_comp_dir(const die &d);
1376value at_const_value(const die &d);
1377bool at_const_expr(const die &d);
1378die at_containing_type(const die &d);
1379uint64_t at_count(const die &d, expr_context *ctx);
1380expr_result at_data_member_location(const die &d, expr_context *ctx, taddr base, taddr pc);
1381bool at_declaration(const die &d);
1382std::string at_description(const die &d);
1383die at_discr(const die &d);
1384value at_discr_value(const die &d);
1385bool at_elemental(const die &d);
1386DW_ATE at_encoding(const die &d);
1387DW_END at_endianity(const die &d);
1388taddr at_entry_pc(const die &d);
1389bool at_enum_class(const die &d);
1390bool at_explicit(const die &d);
1391die at_extension(const die &d);
1392bool at_external(const die &d);
1393die at_friend(const die &d);
1394taddr at_high_pc(const die &d);
1395DW_ID at_identifier_case(const die &d);
1396die at_import(const die &d);
1397DW_INL at_inline(const die &d);
1398bool at_is_optional(const die &d);
1399DW_LANG at_language(const die &d);
1400std::string at_linkage_name(const die &d);
1401taddr at_low_pc(const die &d);
1402uint64_t at_lower_bound(const die &d, expr_context *ctx);
1403bool at_main_subprogram(const die &d);
1404bool at_mutable(const die &d);
1405std::string at_name(const die &d);
1406die at_namelist_item(const die &d);
1407die at_object_pointer(const die &d);
1408DW_ORD at_ordering(const die &d);
1409std::string at_picture_string(const die &d);
1410die at_priority(const die &d);
1411std::string at_producer(const die &d);
1412bool at_prototyped(const die &d);
1413bool at_pure(const die &d);
1414rangelist at_ranges(const die &d);
1415bool at_recursive(const die &d);
1416die at_sibling(const die &d);
1417die at_signature(const die &d);
1418die at_small(const die &d);
1419die at_specification(const die &d);
1420bool at_threads_scaled(const die &d);
1421die at_type(const die &d);
1422uint64_t at_upper_bound(const die &d, expr_context *ctx);
1423bool at_use_UTF8(const die &d);
1424bool at_variable_parameter(const die &d);
1425DW_VIRTUALITY at_virtuality(const die &d);
1426DW_VIS at_visibility(const die &d);
1427
1428/**
1429 * Return the PC range spanned by the code of a DIE. The DIE must
1430 * either have DW_AT::ranges or DW_AT::low_pc. It may optionally have
1431 * DW_AT::high_pc.
1432 */
1433rangelist die_pc_range(const die &d);
1434
1435//////////////////////////////////////////////////////////////////
1436// Utilities
1437//
1438
1439/**
1440 * An index of sibling DIEs by some string attribute. This index is
1441 * lazily constructed and space-efficient.
1442 */
1443class die_str_map
1444{
1445public:
1446 /**
1447 * Construct the index of the attr attribute of all immediate
1448 * children of parent whose tags are in accept.
1449 */
1450 die_str_map(const die &parent, DW_AT attr,
1451 const std::initializer_list<DW_TAG> &accept);
1452
1453 die_str_map() = default;
1454 die_str_map(const die_str_map &o) = default;
1455 die_str_map(die_str_map &&o) = default;
1456
1457 die_str_map& operator=(const die_str_map &o) = default;
1458 die_str_map& operator=(die_str_map &&o) = default;
1459
1460 /**
1461 * Construct a string map for the type names of parent's
1462 * immediate children.
1463 *
1464 * XXX This should use .debug_pubtypes if parent is a compile
1465 * unit's root DIE, but it currently does not.
1466 */
1467 static die_str_map from_type_names(const die &parent);
1468
1469 /**
1470 * Return the DIE whose attribute matches val. If no such DIE
1471 * exists, return an invalid die object.
1472 */
1473 const die &operator[](const char *val) const;
1474
1475 /**
1476 * Short-hand for [value.c_str()].
1477 */
1478 const die &operator[](const std::string &val) const
1479 {
1480 return (*this)[val.c_str()];
1481 }
1482
1483private:
1484 struct impl;
1485 std::shared_ptr<impl> m;
1486};
1487
1488//////////////////////////////////////////////////////////////////
1489// ELF support
1490//
1491
1492namespace elf
1493{
1494 /**
1495 * Translate an ELF section name info a DWARF section type.
1496 * If the section is a valid DWARF section name, sets *out to
1497 * the type and returns true. If not, returns false.
1498 */
1499 bool section_name_to_type(const char *name, section_type *out);
1500
1501 /**
1502 * Translate a DWARF section type into an ELF section name.
1503 */
1504 const char *section_type_to_name(section_type type);
1505
1506 template<typename Elf>
1507 class elf_loader : public loader
1508 {
1509 Elf f;
1510
1511 public:
1512 elf_loader(const Elf &file) : f(file) { }
1513
1514 const void *load(section_type section, size_t *size_out)
1515 {
1516 auto sec = f.get_section(section_type_to_name(section));
1517 if (!sec.valid())
1518 return nullptr;
1519 *size_out = sec.size();
1520 return sec.data();
1521 }
1522 };
1523
1524 /**
1525 * Create a DWARF section loader backed by the given ELF
1526 * file. This is templatized to eliminate a static dependency
1527 * between the libelf++ and libdwarf++, though it can only
1528 * reasonably be used with elf::elf from libelf++.
1529 */
1530 template<typename Elf>
1531 std::shared_ptr<elf_loader<Elf> > create_loader(const Elf &f)
1532 {
1533 return std::make_shared<elf_loader<Elf> >(f);
1534 }
1535};
1536
1537DWARFPP_END_NAMESPACE
1538
1539//////////////////////////////////////////////////////////////////
1540// Hash specializations
1541//
1542
1543namespace std
1544{
1545 template<>
1546 struct hash<dwarf::unit>
1547 {
1548 typedef size_t result_type;
1549 typedef const dwarf::unit &argument_type;
1550 result_type operator()(argument_type a) const
1551 {
1552 return hash<decltype(a.m)>()(a.m);
1553 }
1554 };
1555
1556 template<>
1557 struct hash<dwarf::die>
1558 {
1559 typedef size_t result_type;
1560 typedef const dwarf::die &argument_type;
1561 result_type operator()(argument_type a) const;
1562 };
1563}
1564
1565#endif
1566