1// -*- mode: c++ -*-
2
3// Copyright (c) 2010 Google Inc.
4// All rights reserved.
5//
6// Redistribution and use in source and binary forms, with or without
7// modification, are permitted provided that the following conditions are
8// met:
9//
10// * Redistributions of source code must retain the above copyright
11// notice, this list of conditions and the following disclaimer.
12// * Redistributions in binary form must reproduce the above
13// copyright notice, this list of conditions and the following disclaimer
14// in the documentation and/or other materials provided with the
15// distribution.
16// * Neither the name of Google Inc. nor the names of its
17// contributors may be used to endorse or promote products derived from
18// this software without specific prior written permission.
19//
20// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31
32// Original author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com>
33
34// module.h: Define google_breakpad::Module. A Module holds debugging
35// information, and can write that information out as a Breakpad
36// symbol file.
37
38#ifndef COMMON_LINUX_MODULE_H__
39#define COMMON_LINUX_MODULE_H__
40
41#include <functional>
42#include <iostream>
43#include <limits>
44#include <map>
45#include <memory>
46#include <set>
47#include <string>
48#include <vector>
49
50#include "common/string_view.h"
51#include "common/symbol_data.h"
52#include "common/unordered.h"
53#include "common/using_std_string.h"
54#include "google_breakpad/common/breakpad_types.h"
55
56namespace google_breakpad {
57
58using std::set;
59using std::vector;
60using std::map;
61
62// A Module represents the contents of a module, and supports methods
63// for adding information produced by parsing STABS or DWARF data
64// --- possibly both from the same file --- and then writing out the
65// unified contents as a Breakpad-format symbol file.
66class Module {
67 public:
68 // The type of addresses and sizes in a symbol table.
69 typedef uint64_t Address;
70 static constexpr uint64_t kMaxAddress = std::numeric_limits<Address>::max();
71 struct File;
72 struct Function;
73 struct InlineOrigin;
74 struct Inline;
75 struct Line;
76 struct Extern;
77
  // Addresses appearing in File, Function, and Line structures are
  // absolute, not relative to the module's load address. That
  // is, if the module were loaded at its nominal load address, the
  // addresses would be correct.
82
83 // A source file.
84 struct File {
85 explicit File(const string& name_input) : name(name_input), source_id(0) {}
86
87 // The name of the source file.
88 const string name;
89
90 // The file's source id. The Write member function clears this
91 // field and assigns source ids a fresh, so any value placed here
92 // before calling Write will be lost.
93 int source_id;
94 };
95
96 // An address range.
97 struct Range {
98 Range(const Address address_input, const Address size_input) :
99 address(address_input), size(size_input) { }
100
101 Address address;
102 Address size;
103 };
104
105 // A function.
106 struct Function {
107 Function(StringView name_input, const Address& address_input) :
108 name(name_input), address(address_input), parameter_size(0) {}
109
110 // For sorting by address. (Not style-guide compliant, but it's
111 // stupid not to put this in the struct.)
112 static bool CompareByAddress(const Function* x, const Function* y) {
113 return x->address < y->address;
114 }
115
116 // The function's name.
117 StringView name;
118
119 // The start address and the address ranges covered by the function.
120 const Address address;
121 vector<Range> ranges;
122
123 // The function's parameter size.
124 Address parameter_size;
125
126 // Source lines belonging to this function, sorted by increasing
127 // address.
128 vector<Line> lines;
129
130 // Inlined call sites belonging to this functions.
131 vector<std::unique_ptr<Inline>> inlines;
132 };
133
134 struct InlineOrigin {
135 explicit InlineOrigin(StringView name) : id(-1), name(name) {}
136
137 // A unique id for each InlineOrigin object. INLINE records use the id to
138 // refer to its INLINE_ORIGIN record.
139 int id;
140
141 // The inlined function's name.
142 StringView name;
143
144 File* file;
145
146 int getFileID() const { return file ? file->source_id : -1; }
147 };
148
149 // A inlined call site.
150 struct Inline {
151 Inline(InlineOrigin* origin,
152 const vector<Range>& ranges,
153 int call_site_line,
154 int call_site_file_id,
155 int inline_nest_level,
156 vector<std::unique_ptr<Inline>> child_inlines)
157 : origin(origin),
158 ranges(ranges),
159 call_site_line(call_site_line),
160 call_site_file_id(call_site_file_id),
161 call_site_file(nullptr),
162 inline_nest_level(inline_nest_level),
163 child_inlines(std::move(child_inlines)) {}
164
165 InlineOrigin* origin;
166
167 // The list of addresses and sizes.
168 vector<Range> ranges;
169
170 int call_site_line;
171
172 // The id is only meanful inside a CU. It's only used for looking up real
173 // File* after scanning a CU.
174 int call_site_file_id;
175
176 File* call_site_file;
177
178 int inline_nest_level;
179
180 // A list of inlines which are children of this inline.
181 vector<std::unique_ptr<Inline>> child_inlines;
182
183 int getCallSiteFileID() const {
184 return call_site_file ? call_site_file->source_id : -1;
185 }
186
187 static void InlineDFS(
188 vector<std::unique_ptr<Module::Inline>>& inlines,
189 std::function<void(std::unique_ptr<Module::Inline>&)> const& forEach) {
190 for (std::unique_ptr<Module::Inline>& in : inlines) {
191 forEach(in);
192 InlineDFS(in->child_inlines, forEach);
193 }
194 }
195 };
196
197 typedef map<uint64_t, InlineOrigin*> InlineOriginByOffset;
198
199 class InlineOriginMap {
200 public:
201 // Add INLINE ORIGIN to the module. Return a pointer to origin .
202 InlineOrigin* GetOrCreateInlineOrigin(uint64_t offset, StringView name);
203
204 // offset is the offset of a DW_TAG_subprogram. specification_offset is the
205 // value of its DW_AT_specification or equals to offset if
206 // DW_AT_specification doesn't exist in that DIE.
207 void SetReference(uint64_t offset, uint64_t specification_offset);
208
209 ~InlineOriginMap() {
210 for (const auto& iter : inline_origins_) {
211 delete iter.second;
212 }
213 }
214
215 private:
216 // A map from a DW_TAG_subprogram's offset to the DW_TAG_subprogram.
217 InlineOriginByOffset inline_origins_;
218
219 // A map from a DW_TAG_subprogram's offset to the offset of its
220 // specification or abstract origin subprogram. The set of values in this
221 // map should always be the same set of keys in inline_origins_.
222 map<uint64_t, uint64_t> references_;
223 };
224
225 InlineOriginMap inline_origin_map;
226
227 // A source line.
228 struct Line {
229 // For sorting by address. (Not style-guide compliant, but it's
230 // stupid not to put this in the struct.)
231 static bool CompareByAddress(const Module::Line& x, const Module::Line& y) {
232 return x.address < y.address;
233 }
234
235 Address address, size; // The address and size of the line's code.
236 File* file; // The source file.
237 int number; // The source line number.
238 };
239
240 // An exported symbol.
241 struct Extern {
242 explicit Extern(const Address& address_input) : address(address_input) {}
243 const Address address;
244 string name;
245 };
246
247 // A map from register names to postfix expressions that recover
248 // their their values. This can represent a complete set of rules to
249 // follow at some address, or a set of changes to be applied to an
250 // extant set of rules.
251 typedef map<string, string> RuleMap;
252
253 // A map from addresses to RuleMaps, representing changes that take
254 // effect at given addresses.
255 typedef map<Address, RuleMap> RuleChangeMap;
256
257 // A range of 'STACK CFI' stack walking information. An instance of
258 // this structure corresponds to a 'STACK CFI INIT' record and the
259 // subsequent 'STACK CFI' records that fall within its range.
260 struct StackFrameEntry {
261 // The starting address and number of bytes of machine code this
262 // entry covers.
263 Address address, size;
264
265 // The initial register recovery rules, in force at the starting
266 // address.
267 RuleMap initial_rules;
268
269 // A map from addresses to rule changes. To find the rules in
270 // force at a given address, start with initial_rules, and then
271 // apply the changes given in this map for all addresses up to and
272 // including the address you're interested in.
273 RuleChangeMap rule_changes;
274 };
275
276 struct FunctionCompare {
277 bool operator() (const Function* lhs, const Function* rhs) const {
278 if (lhs->address == rhs->address)
279 return lhs->name < rhs->name;
280 return lhs->address < rhs->address;
281 }
282 };
283
284 struct InlineOriginCompare {
285 bool operator()(const InlineOrigin* lhs, const InlineOrigin* rhs) const {
286 return lhs->name < rhs->name;
287 }
288 };
289
290 struct ExternCompare {
291 bool operator() (const Extern* lhs, const Extern* rhs) const {
292 return lhs->address < rhs->address;
293 }
294 };
295
296 // Create a new module with the given name, operating system,
297 // architecture, and ID string.
298 Module(const string& name, const string& os, const string& architecture,
299 const string& id, const string& code_id = "");
300 ~Module();
301
302 // Set the module's load address to LOAD_ADDRESS; addresses given
303 // for functions and lines will be written to the Breakpad symbol
304 // file as offsets from this address. Construction initializes this
305 // module's load address to zero: addresses written to the symbol
306 // file will be the same as they appear in the Function, Line, and
307 // StackFrameEntry structures.
308 //
309 // Note that this member function has no effect on addresses stored
310 // in the data added to this module; the Write member function
311 // simply subtracts off the load address from addresses before it
312 // prints them. Only the last load address given before calling
313 // Write is used.
314 void SetLoadAddress(Address load_address);
315
316 // Sets address filtering on elements added to the module. This allows
317 // libraries with extraneous debug symbols to generate symbol files containing
318 // only relevant symbols. For example, an LLD-generated partition library may
319 // contain debug information pertaining to all partitions derived from a
320 // single "combined" library. Filtering applies only to elements added after
321 // this method is called.
322 void SetAddressRanges(const vector<Range>& ranges);
323
324 // Add FUNCTION to the module. FUNCTION's name must not be empty.
325 // This module owns all Function objects added with this function:
326 // destroying the module destroys them as well.
327 // Return false if the function is duplicate and needs to be freed.
328 bool AddFunction(Function* function);
329
330 // Add STACK_FRAME_ENTRY to the module.
331 // This module owns all StackFrameEntry objects added with this
332 // function: destroying the module destroys them as well.
333 void AddStackFrameEntry(StackFrameEntry* stack_frame_entry);
334
335 // Add PUBLIC to the module.
336 // This module owns all Extern objects added with this function:
337 // destroying the module destroys them as well.
338 void AddExtern(Extern* ext);
339
340 // If this module has a file named NAME, return a pointer to it. If
341 // it has none, then create one and return a pointer to the new
342 // file. This module owns all File objects created using these
343 // functions; destroying the module destroys them as well.
344 File* FindFile(const string& name);
345 File* FindFile(const char* name);
346
347 // If this module has a file named NAME, return a pointer to it.
348 // Otherwise, return NULL.
349 File* FindExistingFile(const string& name);
350
351 // Insert pointers to the functions added to this module at I in
352 // VEC. The pointed-to Functions are still owned by this module.
353 // (Since this is effectively a copy of the function list, this is
354 // mostly useful for testing; other uses should probably get a more
355 // appropriate interface.)
356 void GetFunctions(vector<Function*>* vec, vector<Function*>::iterator i);
357
358 // Insert pointers to the externs added to this module at I in
359 // VEC. The pointed-to Externs are still owned by this module.
360 // (Since this is effectively a copy of the extern list, this is
361 // mostly useful for testing; other uses should probably get a more
362 // appropriate interface.)
363 void GetExterns(vector<Extern*>* vec, vector<Extern*>::iterator i);
364
365 // Clear VEC and fill it with pointers to the Files added to this
366 // module, sorted by name. The pointed-to Files are still owned by
367 // this module. (Since this is effectively a copy of the file list,
368 // this is mostly useful for testing; other uses should probably get
369 // a more appropriate interface.)
370 void GetFiles(vector<File*>* vec);
371
372 // Clear VEC and fill it with pointers to the StackFrameEntry
373 // objects that have been added to this module. (Since this is
374 // effectively a copy of the stack frame entry list, this is mostly
375 // useful for testing; other uses should probably get
376 // a more appropriate interface.)
377 void GetStackFrameEntries(vector<StackFrameEntry*>* vec) const;
378
379 // Find those files in this module that are actually referred to by
380 // functions' line number data, and assign them source id numbers.
381 // Set the source id numbers for all other files --- unused by the
382 // source line data --- to -1. We do this before writing out the
383 // symbol file, at which point we omit any unused files.
384 void AssignSourceIds(set<InlineOrigin*, InlineOriginCompare>& inline_origins);
385
386 // This function should be called before AssignSourceIds() to get the set of
387 // valid InlineOrigins*.
388 void CreateInlineOrigins(
389 set<InlineOrigin*, InlineOriginCompare>& inline_origins);
390
391 // Call AssignSourceIds, and write this module to STREAM in the
392 // breakpad symbol format. Return true if all goes well, or false if
393 // an error occurs. This method writes out:
394 // - a header based on the values given to the constructor,
395 // If symbol_data is not CFI then:
396 // - the source files added via FindFile,
397 // - the functions added via AddFunctions, each with its lines,
398 // - all public records,
399 // If symbol_data is CFI then:
400 // - all CFI records.
401 // Addresses in the output are all relative to the load address
402 // established by SetLoadAddress.
403 bool Write(std::ostream& stream, SymbolData symbol_data);
404
405 // Place the name in the global set of strings. Return a StringView points to
406 // a string inside the pool.
407 StringView AddStringToPool(const string& str) {
408 auto result = common_strings_.insert(str);
409 return *(result.first);
410 }
411
412 string name() const { return name_; }
413 string os() const { return os_; }
414 string architecture() const { return architecture_; }
415 string identifier() const { return id_; }
416 string code_identifier() const { return code_id_; }
417
418 private:
419 // Report an error that has occurred writing the symbol file, using
420 // errno to find the appropriate cause. Return false.
421 static bool ReportError();
422
423 // Write RULE_MAP to STREAM, in the form appropriate for 'STACK CFI'
424 // records, without a final newline. Return true if all goes well;
425 // if an error occurs, return false, and leave errno set.
426 static bool WriteRuleMap(const RuleMap& rule_map, std::ostream& stream);
427
428 // Returns true of the specified address resides with an specified address
429 // range, or if no ranges have been specified.
430 bool AddressIsInModule(Address address) const;
431
432 // Module header entries.
433 string name_, os_, architecture_, id_, code_id_;
434
435 // The module's nominal load address. Addresses for functions and
436 // lines are absolute, assuming the module is loaded at this
437 // address.
438 Address load_address_;
439
440 // The set of valid address ranges of the module. If specified, attempts to
441 // add elements residing outside these ranges will be silently filtered.
442 vector<Range> address_ranges_;
443
444 // Relation for maps whose keys are strings shared with some other
445 // structure.
446 struct CompareStringPtrs {
447 bool operator()(const string* x, const string* y) const { return *x < *y; }
448 };
449
450 // A map from filenames to File structures. The map's keys are
451 // pointers to the Files' names.
452 typedef map<const string*, File*, CompareStringPtrs> FileByNameMap;
453
454 // A set containing Function structures, sorted by address.
455 typedef set<Function*, FunctionCompare> FunctionSet;
456
457 // A set containing Extern structures, sorted by address.
458 typedef set<Extern*, ExternCompare> ExternSet;
459
460 // The module owns all the files and functions that have been added
461 // to it; destroying the module frees the Files and Functions these
462 // point to.
463 FileByNameMap files_; // This module's source files.
464 FunctionSet functions_; // This module's functions.
465
466 // The module owns all the call frame info entries that have been
467 // added to it.
468 vector<StackFrameEntry*> stack_frame_entries_;
469
470 // The module owns all the externs that have been added to it;
471 // destroying the module frees the Externs these point to.
472 ExternSet externs_;
473
474 unordered_set<string> common_strings_;
475};
476
477} // namespace google_breakpad
478
479#endif // COMMON_LINUX_MODULE_H__
480