1 | // -*- mode: c++ -*- |
2 | |
3 | // Copyright (c) 2010 Google Inc. |
4 | // All rights reserved. |
5 | // |
6 | // Redistribution and use in source and binary forms, with or without |
7 | // modification, are permitted provided that the following conditions are |
8 | // met: |
9 | // |
10 | // * Redistributions of source code must retain the above copyright |
11 | // notice, this list of conditions and the following disclaimer. |
12 | // * Redistributions in binary form must reproduce the above |
13 | // copyright notice, this list of conditions and the following disclaimer |
14 | // in the documentation and/or other materials provided with the |
15 | // distribution. |
16 | // * Neither the name of Google Inc. nor the names of its |
17 | // contributors may be used to endorse or promote products derived from |
18 | // this software without specific prior written permission. |
19 | // |
20 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
21 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
22 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
23 | // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
24 | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
25 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
26 | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
27 | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
28 | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
29 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
30 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
31 | |
32 | // Original author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com> |
33 | |
34 | // module.h: Define google_breakpad::Module. A Module holds debugging |
35 | // information, and can write that information out as a Breakpad |
36 | // symbol file. |
37 | |
38 | #ifndef COMMON_LINUX_MODULE_H__ |
39 | #define COMMON_LINUX_MODULE_H__ |
40 | |
41 | #include <functional> |
42 | #include <iostream> |
43 | #include <limits> |
44 | #include <map> |
45 | #include <memory> |
46 | #include <set> |
47 | #include <string> |
48 | #include <vector> |
49 | |
50 | #include "common/string_view.h" |
51 | #include "common/symbol_data.h" |
52 | #include "common/unordered.h" |
53 | #include "common/using_std_string.h" |
54 | #include "google_breakpad/common/breakpad_types.h" |
55 | |
56 | namespace google_breakpad { |
57 | |
58 | using std::set; |
59 | using std::vector; |
60 | using std::map; |
61 | |
62 | // A Module represents the contents of a module, and supports methods |
63 | // for adding information produced by parsing STABS or DWARF data |
64 | // --- possibly both from the same file --- and then writing out the |
65 | // unified contents as a Breakpad-format symbol file. |
66 | class Module { |
67 | public: |
68 | // The type of addresses and sizes in a symbol table. |
69 | typedef uint64_t Address; |
70 | static constexpr uint64_t kMaxAddress = std::numeric_limits<Address>::max(); |
71 | struct File; |
72 | struct Function; |
73 | struct InlineOrigin; |
74 | struct Inline; |
75 | struct Line; |
76 | struct Extern; |
77 | |
// Addresses appearing in File, Function, and Line structures are
// absolute, not relative to the module's load address. That
// is, if the module were loaded at its nominal load address, the
// addresses would be correct.
82 | |
// A source file referenced by the module's debugging information.
struct File {
  // Create a File named NAME_INPUT. Its source id starts out as zero.
  explicit File(const string& name_input) : name(name_input) {}

  // The source file's name; it cannot change once the File exists.
  const string name;

  // The file's source id. The Write member function clears this field
  // and assigns source ids afresh, so any value stored here before
  // calling Write will be lost.
  int source_id = 0;
};
95 | |
96 | // An address range. |
97 | struct Range { |
98 | Range(const Address address_input, const Address size_input) : |
99 | address(address_input), size(size_input) { } |
100 | |
101 | Address address; |
102 | Address size; |
103 | }; |
104 | |
105 | // A function. |
106 | struct Function { |
107 | Function(StringView name_input, const Address& address_input) : |
108 | name(name_input), address(address_input), parameter_size(0) {} |
109 | |
110 | // For sorting by address. (Not style-guide compliant, but it's |
111 | // stupid not to put this in the struct.) |
112 | static bool CompareByAddress(const Function* x, const Function* y) { |
113 | return x->address < y->address; |
114 | } |
115 | |
116 | // The function's name. |
117 | StringView name; |
118 | |
119 | // The start address and the address ranges covered by the function. |
120 | const Address address; |
121 | vector<Range> ranges; |
122 | |
123 | // The function's parameter size. |
124 | Address parameter_size; |
125 | |
126 | // Source lines belonging to this function, sorted by increasing |
127 | // address. |
128 | vector<Line> lines; |
129 | |
130 | // Inlined call sites belonging to this functions. |
131 | vector<std::unique_ptr<Inline>> inlines; |
132 | }; |
133 | |
134 | struct InlineOrigin { |
135 | explicit InlineOrigin(StringView name) : id(-1), name(name) {} |
136 | |
137 | // A unique id for each InlineOrigin object. INLINE records use the id to |
138 | // refer to its INLINE_ORIGIN record. |
139 | int id; |
140 | |
141 | // The inlined function's name. |
142 | StringView name; |
143 | |
144 | File* file; |
145 | |
146 | int getFileID() const { return file ? file->source_id : -1; } |
147 | }; |
148 | |
149 | // A inlined call site. |
150 | struct Inline { |
151 | Inline(InlineOrigin* origin, |
152 | const vector<Range>& ranges, |
153 | int call_site_line, |
154 | int call_site_file_id, |
155 | int inline_nest_level, |
156 | vector<std::unique_ptr<Inline>> child_inlines) |
157 | : origin(origin), |
158 | ranges(ranges), |
159 | call_site_line(call_site_line), |
160 | call_site_file_id(call_site_file_id), |
161 | call_site_file(nullptr), |
162 | inline_nest_level(inline_nest_level), |
163 | child_inlines(std::move(child_inlines)) {} |
164 | |
165 | InlineOrigin* origin; |
166 | |
167 | // The list of addresses and sizes. |
168 | vector<Range> ranges; |
169 | |
170 | int call_site_line; |
171 | |
172 | // The id is only meanful inside a CU. It's only used for looking up real |
173 | // File* after scanning a CU. |
174 | int call_site_file_id; |
175 | |
176 | File* call_site_file; |
177 | |
178 | int inline_nest_level; |
179 | |
180 | // A list of inlines which are children of this inline. |
181 | vector<std::unique_ptr<Inline>> child_inlines; |
182 | |
183 | int getCallSiteFileID() const { |
184 | return call_site_file ? call_site_file->source_id : -1; |
185 | } |
186 | |
187 | static void InlineDFS( |
188 | vector<std::unique_ptr<Module::Inline>>& inlines, |
189 | std::function<void(std::unique_ptr<Module::Inline>&)> const& forEach) { |
190 | for (std::unique_ptr<Module::Inline>& in : inlines) { |
191 | forEach(in); |
192 | InlineDFS(in->child_inlines, forEach); |
193 | } |
194 | } |
195 | }; |
196 | |
197 | typedef map<uint64_t, InlineOrigin*> InlineOriginByOffset; |
198 | |
199 | class InlineOriginMap { |
200 | public: |
201 | // Add INLINE ORIGIN to the module. Return a pointer to origin . |
202 | InlineOrigin* GetOrCreateInlineOrigin(uint64_t offset, StringView name); |
203 | |
204 | // offset is the offset of a DW_TAG_subprogram. specification_offset is the |
205 | // value of its DW_AT_specification or equals to offset if |
206 | // DW_AT_specification doesn't exist in that DIE. |
207 | void SetReference(uint64_t offset, uint64_t specification_offset); |
208 | |
209 | ~InlineOriginMap() { |
210 | for (const auto& iter : inline_origins_) { |
211 | delete iter.second; |
212 | } |
213 | } |
214 | |
215 | private: |
216 | // A map from a DW_TAG_subprogram's offset to the DW_TAG_subprogram. |
217 | InlineOriginByOffset inline_origins_; |
218 | |
219 | // A map from a DW_TAG_subprogram's offset to the offset of its |
220 | // specification or abstract origin subprogram. The set of values in this |
221 | // map should always be the same set of keys in inline_origins_. |
222 | map<uint64_t, uint64_t> references_; |
223 | }; |
224 | |
225 | InlineOriginMap inline_origin_map; |
226 | |
227 | // A source line. |
228 | struct Line { |
229 | // For sorting by address. (Not style-guide compliant, but it's |
230 | // stupid not to put this in the struct.) |
231 | static bool CompareByAddress(const Module::Line& x, const Module::Line& y) { |
232 | return x.address < y.address; |
233 | } |
234 | |
235 | Address address, size; // The address and size of the line's code. |
236 | File* file; // The source file. |
237 | int number; // The source line number. |
238 | }; |
239 | |
240 | // An exported symbol. |
241 | struct Extern { |
242 | explicit Extern(const Address& address_input) : address(address_input) {} |
243 | const Address address; |
244 | string name; |
245 | }; |
246 | |
// A map from register names to postfix expressions that recover
// their values. This can represent a complete set of rules to
// follow at some address, or a set of changes to be applied to an
// extant set of rules.
251 | typedef map<string, string> RuleMap; |
252 | |
253 | // A map from addresses to RuleMaps, representing changes that take |
254 | // effect at given addresses. |
255 | typedef map<Address, RuleMap> RuleChangeMap; |
256 | |
257 | // A range of 'STACK CFI' stack walking information. An instance of |
258 | // this structure corresponds to a 'STACK CFI INIT' record and the |
259 | // subsequent 'STACK CFI' records that fall within its range. |
260 | struct StackFrameEntry { |
261 | // The starting address and number of bytes of machine code this |
262 | // entry covers. |
263 | Address address, size; |
264 | |
265 | // The initial register recovery rules, in force at the starting |
266 | // address. |
267 | RuleMap initial_rules; |
268 | |
269 | // A map from addresses to rule changes. To find the rules in |
270 | // force at a given address, start with initial_rules, and then |
271 | // apply the changes given in this map for all addresses up to and |
272 | // including the address you're interested in. |
273 | RuleChangeMap rule_changes; |
274 | }; |
275 | |
276 | struct FunctionCompare { |
277 | bool operator() (const Function* lhs, const Function* rhs) const { |
278 | if (lhs->address == rhs->address) |
279 | return lhs->name < rhs->name; |
280 | return lhs->address < rhs->address; |
281 | } |
282 | }; |
283 | |
284 | struct InlineOriginCompare { |
285 | bool operator()(const InlineOrigin* lhs, const InlineOrigin* rhs) const { |
286 | return lhs->name < rhs->name; |
287 | } |
288 | }; |
289 | |
290 | struct ExternCompare { |
291 | bool operator() (const Extern* lhs, const Extern* rhs) const { |
292 | return lhs->address < rhs->address; |
293 | } |
294 | }; |
295 | |
296 | // Create a new module with the given name, operating system, |
297 | // architecture, and ID string. |
298 | Module(const string& name, const string& os, const string& architecture, |
299 | const string& id, const string& code_id = "" ); |
300 | ~Module(); |
301 | |
302 | // Set the module's load address to LOAD_ADDRESS; addresses given |
303 | // for functions and lines will be written to the Breakpad symbol |
304 | // file as offsets from this address. Construction initializes this |
305 | // module's load address to zero: addresses written to the symbol |
306 | // file will be the same as they appear in the Function, Line, and |
307 | // StackFrameEntry structures. |
308 | // |
309 | // Note that this member function has no effect on addresses stored |
310 | // in the data added to this module; the Write member function |
311 | // simply subtracts off the load address from addresses before it |
312 | // prints them. Only the last load address given before calling |
313 | // Write is used. |
314 | void SetLoadAddress(Address load_address); |
315 | |
316 | // Sets address filtering on elements added to the module. This allows |
317 | // libraries with extraneous debug symbols to generate symbol files containing |
318 | // only relevant symbols. For example, an LLD-generated partition library may |
319 | // contain debug information pertaining to all partitions derived from a |
320 | // single "combined" library. Filtering applies only to elements added after |
321 | // this method is called. |
322 | void SetAddressRanges(const vector<Range>& ranges); |
323 | |
324 | // Add FUNCTION to the module. FUNCTION's name must not be empty. |
325 | // This module owns all Function objects added with this function: |
326 | // destroying the module destroys them as well. |
327 | // Return false if the function is duplicate and needs to be freed. |
328 | bool AddFunction(Function* function); |
329 | |
330 | // Add STACK_FRAME_ENTRY to the module. |
331 | // This module owns all StackFrameEntry objects added with this |
332 | // function: destroying the module destroys them as well. |
333 | void AddStackFrameEntry(StackFrameEntry* stack_frame_entry); |
334 | |
335 | // Add PUBLIC to the module. |
336 | // This module owns all Extern objects added with this function: |
337 | // destroying the module destroys them as well. |
338 | void AddExtern(Extern* ext); |
339 | |
340 | // If this module has a file named NAME, return a pointer to it. If |
341 | // it has none, then create one and return a pointer to the new |
342 | // file. This module owns all File objects created using these |
343 | // functions; destroying the module destroys them as well. |
344 | File* FindFile(const string& name); |
345 | File* FindFile(const char* name); |
346 | |
347 | // If this module has a file named NAME, return a pointer to it. |
348 | // Otherwise, return NULL. |
349 | File* FindExistingFile(const string& name); |
350 | |
351 | // Insert pointers to the functions added to this module at I in |
352 | // VEC. The pointed-to Functions are still owned by this module. |
353 | // (Since this is effectively a copy of the function list, this is |
354 | // mostly useful for testing; other uses should probably get a more |
355 | // appropriate interface.) |
356 | void GetFunctions(vector<Function*>* vec, vector<Function*>::iterator i); |
357 | |
358 | // Insert pointers to the externs added to this module at I in |
359 | // VEC. The pointed-to Externs are still owned by this module. |
360 | // (Since this is effectively a copy of the extern list, this is |
361 | // mostly useful for testing; other uses should probably get a more |
362 | // appropriate interface.) |
363 | void GetExterns(vector<Extern*>* vec, vector<Extern*>::iterator i); |
364 | |
365 | // Clear VEC and fill it with pointers to the Files added to this |
366 | // module, sorted by name. The pointed-to Files are still owned by |
367 | // this module. (Since this is effectively a copy of the file list, |
368 | // this is mostly useful for testing; other uses should probably get |
369 | // a more appropriate interface.) |
370 | void GetFiles(vector<File*>* vec); |
371 | |
372 | // Clear VEC and fill it with pointers to the StackFrameEntry |
373 | // objects that have been added to this module. (Since this is |
374 | // effectively a copy of the stack frame entry list, this is mostly |
375 | // useful for testing; other uses should probably get |
376 | // a more appropriate interface.) |
377 | void GetStackFrameEntries(vector<StackFrameEntry*>* vec) const; |
378 | |
379 | // Find those files in this module that are actually referred to by |
380 | // functions' line number data, and assign them source id numbers. |
381 | // Set the source id numbers for all other files --- unused by the |
382 | // source line data --- to -1. We do this before writing out the |
383 | // symbol file, at which point we omit any unused files. |
384 | void AssignSourceIds(set<InlineOrigin*, InlineOriginCompare>& inline_origins); |
385 | |
386 | // This function should be called before AssignSourceIds() to get the set of |
387 | // valid InlineOrigins*. |
388 | void CreateInlineOrigins( |
389 | set<InlineOrigin*, InlineOriginCompare>& inline_origins); |
390 | |
// Call AssignSourceIds, and write this module to STREAM in the
// breakpad symbol format. Return true if all goes well, or false if
// an error occurs. This method writes out:
// - a header based on the values given to the constructor,
// If symbol_data is not CFI then:
// - the source files added via FindFile,
// - the functions added via AddFunction, each with its lines,
// - all public records,
// If symbol_data is CFI then:
// - all CFI records.
// Addresses in the output are all relative to the load address
// established by SetLoadAddress.
403 | bool Write(std::ostream& stream, SymbolData symbol_data); |
404 | |
405 | // Place the name in the global set of strings. Return a StringView points to |
406 | // a string inside the pool. |
407 | StringView AddStringToPool(const string& str) { |
408 | auto result = common_strings_.insert(str); |
409 | return *(result.first); |
410 | } |
411 | |
412 | string name() const { return name_; } |
413 | string os() const { return os_; } |
414 | string architecture() const { return architecture_; } |
415 | string identifier() const { return id_; } |
416 | string code_identifier() const { return code_id_; } |
417 | |
418 | private: |
419 | // Report an error that has occurred writing the symbol file, using |
420 | // errno to find the appropriate cause. Return false. |
421 | static bool ReportError(); |
422 | |
423 | // Write RULE_MAP to STREAM, in the form appropriate for 'STACK CFI' |
424 | // records, without a final newline. Return true if all goes well; |
425 | // if an error occurs, return false, and leave errno set. |
426 | static bool WriteRuleMap(const RuleMap& rule_map, std::ostream& stream); |
427 | |
// Returns true if the specified address resides within a specified address
// range, or if no ranges have been specified.
430 | bool AddressIsInModule(Address address) const; |
431 | |
432 | // Module header entries. |
433 | string name_, os_, architecture_, id_, code_id_; |
434 | |
435 | // The module's nominal load address. Addresses for functions and |
436 | // lines are absolute, assuming the module is loaded at this |
437 | // address. |
438 | Address load_address_; |
439 | |
440 | // The set of valid address ranges of the module. If specified, attempts to |
441 | // add elements residing outside these ranges will be silently filtered. |
442 | vector<Range> address_ranges_; |
443 | |
// Ordering relation for maps whose keys are pointers to strings shared
// with some other structure: keys are ordered by the strings they point
// to, not by the pointer values.
struct CompareStringPtrs {
  bool operator()(const string* x, const string* y) const {
    return x->compare(*y) < 0;
  }
};
449 | |
450 | // A map from filenames to File structures. The map's keys are |
451 | // pointers to the Files' names. |
452 | typedef map<const string*, File*, CompareStringPtrs> FileByNameMap; |
453 | |
// A set containing Function structures, sorted by address and then by name.
455 | typedef set<Function*, FunctionCompare> FunctionSet; |
456 | |
457 | // A set containing Extern structures, sorted by address. |
458 | typedef set<Extern*, ExternCompare> ExternSet; |
459 | |
460 | // The module owns all the files and functions that have been added |
461 | // to it; destroying the module frees the Files and Functions these |
462 | // point to. |
463 | FileByNameMap files_; // This module's source files. |
464 | FunctionSet functions_; // This module's functions. |
465 | |
466 | // The module owns all the call frame info entries that have been |
467 | // added to it. |
468 | vector<StackFrameEntry*> stack_frame_entries_; |
469 | |
470 | // The module owns all the externs that have been added to it; |
471 | // destroying the module frees the Externs these point to. |
472 | ExternSet externs_; |
473 | |
474 | unordered_set<string> common_strings_; |
475 | }; |
476 | |
477 | } // namespace google_breakpad |
478 | |
479 | #endif // COMMON_LINUX_MODULE_H__ |
480 | |