1 | // -*- mode: c++ -*- |
2 | |
3 | // Copyright (c) 2010 Google Inc. All Rights Reserved. |
4 | // |
5 | // Redistribution and use in source and binary forms, with or without |
6 | // modification, are permitted provided that the following conditions are |
7 | // met: |
8 | // |
9 | // * Redistributions of source code must retain the above copyright |
10 | // notice, this list of conditions and the following disclaimer. |
11 | // * Redistributions in binary form must reproduce the above |
12 | // copyright notice, this list of conditions and the following disclaimer |
13 | // in the documentation and/or other materials provided with the |
14 | // distribution. |
15 | // * Neither the name of Google Inc. nor the names of its |
16 | // contributors may be used to endorse or promote products derived from |
17 | // this software without specific prior written permission. |
18 | // |
19 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
20 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
21 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
22 | // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
23 | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
24 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
25 | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
26 | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
27 | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
28 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
29 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
30 | |
31 | // Original author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com> |
32 | |
33 | // stabs_reader.h: Define StabsReader, a parser for STABS debugging |
34 | // information. A description of the STABS debugging format can be |
35 | // found at: |
36 | // |
37 | // http://sourceware.org/gdb/current/onlinedocs/stabs_toc.html |
38 | // |
39 | // The comments here assume you understand the format. |
40 | // |
41 | // This parser can handle big-endian and little-endian data, and the symbol |
42 | // values may be either 32 or 64 bits long. It handles both STABS in |
43 | // sections (as used on Linux) and STABS appearing directly in an |
44 | // a.out-like symbol table (as used in Darwin OS X Mach-O files). |
45 | |
46 | #ifndef COMMON_STABS_READER_H__ |
47 | #define COMMON_STABS_READER_H__ |
48 | |
49 | #include <stddef.h> |
50 | #include <stdint.h> |
51 | |
52 | #ifdef HAVE_CONFIG_H |
53 | #include <config.h> |
54 | #endif |
55 | |
56 | #ifdef HAVE_MACH_O_NLIST_H |
57 | #include <mach-o/nlist.h> |
58 | #elif defined(HAVE_A_OUT_H) |
59 | #include <a.out.h> |
60 | #endif |
61 | |
62 | #include <string> |
63 | #include <vector> |
64 | |
65 | #include "common/byte_cursor.h" |
66 | #include "common/using_std_string.h" |
67 | |
68 | namespace google_breakpad { |
69 | |
70 | class StabsHandler; |
71 | |
72 | class StabsReader { |
73 | public: |
74 | // Create a reader for the STABS debug information whose .stab section is |
75 | // being traversed by ITERATOR, and whose .stabstr section is referred to |
76 | // by STRINGS. The reader will call the member functions of HANDLER to |
77 | // report the information it finds, when the reader's 'Process' member |
78 | // function is called. |
79 | // |
80 | // BIG_ENDIAN should be true if the entries in the .stab section are in |
81 | // big-endian form, or false if they are in little-endian form. |
82 | // |
83 | // VALUE_SIZE should be either 4 or 8, indicating the size of the 'value' |
84 | // field in each entry in bytes. |
85 | // |
86 | // UNITIZED should be true if the STABS data is stored in units with |
87 | // N_UNDF headers. This is usually the case for STABS stored in sections, |
88 | // like .stab/.stabstr, and usually not the case for STABS stored in the |
89 | // actual symbol table; UNITIZED should be true when parsing Linux stabs, |
90 | // false when parsing Mac OS X STABS. For details, see: |
91 | // http://sourceware.org/gdb/current/onlinedocs/stabs/Stab-Section-Basics.html |
92 | // |
93 | // Note that, in ELF, the .stabstr section should be found using the |
94 | // 'sh_link' field of the .stab section header, not by name. |
95 | StabsReader(const uint8_t* stab, size_t stab_size, |
96 | const uint8_t* stabstr, size_t stabstr_size, |
97 | bool big_endian, size_t value_size, bool unitized, |
98 | StabsHandler* handler); |
99 | |
100 | // Process the STABS data, calling the handler's member functions to |
101 | // report what we find. While the handler functions return true, |
102 | // continue to process until we reach the end of the section. If we |
103 | // processed the entire section and all handlers returned true, |
104 | // return true. If any handler returned false, return false. |
105 | // |
106 | // This is only meant to be called once per StabsReader instance; |
107 | // resuming a prior processing pass that stopped abruptly isn't supported. |
108 | bool Process(); |
109 | |
110 | private: |
111 | |
112 | // An class for walking arrays of STABS entries. This isolates the main |
113 | // STABS reader from the exact format (size; endianness) of the entries |
114 | // themselves. |
115 | class EntryIterator { |
116 | public: |
117 | // The contents of a STABS entry, adjusted for the host's endianness, |
118 | // word size, 'struct nlist' layout, and so on. |
119 | struct Entry { |
120 | // True if this iterator has reached the end of the entry array. When |
121 | // this is set, the other members of this structure are not valid. |
122 | bool at_end; |
123 | |
124 | // The number of this entry within the list. |
125 | size_t index; |
126 | |
127 | // The current entry's name offset. This is the offset within the |
128 | // current compilation unit's strings, as establish by the N_UNDF entries. |
129 | size_t name_offset; |
130 | |
131 | // The current entry's type, 'other' field, descriptor, and value. |
132 | unsigned char type; |
133 | unsigned char other; |
134 | short descriptor; |
135 | uint64_t value; |
136 | }; |
137 | |
138 | // Create a EntryIterator walking the entries in BUFFER. Treat the |
139 | // entries as big-endian if BIG_ENDIAN is true, as little-endian |
140 | // otherwise. Assume each entry has a 'value' field whose size is |
141 | // VALUE_SIZE. |
142 | // |
143 | // This would not be terribly clean to extend to other format variations, |
144 | // but it's enough to handle Linux and Mac, and we'd like STABS to die |
145 | // anyway. |
146 | // |
147 | // For the record: on Linux, STABS entry values are always 32 bits, |
148 | // regardless of the architecture address size (don't ask me why); on |
149 | // Mac, they are 32 or 64 bits long. Oddly, the section header's entry |
150 | // size for a Linux ELF .stab section varies according to the ELF class |
151 | // from 12 to 20 even as the actual entries remain unchanged. |
152 | EntryIterator(const ByteBuffer* buffer, bool big_endian, size_t value_size); |
153 | |
154 | // Move to the next entry. This function's behavior is undefined if |
155 | // at_end() is true when it is called. |
156 | EntryIterator& operator++() { Fetch(); entry_.index++; return *this; } |
157 | |
158 | // Dereferencing this iterator produces a reference to an Entry structure |
159 | // that holds the current entry's values. The entry is owned by this |
160 | // EntryIterator, and will be invalidated at the next call to operator++. |
161 | const Entry& operator*() const { return entry_; } |
162 | const Entry* operator->() const { return &entry_; } |
163 | |
164 | private: |
165 | // Read the STABS entry at cursor_, and set entry_ appropriately. |
166 | void Fetch(); |
167 | |
168 | // The size of entries' value field, in bytes. |
169 | size_t value_size_; |
170 | |
171 | // A byte cursor traversing buffer_. |
172 | ByteCursor cursor_; |
173 | |
174 | // Values for the entry this iterator refers to. |
175 | Entry entry_; |
176 | }; |
177 | |
178 | // A source line, saved to be reported later. |
179 | struct Line { |
180 | uint64_t address; |
181 | const char* filename; |
182 | int number; |
183 | }; |
184 | |
185 | // Return the name of the current symbol. |
186 | const char* SymbolString(); |
187 | |
188 | // Process a compilation unit starting at symbol_. Return true |
189 | // to continue processing, or false to abort. |
190 | bool ProcessCompilationUnit(); |
191 | |
192 | // Process a function in current_source_file_ starting at symbol_. |
193 | // Return true to continue processing, or false to abort. |
194 | bool ProcessFunction(); |
195 | |
196 | // Process an exported function symbol. |
197 | // Return true to continue processing, or false to abort. |
198 | bool ProcessExtern(); |
199 | |
200 | // The STABS entries being parsed. |
201 | ByteBuffer entries_; |
202 | |
203 | // The string section to which the entries refer. |
204 | ByteBuffer strings_; |
205 | |
206 | // The iterator walking the STABS entries. |
207 | EntryIterator iterator_; |
208 | |
209 | // True if the data is "unitized"; see the explanation in the comment for |
210 | // StabsReader::StabsReader. |
211 | bool unitized_; |
212 | |
213 | StabsHandler* handler_; |
214 | |
215 | // The offset of the current compilation unit's strings within stabstr_. |
216 | size_t string_offset_; |
217 | |
218 | // The value string_offset_ should have for the next compilation unit, |
219 | // as established by N_UNDF entries. |
220 | size_t next_cu_string_offset_; |
221 | |
222 | // The current source file name. |
223 | const char* current_source_file_; |
224 | |
225 | // Mac OS X STABS place SLINE records before functions; we accumulate a |
226 | // vector of these until we see the FUN record, and then report them |
227 | // after the StartFunction call. |
228 | std::vector<Line> queued_lines_; |
229 | }; |
230 | |
231 | // Consumer-provided callback structure for the STABS reader. Clients |
232 | // of the STABS reader provide an instance of this structure. The |
233 | // reader then invokes the member functions of that instance to report |
234 | // the information it finds. |
235 | // |
236 | // The default definitions of the member functions do nothing, and return |
237 | // true so processing will continue. |
class StabsHandler {
 public:
  StabsHandler() {}
  virtual ~StabsHandler() {}

  // General notes that apply to all the handler callback functions:

  // The reader keeps going until it reaches the end of the .stab
  // section, or until one of these functions returns false.

  // Addresses are passed along exactly as the STABS info records them,
  // with no allowance for a module that may be loaded at different
  // addresses at different times (a shared library, say). For STABS
  // taken from an ELF shared library, every address assumes the library
  // sits at its nominal load address. These are *not* offsets from the
  // nominal load address; a consumer that wants offsets must subtract
  // the library's nominal load address itself.

  // Every argument named FILENAME refers to a string stored in the
  // .stabstr section. Since the Linux and Solaris linkers both factor
  // duplicate strings out of the .stabstr section, a consumer may assume
  // that two FILENAME values with different addresses name different
  // files.
  //
  // So it is safe to use (say) std::map<char*, ...>, which compares
  // string addresses rather than string contents. Because all the
  // strings live in the same array of characters --- the .stabstr
  // section --- comparing their addresses gives predictable results,
  // even if they are not lexicographically meaningful.

  // Start a compilation unit whose main source file is named FILENAME
  // and whose base address is ADDRESS. BUILD_DIRECTORY, when non-NULL,
  // names the build directory in which the compilation took place.
  virtual bool StartCompilationUnit(const char* filename,
                                    uint64_t address,
                                    const char* build_directory) {
    return true;
  }

  // End the compilation unit. A non-zero ADDRESS is the compilation
  // unit's ending address. A zero ADDRESS means the ending address is
  // not available, and the consumer has to infer it by other means.
  virtual bool EndCompilationUnit(uint64_t address) {
    return true;
  }

  // Start a function named NAME whose starting address is ADDRESS. The
  // function belongs to the compilation unit that was most recently
  // started but not yet ended.
  //
  // Unlike filenames, NAME does not point into the .stabstr section:
  // in the STABS data the name is followed by type information, whereas
  // the value passed to StartFunction is the function name alone.
  //
  // For languages that mangle names, like C++, NAME is the mangled name.
  virtual bool StartFunction(const string& name,
                             uint64_t address) {
    return true;
  }

  // End the function. A non-zero ADDRESS is the function's ending
  // address. A zero ADDRESS means the ending address is not available,
  // and the consumer has to infer it by other means.
  virtual bool EndFunction(uint64_t address) {
    return true;
  }

  // Report that the code at ADDRESS comes from line NUMBER of the source
  // file named FILENAME. Working out where the line's code ends is left
  // to the caller.
  virtual bool Line(uint64_t address,
                    const char* filename,
                    int number) {
    return true;
  }

  // Report an exported function named NAME located at ADDRESS.
  // The function's size is not known.
  virtual bool Extern(const string& name,
                      uint64_t address) {
    return true;
  }

  // Report a warning. FORMAT is a printf-like format string that says
  // how the arguments following it are to be formatted.
  virtual void Warning(const char* format, ...) = 0;
};
322 | |
323 | } // namespace google_breakpad |
324 | |
325 | #endif // COMMON_STABS_READER_H__ |
326 | |