1// -*- mode: c++ -*-
2
3// Copyright (c) 2010 Google Inc. All Rights Reserved.
4//
5// Redistribution and use in source and binary forms, with or without
6// modification, are permitted provided that the following conditions are
7// met:
8//
9// * Redistributions of source code must retain the above copyright
10// notice, this list of conditions and the following disclaimer.
11// * Redistributions in binary form must reproduce the above
12// copyright notice, this list of conditions and the following disclaimer
13// in the documentation and/or other materials provided with the
14// distribution.
15// * Neither the name of Google Inc. nor the names of its
16// contributors may be used to endorse or promote products derived from
17// this software without specific prior written permission.
18//
19// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31// Original author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com>
32
33// stabs_reader.h: Define StabsReader, a parser for STABS debugging
34// information. A description of the STABS debugging format can be
35// found at:
36//
37// http://sourceware.org/gdb/current/onlinedocs/stabs_toc.html
38//
39// The comments here assume you understand the format.
40//
41// This parser can handle big-endian and little-endian data, and the symbol
42// values may be either 32 or 64 bits long. It handles both STABS in
43// sections (as used on Linux) and STABS appearing directly in an
44// a.out-like symbol table (as used in Darwin OS X Mach-O files).
45
46#ifndef COMMON_STABS_READER_H__
47#define COMMON_STABS_READER_H__
48
49#include <stddef.h>
50#include <stdint.h>
51
52#ifdef HAVE_CONFIG_H
53#include <config.h>
54#endif
55
56#ifdef HAVE_MACH_O_NLIST_H
57#include <mach-o/nlist.h>
58#elif defined(HAVE_A_OUT_H)
59#include <a.out.h>
60#endif
61
62#include <string>
63#include <vector>
64
65#include "common/byte_cursor.h"
66#include "common/using_std_string.h"
67
68namespace google_breakpad {
69
70class StabsHandler;
71
72class StabsReader {
73 public:
74 // Create a reader for the STABS debug information whose .stab section is
75 // being traversed by ITERATOR, and whose .stabstr section is referred to
76 // by STRINGS. The reader will call the member functions of HANDLER to
77 // report the information it finds, when the reader's 'Process' member
78 // function is called.
79 //
80 // BIG_ENDIAN should be true if the entries in the .stab section are in
81 // big-endian form, or false if they are in little-endian form.
82 //
83 // VALUE_SIZE should be either 4 or 8, indicating the size of the 'value'
84 // field in each entry in bytes.
85 //
86 // UNITIZED should be true if the STABS data is stored in units with
87 // N_UNDF headers. This is usually the case for STABS stored in sections,
88 // like .stab/.stabstr, and usually not the case for STABS stored in the
89 // actual symbol table; UNITIZED should be true when parsing Linux stabs,
90 // false when parsing Mac OS X STABS. For details, see:
91 // http://sourceware.org/gdb/current/onlinedocs/stabs/Stab-Section-Basics.html
92 //
93 // Note that, in ELF, the .stabstr section should be found using the
94 // 'sh_link' field of the .stab section header, not by name.
95 StabsReader(const uint8_t* stab, size_t stab_size,
96 const uint8_t* stabstr, size_t stabstr_size,
97 bool big_endian, size_t value_size, bool unitized,
98 StabsHandler* handler);
99
100 // Process the STABS data, calling the handler's member functions to
101 // report what we find. While the handler functions return true,
102 // continue to process until we reach the end of the section. If we
103 // processed the entire section and all handlers returned true,
104 // return true. If any handler returned false, return false.
105 //
106 // This is only meant to be called once per StabsReader instance;
107 // resuming a prior processing pass that stopped abruptly isn't supported.
108 bool Process();
109
110 private:
111
112 // An class for walking arrays of STABS entries. This isolates the main
113 // STABS reader from the exact format (size; endianness) of the entries
114 // themselves.
115 class EntryIterator {
116 public:
117 // The contents of a STABS entry, adjusted for the host's endianness,
118 // word size, 'struct nlist' layout, and so on.
119 struct Entry {
120 // True if this iterator has reached the end of the entry array. When
121 // this is set, the other members of this structure are not valid.
122 bool at_end;
123
124 // The number of this entry within the list.
125 size_t index;
126
127 // The current entry's name offset. This is the offset within the
128 // current compilation unit's strings, as establish by the N_UNDF entries.
129 size_t name_offset;
130
131 // The current entry's type, 'other' field, descriptor, and value.
132 unsigned char type;
133 unsigned char other;
134 short descriptor;
135 uint64_t value;
136 };
137
138 // Create a EntryIterator walking the entries in BUFFER. Treat the
139 // entries as big-endian if BIG_ENDIAN is true, as little-endian
140 // otherwise. Assume each entry has a 'value' field whose size is
141 // VALUE_SIZE.
142 //
143 // This would not be terribly clean to extend to other format variations,
144 // but it's enough to handle Linux and Mac, and we'd like STABS to die
145 // anyway.
146 //
147 // For the record: on Linux, STABS entry values are always 32 bits,
148 // regardless of the architecture address size (don't ask me why); on
149 // Mac, they are 32 or 64 bits long. Oddly, the section header's entry
150 // size for a Linux ELF .stab section varies according to the ELF class
151 // from 12 to 20 even as the actual entries remain unchanged.
152 EntryIterator(const ByteBuffer* buffer, bool big_endian, size_t value_size);
153
154 // Move to the next entry. This function's behavior is undefined if
155 // at_end() is true when it is called.
156 EntryIterator& operator++() { Fetch(); entry_.index++; return *this; }
157
158 // Dereferencing this iterator produces a reference to an Entry structure
159 // that holds the current entry's values. The entry is owned by this
160 // EntryIterator, and will be invalidated at the next call to operator++.
161 const Entry& operator*() const { return entry_; }
162 const Entry* operator->() const { return &entry_; }
163
164 private:
165 // Read the STABS entry at cursor_, and set entry_ appropriately.
166 void Fetch();
167
168 // The size of entries' value field, in bytes.
169 size_t value_size_;
170
171 // A byte cursor traversing buffer_.
172 ByteCursor cursor_;
173
174 // Values for the entry this iterator refers to.
175 Entry entry_;
176 };
177
178 // A source line, saved to be reported later.
179 struct Line {
180 uint64_t address;
181 const char* filename;
182 int number;
183 };
184
185 // Return the name of the current symbol.
186 const char* SymbolString();
187
188 // Process a compilation unit starting at symbol_. Return true
189 // to continue processing, or false to abort.
190 bool ProcessCompilationUnit();
191
192 // Process a function in current_source_file_ starting at symbol_.
193 // Return true to continue processing, or false to abort.
194 bool ProcessFunction();
195
196 // Process an exported function symbol.
197 // Return true to continue processing, or false to abort.
198 bool ProcessExtern();
199
200 // The STABS entries being parsed.
201 ByteBuffer entries_;
202
203 // The string section to which the entries refer.
204 ByteBuffer strings_;
205
206 // The iterator walking the STABS entries.
207 EntryIterator iterator_;
208
209 // True if the data is "unitized"; see the explanation in the comment for
210 // StabsReader::StabsReader.
211 bool unitized_;
212
213 StabsHandler* handler_;
214
215 // The offset of the current compilation unit's strings within stabstr_.
216 size_t string_offset_;
217
218 // The value string_offset_ should have for the next compilation unit,
219 // as established by N_UNDF entries.
220 size_t next_cu_string_offset_;
221
222 // The current source file name.
223 const char* current_source_file_;
224
225 // Mac OS X STABS place SLINE records before functions; we accumulate a
226 // vector of these until we see the FUN record, and then report them
227 // after the StartFunction call.
228 std::vector<Line> queued_lines_;
229};
230
// Callback interface through which the STABS reader delivers its
// results. A client derives from this class, passes an instance to the
// reader, and the reader calls that instance's member functions to
// report the information it finds.
//
// Every reporting function has a default definition that does nothing
// and returns true, so processing continues. Warning is the exception:
// it is pure virtual and the client must define it.
class StabsHandler {
 public:
  StabsHandler() { }
  virtual ~StabsHandler() { }

  // General notes that apply to all of the callbacks below:

  // The reader keeps going until it reaches the end of the .stab
  // section, or until one of these functions returns false.

  // Addresses are passed along exactly as the STABS info states them,
  // with no allowance for a module that may be loaded at different
  // addresses at different times (a shared library, say). For STABS
  // taken from an ELF shared library, every address assumes the library
  // sits at its nominal load address. They are *not* offsets from that
  // nominal load address; a consumer that wants offsets must subtract
  // the library's nominal load address itself.

  // Every argument named FILENAME refers to a string stored in the
  // .stabstr section. Both the Linux and Solaris linkers factor
  // duplicate strings out of .stabstr, so the consumer may assume that
  // two FILENAME values with different addresses name different files.
  //
  // It is therefore safe to use (say) std::map<char*, ...>, which
  // compares string addresses rather than string contents. Because all
  // of the strings live in one array of characters --- the .stabstr
  // section --- comparing their addresses gives predictable, if not
  // lexicographically meaningful, results.

  // A compilation unit begins. FILENAME names its main source file and
  // ADDRESS is its base address. BUILD_DIRECTORY, when non-NULL, names
  // the build directory in which the compilation took place.
  virtual bool StartCompilationUnit(const char* filename,
                                    uint64_t address,
                                    const char* build_directory) {
    return true;
  }

  // The current compilation unit ends. A non-zero ADDRESS is the unit's
  // ending address. A zero ADDRESS means the ending address is not
  // available, and the consumer must infer it by other means.
  virtual bool EndCompilationUnit(uint64_t address) {
    return true;
  }

  // A function named NAME begins at ADDRESS. It belongs to the
  // compilation unit that was most recently started but not yet ended.
  //
  // Unlike filenames, NAME does not point into the .stabstr section:
  // there, the name is followed by type information, whereas the value
  // passed here is the function name alone.
  //
  // For languages that mangle names, such as C++, NAME is the mangled
  // form.
  virtual bool StartFunction(const string& name, uint64_t address) {
    return true;
  }

  // The current function ends. A non-zero ADDRESS is the function's
  // ending address. A zero ADDRESS means the ending address is not
  // available, and the consumer must infer it by other means.
  virtual bool EndFunction(uint64_t address) {
    return true;
  }

  // The code at ADDRESS comes from line NUMBER of the source file named
  // FILENAME. No ending address is given for the line; the consumer has
  // to infer it.
  virtual bool Line(uint64_t address, const char* filename, int number) {
    return true;
  }

  // An exported function called NAME is present at ADDRESS. Its size is
  // unknown.
  virtual bool Extern(const string& name, uint64_t address) {
    return true;
  }

  // Report a warning. FORMAT is a printf-like format string that says
  // how the arguments following it are to be formatted.
  virtual void Warning(const char* format, ...) = 0;
};
322
323} // namespace google_breakpad
324
325#endif // COMMON_STABS_READER_H__
326