1 | // Copyright (c) 2010 Google Inc. All Rights Reserved. |
2 | // |
3 | // Redistribution and use in source and binary forms, with or without |
4 | // modification, are permitted provided that the following conditions are |
5 | // met: |
6 | // |
7 | // * Redistributions of source code must retain the above copyright |
8 | // notice, this list of conditions and the following disclaimer. |
9 | // * Redistributions in binary form must reproduce the above |
10 | // copyright notice, this list of conditions and the following disclaimer |
11 | // in the documentation and/or other materials provided with the |
12 | // distribution. |
13 | // * Neither the name of Google Inc. nor the names of its |
14 | // contributors may be used to endorse or promote products derived from |
15 | // this software without specific prior written permission. |
16 | // |
17 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
18 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
19 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
20 | // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
21 | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
22 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
23 | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
24 | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
25 | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
26 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
27 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
28 | |
29 | // Original author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com> |
30 | |
31 | // This file implements the google_breakpad::StabsReader class. |
32 | // See stabs_reader.h. |
33 | |
34 | #include "common/stabs_reader.h" |
35 | |
36 | #include <assert.h> |
37 | #include <stab.h> |
38 | #include <string.h> |
39 | |
40 | #include <string> |
41 | |
42 | #include "common/using_std_string.h" |
43 | |
44 | using std::vector; |
45 | |
46 | namespace google_breakpad { |
47 | |
48 | StabsReader::EntryIterator::EntryIterator(const ByteBuffer* buffer, |
49 | bool big_endian, size_t value_size) |
50 | : value_size_(value_size), cursor_(buffer, big_endian) { |
51 | // Actually, we could handle weird sizes just fine, but they're |
52 | // probably mistakes --- expressed in bits, say. |
53 | assert(value_size == 4 || value_size == 8); |
54 | entry_.index = 0; |
55 | Fetch(); |
56 | } |
57 | |
58 | void StabsReader::EntryIterator::Fetch() { |
59 | cursor_ |
60 | .Read(4, false, &entry_.name_offset) |
61 | .Read(1, false, &entry_.type) |
62 | .Read(1, false, &entry_.other) |
63 | .Read(2, false, &entry_.descriptor) |
64 | .Read(value_size_, false, &entry_.value); |
65 | entry_.at_end = !cursor_; |
66 | } |
67 | |
68 | StabsReader::StabsReader(const uint8_t* stab, size_t stab_size, |
69 | const uint8_t* stabstr, size_t stabstr_size, |
70 | bool big_endian, size_t value_size, bool unitized, |
71 | StabsHandler* handler) |
72 | : entries_(stab, stab_size), |
73 | strings_(stabstr, stabstr_size), |
74 | iterator_(&entries_, big_endian, value_size), |
75 | unitized_(unitized), |
76 | handler_(handler), |
77 | string_offset_(0), |
78 | next_cu_string_offset_(0), |
79 | current_source_file_(NULL) { } |
80 | |
81 | const char* StabsReader::SymbolString() { |
82 | ptrdiff_t offset = string_offset_ + iterator_->name_offset; |
83 | if (offset < 0 || (size_t) offset >= strings_.Size()) { |
84 | handler_->Warning("symbol %d: name offset outside the string section\n" , |
85 | iterator_->index); |
86 | // Return our null string, to keep our promise about all names being |
87 | // taken from the string section. |
88 | offset = 0; |
89 | } |
90 | return reinterpret_cast<const char*>(strings_.start + offset); |
91 | } |
92 | |
93 | bool StabsReader::Process() { |
94 | while (!iterator_->at_end) { |
95 | if (iterator_->type == N_SO) { |
96 | if (! ProcessCompilationUnit()) |
97 | return false; |
98 | } else if (iterator_->type == N_UNDF && unitized_) { |
99 | // In unitized STABS (including Linux STABS, and pretty much anything |
100 | // else that puts STABS data in sections), at the head of each |
101 | // compilation unit's entries there is an N_UNDF stab giving the |
102 | // number of symbols in the compilation unit, and the number of bytes |
103 | // that compilation unit's strings take up in the .stabstr section. |
104 | // Each CU's strings are separate; the n_strx values are offsets |
105 | // within the current CU's portion of the .stabstr section. |
106 | // |
107 | // As an optimization, the GNU linker combines all the |
108 | // compilation units into one, with a single N_UNDF at the |
109 | // beginning. However, other linkers, like Gold, do not perform |
110 | // this optimization. |
111 | string_offset_ = next_cu_string_offset_; |
112 | next_cu_string_offset_ = iterator_->value; |
113 | ++iterator_; |
114 | } |
115 | #if defined(HAVE_MACH_O_NLIST_H) |
116 | // Export symbols in Mach-O binaries look like this. |
117 | // This is necessary in order to be able to dump symbols |
118 | // from OS X system libraries. |
119 | else if ((iterator_->type & N_STAB) == 0 && |
120 | (iterator_->type & N_TYPE) == N_SECT) { |
121 | ProcessExtern(); |
122 | } |
123 | #endif |
124 | else { |
125 | ++iterator_; |
126 | } |
127 | } |
128 | return true; |
129 | } |
130 | |
131 | bool StabsReader::ProcessCompilationUnit() { |
132 | assert(!iterator_->at_end && iterator_->type == N_SO); |
133 | |
134 | // There may be an N_SO entry whose name ends with a slash, |
135 | // indicating the directory in which the compilation occurred. |
136 | // The build directory defaults to NULL. |
137 | const char* build_directory = NULL; |
138 | { |
139 | const char* name = SymbolString(); |
140 | if (name[0] && name[strlen(name) - 1] == '/') { |
141 | build_directory = name; |
142 | ++iterator_; |
143 | } |
144 | } |
145 | |
146 | // We expect to see an N_SO entry with a filename next, indicating |
147 | // the start of the compilation unit. |
148 | { |
149 | if (iterator_->at_end || iterator_->type != N_SO) |
150 | return true; |
151 | const char* name = SymbolString(); |
152 | if (name[0] == '\0') { |
153 | // This seems to be a stray end-of-compilation-unit marker; |
154 | // consume it, but don't report the end, since we didn't see a |
155 | // beginning. |
156 | ++iterator_; |
157 | return true; |
158 | } |
159 | current_source_file_ = name; |
160 | } |
161 | |
162 | if (! handler_->StartCompilationUnit(current_source_file_, |
163 | iterator_->value, |
164 | build_directory)) |
165 | return false; |
166 | |
167 | ++iterator_; |
168 | |
169 | // The STABS documentation says that some compilers may emit |
170 | // additional N_SO entries with names immediately following the |
171 | // first, and that they should be ignored. However, the original |
172 | // Breakpad STABS reader doesn't ignore them, so we won't either. |
173 | |
174 | // Process the body of the compilation unit, up to the next N_SO. |
175 | while (!iterator_->at_end && iterator_->type != N_SO) { |
176 | if (iterator_->type == N_FUN) { |
177 | if (! ProcessFunction()) |
178 | return false; |
179 | } else if (iterator_->type == N_SLINE) { |
180 | // Mac OS X STABS place SLINE records before functions. |
181 | Line line; |
182 | // The value of an N_SLINE entry that appears outside a function is |
183 | // the absolute address of the line. |
184 | line.address = iterator_->value; |
185 | line.filename = current_source_file_; |
186 | // The n_desc of a N_SLINE entry is the line number. It's a |
187 | // signed 16-bit field; line numbers from 32768 to 65535 are |
188 | // stored as n-65536. |
189 | line.number = (uint16_t) iterator_->descriptor; |
190 | queued_lines_.push_back(line); |
191 | ++iterator_; |
192 | } else if (iterator_->type == N_SOL) { |
193 | current_source_file_ = SymbolString(); |
194 | ++iterator_; |
195 | } else { |
196 | // Ignore anything else. |
197 | ++iterator_; |
198 | } |
199 | } |
200 | |
201 | // An N_SO with an empty name indicates the end of the compilation |
202 | // unit. Default to zero. |
203 | uint64_t ending_address = 0; |
204 | if (!iterator_->at_end) { |
205 | assert(iterator_->type == N_SO); |
206 | const char* name = SymbolString(); |
207 | if (name[0] == '\0') { |
208 | ending_address = iterator_->value; |
209 | ++iterator_; |
210 | } |
211 | } |
212 | |
213 | if (! handler_->EndCompilationUnit(ending_address)) |
214 | return false; |
215 | |
216 | queued_lines_.clear(); |
217 | |
218 | return true; |
219 | } |
220 | |
221 | bool StabsReader::ProcessFunction() { |
222 | assert(!iterator_->at_end && iterator_->type == N_FUN); |
223 | |
224 | uint64_t function_address = iterator_->value; |
225 | // The STABS string for an N_FUN entry is the name of the function, |
226 | // followed by a colon, followed by type information for the |
227 | // function. We want to pass the name alone to StartFunction. |
228 | const char* stab_string = SymbolString(); |
229 | const char* name_end = strchr(stab_string, ':'); |
230 | if (! name_end) |
231 | name_end = stab_string + strlen(stab_string); |
232 | string name(stab_string, name_end - stab_string); |
233 | if (! handler_->StartFunction(name, function_address)) |
234 | return false; |
235 | ++iterator_; |
236 | |
237 | // If there were any SLINE records given before the function, report them now. |
238 | for (vector<Line>::const_iterator it = queued_lines_.begin(); |
239 | it != queued_lines_.end(); it++) { |
240 | if (!handler_->Line(it->address, it->filename, it->number)) |
241 | return false; |
242 | } |
243 | queued_lines_.clear(); |
244 | |
245 | while (!iterator_->at_end) { |
246 | if (iterator_->type == N_SO || iterator_->type == N_FUN) |
247 | break; |
248 | else if (iterator_->type == N_SLINE) { |
249 | // The value of an N_SLINE entry is the offset of the line from |
250 | // the function's start address. |
251 | uint64_t line_address = function_address + iterator_->value; |
252 | // The n_desc of a N_SLINE entry is the line number. It's a |
253 | // signed 16-bit field; line numbers from 32768 to 65535 are |
254 | // stored as n-65536. |
255 | uint16_t line_number = iterator_->descriptor; |
256 | if (! handler_->Line(line_address, current_source_file_, line_number)) |
257 | return false; |
258 | ++iterator_; |
259 | } else if (iterator_->type == N_SOL) { |
260 | current_source_file_ = SymbolString(); |
261 | ++iterator_; |
262 | } else |
263 | // Ignore anything else. |
264 | ++iterator_; |
265 | } |
266 | |
267 | // We've reached the end of the function. See if we can figure out its |
268 | // ending address. |
269 | uint64_t ending_address = 0; |
270 | if (!iterator_->at_end) { |
271 | assert(iterator_->type == N_SO || iterator_->type == N_FUN); |
272 | if (iterator_->type == N_FUN) { |
273 | const char* symbol_name = SymbolString(); |
274 | if (symbol_name[0] == '\0') { |
275 | // An N_FUN entry with no name is a terminator for this function; |
276 | // its value is the function's size. |
277 | ending_address = function_address + iterator_->value; |
278 | ++iterator_; |
279 | } else { |
280 | // An N_FUN entry with a name is the next function, and we can take |
281 | // its value as our ending address. Don't advance the iterator, as |
282 | // we'll use this symbol to start the next function as well. |
283 | ending_address = iterator_->value; |
284 | } |
285 | } else { |
286 | // An N_SO entry could be an end-of-compilation-unit marker, or the |
287 | // start of the next compilation unit, but in either case, its value |
288 | // is our ending address. We don't advance the iterator; |
289 | // ProcessCompilationUnit will decide what to do with this symbol. |
290 | ending_address = iterator_->value; |
291 | } |
292 | } |
293 | |
294 | if (! handler_->EndFunction(ending_address)) |
295 | return false; |
296 | |
297 | return true; |
298 | } |
299 | |
300 | bool StabsReader::ProcessExtern() { |
301 | #if defined(HAVE_MACH_O_NLIST_H) |
302 | assert(!iterator_->at_end && |
303 | (iterator_->type & N_STAB) == 0 && |
304 | (iterator_->type & N_TYPE) == N_SECT); |
305 | #endif |
306 | |
307 | // TODO(mark): only do symbols in the text section? |
308 | if (!handler_->Extern(SymbolString(), iterator_->value)) |
309 | return false; |
310 | |
311 | ++iterator_; |
312 | return true; |
313 | } |
314 | |
315 | } // namespace google_breakpad |
316 | |