1 | // Copyright (c) 2010 Google Inc. |
2 | // All rights reserved. |
3 | // |
4 | // Redistribution and use in source and binary forms, with or without |
5 | // modification, are permitted provided that the following conditions are |
6 | // met: |
7 | // |
8 | // * Redistributions of source code must retain the above copyright |
9 | // notice, this list of conditions and the following disclaimer. |
10 | // * Redistributions in binary form must reproduce the above |
11 | // copyright notice, this list of conditions and the following disclaimer |
12 | // in the documentation and/or other materials provided with the |
13 | // distribution. |
14 | // * Neither the name of Google Inc. nor the names of its |
15 | // contributors may be used to endorse or promote products derived from |
16 | // this software without specific prior written permission. |
17 | // |
18 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
19 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
20 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
21 | // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
22 | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
23 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
24 | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
25 | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
26 | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
27 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
28 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
29 | // |
30 | // basic_source_line_resolver.cc: BasicSourceLineResolver implementation. |
31 | // |
32 | // See basic_source_line_resolver.h and basic_source_line_resolver_types.h |
33 | // for documentation. |
34 | |
35 | #include <assert.h> |
36 | #include <stdio.h> |
37 | #include <string.h> |
38 | #include <sys/types.h> |
39 | #include <sys/stat.h> |
40 | |
41 | #include <limits> |
42 | #include <map> |
43 | #include <memory> |
44 | #include <utility> |
45 | #include <vector> |
46 | |
47 | #include "google_breakpad/processor/basic_source_line_resolver.h" |
48 | #include "processor/basic_source_line_resolver_types.h" |
49 | #include "processor/module_factory.h" |
50 | |
51 | #include "processor/tokenize.h" |
52 | |
53 | using std::deque; |
54 | using std::make_pair; |
55 | using std::map; |
56 | using std::unique_ptr; |
57 | using std::vector; |
58 | |
59 | namespace google_breakpad { |
60 | |
// On Windows builds, map the names this file calls onto the CRT spellings:
// strtok_r becomes strtok_s (MSVC only) and strtoull becomes _strtoui64.
#ifdef _WIN32
#ifdef _MSC_VER
#define strtok_r strtok_s
#endif
#define strtoull _strtoui64
#endif
67 | |
68 | namespace { |
69 | |
70 | // Utility function to tokenize given the presence of an optional initial |
71 | // field. In this case, optional_field is the expected string for the optional |
72 | // field, and max_tokens is the maximum number of tokens including the optional |
73 | // field. Refer to the documentation for Tokenize for descriptions of the other |
74 | // arguments. |
75 | bool TokenizeWithOptionalField(char* line, |
76 | const char* optional_field, |
77 | const char* separators, |
78 | int max_tokens, |
79 | vector<char*>* tokens) { |
80 | // First tokenize assuming the optional field is not present. If we then see |
81 | // the optional field, additionally tokenize the last token into two tokens. |
82 | if (!Tokenize(line, separators, max_tokens - 1, tokens)) { |
83 | return false; |
84 | } |
85 | |
86 | if (strcmp(tokens->front(), optional_field) == 0) { |
87 | // The optional field is present. Split the last token in two to recover the |
88 | // field prior to the last. |
89 | vector<char*> last_tokens; |
90 | if (!Tokenize(tokens->back(), separators, 2, &last_tokens)) { |
91 | return false; |
92 | } |
93 | // Replace the previous last token with the two new tokens. |
94 | tokens->pop_back(); |
95 | tokens->push_back(last_tokens[0]); |
96 | tokens->push_back(last_tokens[1]); |
97 | } |
98 | |
99 | return true; |
100 | } |
101 | |
102 | } // namespace |
103 | |
// Separator characters passed to Tokenize() by the parsers in this file, and
// used by ParseStackInfo() to find the end of the platform token.
static const char* kWhitespace = " \r\n";
// LogParseError() stops emitting log lines once its error counter has passed
// this value (it keeps counting).
static const int kMaxErrorsPrinted = 5;
// LoadMapFromMemory() stops reading further lines once its error count
// exceeds this value.
static const int kMaxErrorsBeforeBailing = 100;
107 | |
// Constructs the resolver by handing a freshly allocated BasicModuleFactory
// to the SourceLineResolverBase constructor.
BasicSourceLineResolver::BasicSourceLineResolver() :
    SourceLineResolverBase(new BasicModuleFactory) { }
110 | |
111 | // static |
112 | void BasicSourceLineResolver::Module::LogParseError( |
113 | const string& message, |
114 | int line_number, |
115 | int* num_errors) { |
116 | if (++(*num_errors) <= kMaxErrorsPrinted) { |
117 | if (line_number > 0) { |
118 | BPLOG(ERROR) << "Line " << line_number << ": " << message; |
119 | } else { |
120 | BPLOG(ERROR) << message; |
121 | } |
122 | } |
123 | } |
124 | |
125 | bool BasicSourceLineResolver::Module::LoadMapFromMemory( |
126 | char* memory_buffer, |
127 | size_t memory_buffer_size) { |
128 | linked_ptr<Function> cur_func; |
129 | int line_number = 0; |
130 | int num_errors = 0; |
131 | int inline_num_errors = 0; |
132 | char* save_ptr; |
133 | |
134 | // If the length is 0, we can still pretend we have a symbol file. This is |
135 | // for scenarios that want to test symbol lookup, but don't necessarily care |
136 | // if certain modules do not have any information, like system libraries. |
137 | if (memory_buffer_size == 0) { |
138 | return true; |
139 | } |
140 | |
141 | // Make sure the last character is null terminator. |
142 | size_t last_null_terminator = memory_buffer_size - 1; |
143 | if (memory_buffer[last_null_terminator] != '\0') { |
144 | memory_buffer[last_null_terminator] = '\0'; |
145 | } |
146 | |
147 | // Skip any null terminators at the end of the memory buffer, and make sure |
148 | // there are no other null terminators in the middle of the memory buffer. |
149 | bool has_null_terminator_in_the_middle = false; |
150 | while (last_null_terminator > 0 && |
151 | memory_buffer[last_null_terminator - 1] == '\0') { |
152 | last_null_terminator--; |
153 | } |
154 | for (size_t i = 0; i < last_null_terminator; i++) { |
155 | if (memory_buffer[i] == '\0') { |
156 | memory_buffer[i] = '_'; |
157 | has_null_terminator_in_the_middle = true; |
158 | } |
159 | } |
160 | if (has_null_terminator_in_the_middle) { |
161 | LogParseError( |
162 | "Null terminator is not expected in the middle of the symbol data" , |
163 | line_number, |
164 | &num_errors); |
165 | } |
166 | |
167 | char* buffer; |
168 | buffer = strtok_r(memory_buffer, "\r\n" , &save_ptr); |
169 | |
170 | while (buffer != NULL) { |
171 | ++line_number; |
172 | |
173 | if (strncmp(buffer, "FILE " , 5) == 0) { |
174 | if (!ParseFile(buffer)) { |
175 | LogParseError("ParseFile on buffer failed" , line_number, &num_errors); |
176 | } |
177 | } else if (strncmp(buffer, "STACK " , 6) == 0) { |
178 | if (!ParseStackInfo(buffer)) { |
179 | LogParseError("ParseStackInfo failed" , line_number, &num_errors); |
180 | } |
181 | } else if (strncmp(buffer, "FUNC " , 5) == 0) { |
182 | cur_func.reset(ParseFunction(buffer)); |
183 | if (!cur_func.get()) { |
184 | LogParseError("ParseFunction failed" , line_number, &num_errors); |
185 | } else { |
186 | // StoreRange will fail if the function has an invalid address or size. |
187 | // We'll silently ignore this, the function and any corresponding lines |
188 | // will be destroyed when cur_func is released. |
189 | functions_.StoreRange(cur_func->address, cur_func->size, cur_func); |
190 | } |
191 | } else if (strncmp(buffer, "PUBLIC " , 7) == 0) { |
192 | // Clear cur_func: public symbols don't contain line number information. |
193 | cur_func.reset(); |
194 | |
195 | if (!ParsePublicSymbol(buffer)) { |
196 | LogParseError("ParsePublicSymbol failed" , line_number, &num_errors); |
197 | } |
198 | } else if (strncmp(buffer, "MODULE " , 7) == 0) { |
199 | // Ignore these. They're not of any use to BasicSourceLineResolver, |
200 | // which is fed modules by a SymbolSupplier. These lines are present to |
201 | // aid other tools in properly placing symbol files so that they can |
202 | // be accessed by a SymbolSupplier. |
203 | // |
204 | // MODULE <guid> <age> <filename> |
205 | } else if (strncmp(buffer, "INFO " , 5) == 0) { |
206 | // Ignore these as well, they're similarly just for housekeeping. |
207 | // |
208 | // INFO CODE_ID <code id> <filename> |
209 | } else if (strncmp(buffer, "INLINE " , 7) == 0) { |
210 | linked_ptr<Inline> in = ParseInline(buffer); |
211 | if (!in.get()) |
212 | LogParseError("ParseInline failed" , line_number, &inline_num_errors); |
213 | else |
214 | cur_func->AppendInline(in); |
215 | } else if (strncmp(buffer, "INLINE_ORIGIN " , 14) == 0) { |
216 | if (!ParseInlineOrigin(buffer)) { |
217 | LogParseError("ParseInlineOrigin failed" , line_number, |
218 | &inline_num_errors); |
219 | } |
220 | } else { |
221 | if (!cur_func.get()) { |
222 | LogParseError("Found source line data without a function" , |
223 | line_number, &num_errors); |
224 | } else { |
225 | Line* line = ParseLine(buffer); |
226 | if (!line) { |
227 | LogParseError("ParseLine failed" , line_number, &num_errors); |
228 | } else { |
229 | cur_func->lines.StoreRange(line->address, line->size, |
230 | linked_ptr<Line>(line)); |
231 | } |
232 | } |
233 | } |
234 | if (num_errors > kMaxErrorsBeforeBailing) { |
235 | break; |
236 | } |
237 | buffer = strtok_r(NULL, "\r\n" , &save_ptr); |
238 | } |
239 | is_corrupt_ = num_errors > 0; |
240 | return true; |
241 | } |
242 | |
243 | void BasicSourceLineResolver::Module::ConstructInlineFrames( |
244 | StackFrame* frame, |
245 | MemAddr address, |
246 | const ContainedRangeMap<uint64_t, linked_ptr<Inline>>& inline_map, |
247 | deque<unique_ptr<StackFrame>>* inlined_frames) const { |
248 | vector<const linked_ptr<Inline>*> inlines; |
249 | if (!inline_map.RetrieveRanges(address, inlines)) { |
250 | return; |
251 | } |
252 | |
253 | for (const linked_ptr<Inline>* const in : inlines) { |
254 | unique_ptr<StackFrame> new_frame = |
255 | unique_ptr<StackFrame>(new StackFrame(*frame)); |
256 | auto origin = inline_origins_.find(in->get()->origin_id); |
257 | if (origin != inline_origins_.end()) { |
258 | new_frame->function_name = origin->second->name; |
259 | } else { |
260 | new_frame->function_name = "<name omitted>" ; |
261 | } |
262 | |
263 | // Store call site file and line in current frame, which will be updated |
264 | // later. |
265 | new_frame->source_line = in->get()->call_site_line; |
266 | if (in->get()->has_call_site_file_id) { |
267 | auto file = files_.find(in->get()->call_site_file_id); |
268 | if (file != files_.end()) { |
269 | new_frame->source_file_name = file->second; |
270 | } |
271 | } |
272 | |
273 | // Use the starting address of the inlined range as inlined function base. |
274 | new_frame->function_base = new_frame->module->base_address(); |
275 | for (const auto& range : in->get()->inline_ranges) { |
276 | if (address >= range.first && address < range.first + range.second) { |
277 | new_frame->function_base += range.first; |
278 | break; |
279 | } |
280 | } |
281 | new_frame->trust = StackFrame::FRAME_TRUST_INLINE; |
282 | |
283 | // The inlines vector has an order from innermost entry to outermost entry. |
284 | // By push_back, we will have inlined_frames with the same order. |
285 | inlined_frames->push_back(std::move(new_frame)); |
286 | } |
287 | |
288 | // Update the source file and source line for each inlined frame. |
289 | if (!inlined_frames->empty()) { |
290 | string parent_frame_source_file_name = frame->source_file_name; |
291 | int parent_frame_source_line = frame->source_line; |
292 | frame->source_file_name = inlined_frames->back()->source_file_name; |
293 | frame->source_line = inlined_frames->back()->source_line; |
294 | for (unique_ptr<StackFrame>& inlined_frame : *inlined_frames) { |
295 | std::swap(inlined_frame->source_file_name, parent_frame_source_file_name); |
296 | std::swap(inlined_frame->source_line, parent_frame_source_line); |
297 | } |
298 | } |
299 | } |
300 | |
301 | void BasicSourceLineResolver::Module::LookupAddress( |
302 | StackFrame* frame, |
303 | deque<unique_ptr<StackFrame>>* inlined_frames) const { |
304 | MemAddr address = frame->instruction - frame->module->base_address(); |
305 | |
306 | // First, look for a FUNC record that covers address. Use |
307 | // RetrieveNearestRange instead of RetrieveRange so that, if there |
308 | // is no such function, we can use the next function to bound the |
309 | // extent of the PUBLIC symbol we find, below. This does mean we |
310 | // need to check that address indeed falls within the function we |
311 | // find; do the range comparison in an overflow-friendly way. |
312 | linked_ptr<Function> func; |
313 | linked_ptr<PublicSymbol> public_symbol; |
314 | MemAddr function_base; |
315 | MemAddr function_size; |
316 | MemAddr public_address; |
317 | if (functions_.RetrieveNearestRange(address, &func, &function_base, |
318 | NULL /* delta */, &function_size) && |
319 | address >= function_base && address - function_base < function_size) { |
320 | frame->function_name = func->name; |
321 | frame->function_base = frame->module->base_address() + function_base; |
322 | frame->is_multiple = func->is_multiple; |
323 | |
324 | linked_ptr<Line> line; |
325 | MemAddr line_base; |
326 | if (func->lines.RetrieveRange(address, &line, &line_base, NULL /* delta */, |
327 | NULL /* size */)) { |
328 | FileMap::const_iterator it = files_.find(line->source_file_id); |
329 | if (it != files_.end()) { |
330 | frame->source_file_name = files_.find(line->source_file_id)->second; |
331 | } |
332 | frame->source_line = line->line; |
333 | frame->source_line_base = frame->module->base_address() + line_base; |
334 | } |
335 | |
336 | // Check if this is inlined function call. |
337 | if (inlined_frames) { |
338 | ConstructInlineFrames(frame, address, func->inlines, inlined_frames); |
339 | } |
340 | } else if (public_symbols_.Retrieve(address, |
341 | &public_symbol, &public_address) && |
342 | (!func.get() || public_address > function_base)) { |
343 | frame->function_name = public_symbol->name; |
344 | frame->function_base = frame->module->base_address() + public_address; |
345 | frame->is_multiple = public_symbol->is_multiple; |
346 | } |
347 | } |
348 | |
349 | WindowsFrameInfo* BasicSourceLineResolver::Module::FindWindowsFrameInfo( |
350 | const StackFrame* frame) const { |
351 | MemAddr address = frame->instruction - frame->module->base_address(); |
352 | scoped_ptr<WindowsFrameInfo> result(new WindowsFrameInfo()); |
353 | |
354 | // We only know about WindowsFrameInfo::STACK_INFO_FRAME_DATA and |
355 | // WindowsFrameInfo::STACK_INFO_FPO. Prefer them in this order. |
356 | // WindowsFrameInfo::STACK_INFO_FRAME_DATA is the newer type that |
357 | // includes its own program string. |
358 | // WindowsFrameInfo::STACK_INFO_FPO is the older type |
359 | // corresponding to the FPO_DATA struct. See stackwalker_x86.cc. |
360 | linked_ptr<WindowsFrameInfo> frame_info; |
361 | if ((windows_frame_info_[WindowsFrameInfo::STACK_INFO_FRAME_DATA] |
362 | .RetrieveRange(address, &frame_info)) |
363 | || (windows_frame_info_[WindowsFrameInfo::STACK_INFO_FPO] |
364 | .RetrieveRange(address, &frame_info))) { |
365 | result->CopyFrom(*frame_info.get()); |
366 | return result.release(); |
367 | } |
368 | |
369 | // Even without a relevant STACK line, many functions contain |
370 | // information about how much space their parameters consume on the |
371 | // stack. Use RetrieveNearestRange instead of RetrieveRange, so that |
372 | // we can use the function to bound the extent of the PUBLIC symbol, |
373 | // below. However, this does mean we need to check that ADDRESS |
374 | // falls within the retrieved function's range; do the range |
375 | // comparison in an overflow-friendly way. |
376 | linked_ptr<Function> function; |
377 | MemAddr function_base, function_size; |
378 | if (functions_.RetrieveNearestRange(address, &function, &function_base, |
379 | NULL /* delta */, &function_size) && |
380 | address >= function_base && address - function_base < function_size) { |
381 | result->parameter_size = function->parameter_size; |
382 | result->valid |= WindowsFrameInfo::VALID_PARAMETER_SIZE; |
383 | return result.release(); |
384 | } |
385 | |
386 | // PUBLIC symbols might have a parameter size. Use the function we |
387 | // found above to limit the range the public symbol covers. |
388 | linked_ptr<PublicSymbol> public_symbol; |
389 | MemAddr public_address; |
390 | if (public_symbols_.Retrieve(address, &public_symbol, &public_address) && |
391 | (!function.get() || public_address > function_base)) { |
392 | result->parameter_size = public_symbol->parameter_size; |
393 | } |
394 | |
395 | return NULL; |
396 | } |
397 | |
398 | CFIFrameInfo* BasicSourceLineResolver::Module::FindCFIFrameInfo( |
399 | const StackFrame* frame) const { |
400 | MemAddr address = frame->instruction - frame->module->base_address(); |
401 | MemAddr initial_base, initial_size; |
402 | string initial_rules; |
403 | |
404 | // Find the initial rule whose range covers this address. That |
405 | // provides an initial set of register recovery rules. Then, walk |
406 | // forward from the initial rule's starting address to frame's |
407 | // instruction address, applying delta rules. |
408 | if (!cfi_initial_rules_.RetrieveRange(address, &initial_rules, &initial_base, |
409 | NULL /* delta */, &initial_size)) { |
410 | return NULL; |
411 | } |
412 | |
413 | // Create a frame info structure, and populate it with the rules from |
414 | // the STACK CFI INIT record. |
415 | scoped_ptr<CFIFrameInfo> rules(new CFIFrameInfo()); |
416 | if (!ParseCFIRuleSet(initial_rules, rules.get())) |
417 | return NULL; |
418 | |
419 | // Find the first delta rule that falls within the initial rule's range. |
420 | map<MemAddr, string>::const_iterator delta = |
421 | cfi_delta_rules_.lower_bound(initial_base); |
422 | |
423 | // Apply delta rules up to and including the frame's address. |
424 | while (delta != cfi_delta_rules_.end() && delta->first <= address) { |
425 | ParseCFIRuleSet(delta->second, rules.get()); |
426 | delta++; |
427 | } |
428 | |
429 | return rules.release(); |
430 | } |
431 | |
432 | bool BasicSourceLineResolver::Module::ParseFile(char* file_line) { |
433 | long index; |
434 | char* filename; |
435 | if (SymbolParseHelper::ParseFile(file_line, &index, &filename)) { |
436 | files_.insert(make_pair(index, string(filename))); |
437 | return true; |
438 | } |
439 | return false; |
440 | } |
441 | |
442 | bool BasicSourceLineResolver::Module::ParseInlineOrigin( |
443 | char* inline_origin_line) { |
444 | bool has_file_id; |
445 | long origin_id; |
446 | long source_file_id; |
447 | char* origin_name; |
448 | if (SymbolParseHelper::ParseInlineOrigin(inline_origin_line, &has_file_id, |
449 | &origin_id, &source_file_id, |
450 | &origin_name)) { |
451 | inline_origins_.insert(make_pair( |
452 | origin_id, |
453 | new InlineOrigin(has_file_id, source_file_id, origin_name))); |
454 | return true; |
455 | } |
456 | return false; |
457 | } |
458 | |
459 | linked_ptr<BasicSourceLineResolver::Inline> |
460 | BasicSourceLineResolver::Module::ParseInline(char* inline_line) { |
461 | bool has_call_site_file_id; |
462 | long inline_nest_level; |
463 | long call_site_line; |
464 | long call_site_file_id; |
465 | long origin_id; |
466 | vector<std::pair<MemAddr, MemAddr>> ranges; |
467 | if (SymbolParseHelper::ParseInline(inline_line, &has_call_site_file_id, |
468 | &inline_nest_level, &call_site_line, |
469 | &call_site_file_id, &origin_id, &ranges)) { |
470 | return linked_ptr<Inline>(new Inline(has_call_site_file_id, |
471 | inline_nest_level, call_site_line, |
472 | call_site_file_id, origin_id, ranges)); |
473 | } |
474 | return linked_ptr<Inline>(); |
475 | } |
476 | |
477 | BasicSourceLineResolver::Function* |
478 | BasicSourceLineResolver::Module::ParseFunction(char* function_line) { |
479 | bool is_multiple; |
480 | uint64_t address; |
481 | uint64_t size; |
482 | long stack_param_size; |
483 | char* name; |
484 | if (SymbolParseHelper::ParseFunction(function_line, &is_multiple, &address, |
485 | &size, &stack_param_size, &name)) { |
486 | return new Function(name, address, size, stack_param_size, is_multiple); |
487 | } |
488 | return NULL; |
489 | } |
490 | |
491 | BasicSourceLineResolver::Line* BasicSourceLineResolver::Module::ParseLine( |
492 | char* line_line) { |
493 | uint64_t address; |
494 | uint64_t size; |
495 | long line_number; |
496 | long source_file; |
497 | |
498 | if (SymbolParseHelper::ParseLine(line_line, &address, &size, &line_number, |
499 | &source_file)) { |
500 | return new Line(address, size, source_file, line_number); |
501 | } |
502 | return NULL; |
503 | } |
504 | |
505 | bool BasicSourceLineResolver::Module::ParsePublicSymbol(char* public_line) { |
506 | bool is_multiple; |
507 | uint64_t address; |
508 | long stack_param_size; |
509 | char* name; |
510 | |
511 | if (SymbolParseHelper::ParsePublicSymbol(public_line, &is_multiple, &address, |
512 | &stack_param_size, &name)) { |
513 | // A few public symbols show up with an address of 0. This has been seen |
514 | // in the dumped output of ntdll.pdb for symbols such as _CIlog, _CIpow, |
515 | // RtlDescribeChunkLZNT1, and RtlReserveChunkLZNT1. They would conflict |
516 | // with one another if they were allowed into the public_symbols_ map, |
517 | // but since the address is obviously invalid, gracefully accept them |
518 | // as input without putting them into the map. |
519 | if (address == 0) { |
520 | return true; |
521 | } |
522 | |
523 | linked_ptr<PublicSymbol> symbol(new PublicSymbol(name, address, |
524 | stack_param_size, |
525 | is_multiple)); |
526 | return public_symbols_.Store(address, symbol); |
527 | } |
528 | return false; |
529 | } |
530 | |
531 | bool BasicSourceLineResolver::Module::ParseStackInfo(char* stack_info_line) { |
532 | // Skip "STACK " prefix. |
533 | stack_info_line += 6; |
534 | |
535 | // Find the token indicating what sort of stack frame walking |
536 | // information this is. |
537 | while (*stack_info_line == ' ') |
538 | stack_info_line++; |
539 | const char* platform = stack_info_line; |
540 | while (!strchr(kWhitespace, *stack_info_line)) |
541 | stack_info_line++; |
542 | *stack_info_line++ = '\0'; |
543 | |
544 | // MSVC stack frame info. |
545 | if (strcmp(platform, "WIN" ) == 0) { |
546 | int type = 0; |
547 | uint64_t rva, code_size; |
548 | linked_ptr<WindowsFrameInfo> |
549 | stack_frame_info(WindowsFrameInfo::ParseFromString(stack_info_line, |
550 | type, |
551 | rva, |
552 | code_size)); |
553 | if (stack_frame_info == NULL) |
554 | return false; |
555 | |
556 | // TODO(mmentovai): I wanted to use StoreRange's return value as this |
557 | // method's return value, but MSVC infrequently outputs stack info that |
558 | // violates the containment rules. This happens with a section of code |
559 | // in strncpy_s in test_app.cc (testdata/minidump2). There, problem looks |
560 | // like this: |
561 | // STACK WIN 4 4242 1a a 0 ... (STACK WIN 4 base size prolog 0 ...) |
562 | // STACK WIN 4 4243 2e 9 0 ... |
563 | // ContainedRangeMap treats these two blocks as conflicting. In reality, |
564 | // when the prolog lengths are taken into account, the actual code of |
565 | // these blocks doesn't conflict. However, we can't take the prolog lengths |
566 | // into account directly here because we'd wind up with a different set |
567 | // of range conflicts when MSVC outputs stack info like this: |
568 | // STACK WIN 4 1040 73 33 0 ... |
569 | // STACK WIN 4 105a 59 19 0 ... |
570 | // because in both of these entries, the beginning of the code after the |
571 | // prolog is at 0x1073, and the last byte of contained code is at 0x10b2. |
572 | // Perhaps we could get away with storing ranges by rva + prolog_size |
573 | // if ContainedRangeMap were modified to allow replacement of |
574 | // already-stored values. |
575 | |
576 | windows_frame_info_[type].StoreRange(rva, code_size, stack_frame_info); |
577 | return true; |
578 | } else if (strcmp(platform, "CFI" ) == 0) { |
579 | // DWARF CFI stack frame info |
580 | return ParseCFIFrameInfo(stack_info_line); |
581 | } else { |
582 | // Something unrecognized. |
583 | return false; |
584 | } |
585 | } |
586 | |
587 | bool BasicSourceLineResolver::Module::ParseCFIFrameInfo( |
588 | char* stack_info_line) { |
589 | char* cursor; |
590 | |
591 | // Is this an INIT record or a delta record? |
592 | char* init_or_address = strtok_r(stack_info_line, " \r\n" , &cursor); |
593 | if (!init_or_address) |
594 | return false; |
595 | |
596 | if (strcmp(init_or_address, "INIT" ) == 0) { |
597 | // This record has the form "STACK INIT <address> <size> <rules...>". |
598 | char* address_field = strtok_r(NULL, " \r\n" , &cursor); |
599 | if (!address_field) return false; |
600 | |
601 | char* size_field = strtok_r(NULL, " \r\n" , &cursor); |
602 | if (!size_field) return false; |
603 | |
604 | char* initial_rules = strtok_r(NULL, "\r\n" , &cursor); |
605 | if (!initial_rules) return false; |
606 | |
607 | MemAddr address = strtoul(address_field, NULL, 16); |
608 | MemAddr size = strtoul(size_field, NULL, 16); |
609 | cfi_initial_rules_.StoreRange(address, size, initial_rules); |
610 | return true; |
611 | } |
612 | |
613 | // This record has the form "STACK <address> <rules...>". |
614 | char* address_field = init_or_address; |
615 | char* delta_rules = strtok_r(NULL, "\r\n" , &cursor); |
616 | if (!delta_rules) return false; |
617 | MemAddr address = strtoul(address_field, NULL, 16); |
618 | cfi_delta_rules_[address] = delta_rules; |
619 | return true; |
620 | } |
621 | |
622 | bool BasicSourceLineResolver::Function::AppendInline(linked_ptr<Inline> in) { |
623 | // This happends if in's parent wasn't added due to a malformed INLINE record. |
624 | if (in->inline_nest_level > last_added_inline_nest_level + 1) |
625 | return false; |
626 | |
627 | last_added_inline_nest_level = in->inline_nest_level; |
628 | |
629 | // Store all ranges into current level of inlines. |
630 | for (auto range : in->inline_ranges) |
631 | inlines.StoreRange(range.first, range.second, in); |
632 | return true; |
633 | } |
634 | |
635 | // static |
636 | bool SymbolParseHelper::ParseFile(char* file_line, long* index, |
637 | char** filename) { |
638 | // FILE <id> <filename> |
639 | assert(strncmp(file_line, "FILE " , 5) == 0); |
640 | file_line += 5; // skip prefix |
641 | |
642 | vector<char*> tokens; |
643 | if (!Tokenize(file_line, kWhitespace, 2, &tokens)) { |
644 | return false; |
645 | } |
646 | |
647 | char* after_number; |
648 | *index = strtol(tokens[0], &after_number, 10); |
649 | if (!IsValidAfterNumber(after_number) || *index < 0 || |
650 | *index == std::numeric_limits<long>::max()) { |
651 | return false; |
652 | } |
653 | |
654 | *filename = tokens[1]; |
655 | if (!*filename) { |
656 | return false; |
657 | } |
658 | |
659 | return true; |
660 | } |
661 | |
662 | // static |
663 | bool SymbolParseHelper::ParseInlineOrigin(char* inline_origin_line, |
664 | bool* has_file_id, |
665 | long* origin_id, |
666 | long* file_id, |
667 | char** name) { |
668 | // Old INLINE_ORIGIN format: |
669 | // INLINE_ORIGIN <origin_id> <file_id> <name> |
670 | // New INLINE_ORIGIN format: |
671 | // INLINE_ORIGIN <origin_id> <name> |
672 | assert(strncmp(inline_origin_line, "INLINE_ORIGIN " , 14) == 0); |
673 | inline_origin_line += 14; // skip prefix |
674 | vector<char*> tokens; |
675 | // Split the line into two parts so that the first token is "<origin_id>", and |
676 | // second token is either "<file_id> <name>"" or "<name>"" depending on the |
677 | // format version. |
678 | if (!Tokenize(inline_origin_line, kWhitespace, 2, &tokens)) { |
679 | return false; |
680 | } |
681 | |
682 | char* after_number; |
683 | *origin_id = strtol(tokens[0], &after_number, 10); |
684 | if (!IsValidAfterNumber(after_number) || *origin_id < 0 || |
685 | *origin_id == std::numeric_limits<long>::max()) { |
686 | return false; |
687 | } |
688 | |
689 | // If the field after origin_id is a number, then it's old format. |
690 | char* remaining_line = tokens[1]; |
691 | *has_file_id = true; |
692 | for (size_t i = 0; |
693 | i < strlen(remaining_line) && remaining_line[i] != ' ' && *has_file_id; |
694 | ++i) { |
695 | // If the file id is -1, it might be an artificial function that doesn't |
696 | // have file id. So, we consider -1 as a valid special case. |
697 | if (remaining_line[i] == '-' && i == 0) { |
698 | continue; |
699 | } |
700 | *has_file_id = isdigit(remaining_line[i]); |
701 | } |
702 | |
703 | if (*has_file_id) { |
704 | // If it's old format, split "<file_id> <name>" to {"<field_id>", "<name>"}. |
705 | if (!Tokenize(remaining_line, kWhitespace, 2, &tokens)) { |
706 | return false; |
707 | } |
708 | *file_id = strtol(tokens[0], &after_number, 10); |
709 | // If the file id is -1, it might be an artificial function that doesn't |
710 | // have file id. So, we consider -1 as a valid special case. |
711 | if (!IsValidAfterNumber(after_number) || *file_id < -1 || |
712 | *file_id == std::numeric_limits<long>::max()) { |
713 | return false; |
714 | } |
715 | } |
716 | |
717 | *name = tokens[1]; |
718 | if (!*name) { |
719 | return false; |
720 | } |
721 | |
722 | return true; |
723 | } |
724 | |
725 | // static |
726 | bool SymbolParseHelper::ParseInline( |
727 | char* inline_line, |
728 | bool* has_call_site_file_id, |
729 | long* inline_nest_level, |
730 | long* call_site_line, |
731 | long* call_site_file_id, |
732 | long* origin_id, |
733 | vector<std::pair<MemAddr, MemAddr>>* ranges) { |
734 | // Old INLINE format: |
735 | // INLINE <inline_nest_level> <call_site_line> <origin_id> [<address> <size>]+ |
736 | // New INLINE format: |
737 | // INLINE <inline_nest_level> <call_site_line> <call_site_file_id> <origin_id> |
738 | // [<address> <size>]+ |
739 | assert(strncmp(inline_line, "INLINE " , 7) == 0); |
740 | inline_line += 7; // skip prefix |
741 | |
742 | vector<char*> tokens; |
743 | // Increase max_tokens if necessary. |
744 | Tokenize(inline_line, kWhitespace, 512, &tokens); |
745 | |
746 | // Determine the version of INLINE record by parity of the vector length. |
747 | *has_call_site_file_id = tokens.size() % 2 == 0; |
748 | |
749 | // The length of the vector should be at least 5. |
750 | if (tokens.size() < 5) { |
751 | return false; |
752 | } |
753 | |
754 | char* after_number; |
755 | size_t next_idx = 0; |
756 | |
757 | *inline_nest_level = strtol(tokens[next_idx++], &after_number, 10); |
758 | if (!IsValidAfterNumber(after_number) || *inline_nest_level < 0 || |
759 | *inline_nest_level == std::numeric_limits<long>::max()) { |
760 | return false; |
761 | } |
762 | |
763 | *call_site_line = strtol(tokens[next_idx++], &after_number, 10); |
764 | if (!IsValidAfterNumber(after_number) || *call_site_line < 0 || |
765 | *call_site_line == std::numeric_limits<long>::max()) { |
766 | return false; |
767 | } |
768 | |
769 | if (*has_call_site_file_id) { |
770 | *call_site_file_id = strtol(tokens[next_idx++], &after_number, 10); |
771 | // If the file id is -1, it might be an artificial function that doesn't |
772 | // have file id. So, we consider -1 as a valid special case. |
773 | if (!IsValidAfterNumber(after_number) || *call_site_file_id < -1 || |
774 | *call_site_file_id == std::numeric_limits<long>::max()) { |
775 | return false; |
776 | } |
777 | } |
778 | |
779 | *origin_id = strtol(tokens[next_idx++], &after_number, 10); |
780 | if (!IsValidAfterNumber(after_number) || *origin_id < 0 || |
781 | *origin_id == std::numeric_limits<long>::max()) { |
782 | return false; |
783 | } |
784 | |
785 | while (next_idx < tokens.size()) { |
786 | MemAddr address = strtoull(tokens[next_idx++], &after_number, 16); |
787 | if (!IsValidAfterNumber(after_number) || |
788 | address == std::numeric_limits<unsigned long long>::max()) { |
789 | return false; |
790 | } |
791 | MemAddr size = strtoull(tokens[next_idx++], &after_number, 16); |
792 | if (!IsValidAfterNumber(after_number) || |
793 | size == std::numeric_limits<unsigned long long>::max()) { |
794 | return false; |
795 | } |
796 | ranges->push_back({address, size}); |
797 | } |
798 | |
799 | return true; |
800 | } |
801 | |
802 | // static |
803 | bool SymbolParseHelper::ParseFunction(char* function_line, bool* is_multiple, |
804 | uint64_t* address, uint64_t* size, |
805 | long* stack_param_size, char** name) { |
806 | // FUNC [<multiple>] <address> <size> <stack_param_size> <name> |
807 | assert(strncmp(function_line, "FUNC " , 5) == 0); |
808 | function_line += 5; // skip prefix |
809 | |
810 | vector<char*> tokens; |
811 | if (!TokenizeWithOptionalField(function_line, "m" , kWhitespace, 5, &tokens)) { |
812 | return false; |
813 | } |
814 | |
815 | *is_multiple = strcmp(tokens[0], "m" ) == 0; |
816 | int next_token = *is_multiple ? 1 : 0; |
817 | |
818 | char* after_number; |
819 | *address = strtoull(tokens[next_token++], &after_number, 16); |
820 | if (!IsValidAfterNumber(after_number) || |
821 | *address == std::numeric_limits<unsigned long long>::max()) { |
822 | return false; |
823 | } |
824 | *size = strtoull(tokens[next_token++], &after_number, 16); |
825 | if (!IsValidAfterNumber(after_number) || |
826 | *size == std::numeric_limits<unsigned long long>::max()) { |
827 | return false; |
828 | } |
829 | *stack_param_size = strtol(tokens[next_token++], &after_number, 16); |
830 | if (!IsValidAfterNumber(after_number) || |
831 | *stack_param_size == std::numeric_limits<long>::max() || |
832 | *stack_param_size < 0) { |
833 | return false; |
834 | } |
835 | *name = tokens[next_token++]; |
836 | |
837 | return true; |
838 | } |
839 | |
840 | // static |
841 | bool SymbolParseHelper::ParseLine(char* line_line, uint64_t* address, |
842 | uint64_t* size, long* line_number, |
843 | long* source_file) { |
844 | // <address> <size> <line number> <source file id> |
845 | vector<char*> tokens; |
846 | if (!Tokenize(line_line, kWhitespace, 4, &tokens)) { |
847 | return false; |
848 | } |
849 | |
850 | char* after_number; |
851 | *address = strtoull(tokens[0], &after_number, 16); |
852 | if (!IsValidAfterNumber(after_number) || |
853 | *address == std::numeric_limits<unsigned long long>::max()) { |
854 | return false; |
855 | } |
856 | *size = strtoull(tokens[1], &after_number, 16); |
857 | if (!IsValidAfterNumber(after_number) || |
858 | *size == std::numeric_limits<unsigned long long>::max()) { |
859 | return false; |
860 | } |
861 | *line_number = strtol(tokens[2], &after_number, 10); |
862 | if (!IsValidAfterNumber(after_number) || |
863 | *line_number == std::numeric_limits<long>::max()) { |
864 | return false; |
865 | } |
866 | *source_file = strtol(tokens[3], &after_number, 10); |
867 | if (!IsValidAfterNumber(after_number) || *source_file < 0 || |
868 | *source_file == std::numeric_limits<long>::max()) { |
869 | return false; |
870 | } |
871 | |
872 | // Valid line numbers normally start from 1, however there are functions that |
873 | // are associated with a source file but not associated with any line number |
874 | // (block helper function) and for such functions the symbol file contains 0 |
875 | // for the line numbers. Hence, 0 should be treated as a valid line number. |
876 | // For more information on block helper functions, please, take a look at: |
877 | // http://clang.llvm.org/docs/Block-ABI-Apple.html |
878 | if (*line_number < 0) { |
879 | return false; |
880 | } |
881 | |
882 | return true; |
883 | } |
884 | |
885 | // static |
886 | bool SymbolParseHelper::ParsePublicSymbol(char* public_line, bool* is_multiple, |
887 | uint64_t* address, |
888 | long* stack_param_size, |
889 | char** name) { |
890 | // PUBLIC [<multiple>] <address> <stack_param_size> <name> |
891 | assert(strncmp(public_line, "PUBLIC " , 7) == 0); |
892 | public_line += 7; // skip prefix |
893 | |
894 | vector<char*> tokens; |
895 | if (!TokenizeWithOptionalField(public_line, "m" , kWhitespace, 4, &tokens)) { |
896 | return false; |
897 | } |
898 | |
899 | *is_multiple = strcmp(tokens[0], "m" ) == 0; |
900 | int next_token = *is_multiple ? 1 : 0; |
901 | |
902 | char* after_number; |
903 | *address = strtoull(tokens[next_token++], &after_number, 16); |
904 | if (!IsValidAfterNumber(after_number) || |
905 | *address == std::numeric_limits<unsigned long long>::max()) { |
906 | return false; |
907 | } |
908 | *stack_param_size = strtol(tokens[next_token++], &after_number, 16); |
909 | if (!IsValidAfterNumber(after_number) || |
910 | *stack_param_size == std::numeric_limits<long>::max() || |
911 | *stack_param_size < 0) { |
912 | return false; |
913 | } |
914 | *name = tokens[next_token++]; |
915 | |
916 | return true; |
917 | } |
918 | |
919 | // static |
920 | bool SymbolParseHelper::IsValidAfterNumber(char* after_number) { |
921 | if (after_number != NULL && strchr(kWhitespace, *after_number) != NULL) { |
922 | return true; |
923 | } |
924 | return false; |
925 | } |
926 | |
927 | } // namespace google_breakpad |
928 | |