1// Copyright (c) 2010 Google Inc.
2// All rights reserved.
3//
4// Redistribution and use in source and binary forms, with or without
5// modification, are permitted provided that the following conditions are
6// met:
7//
8// * Redistributions of source code must retain the above copyright
9// notice, this list of conditions and the following disclaimer.
10// * Redistributions in binary form must reproduce the above
11// copyright notice, this list of conditions and the following disclaimer
12// in the documentation and/or other materials provided with the
13// distribution.
14// * Neither the name of Google Inc. nor the names of its
15// contributors may be used to endorse or promote products derived from
16// this software without specific prior written permission.
17//
18// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29//
30// basic_source_line_resolver.cc: BasicSourceLineResolver implementation.
31//
32// See basic_source_line_resolver.h and basic_source_line_resolver_types.h
33// for documentation.
34
35#include <assert.h>
36#include <stdio.h>
37#include <string.h>
38#include <sys/types.h>
39#include <sys/stat.h>
40
41#include <limits>
42#include <map>
43#include <memory>
44#include <utility>
45#include <vector>
46
47#include "google_breakpad/processor/basic_source_line_resolver.h"
48#include "processor/basic_source_line_resolver_types.h"
49#include "processor/module_factory.h"
50
51#include "processor/tokenize.h"
52
53using std::deque;
54using std::make_pair;
55using std::map;
56using std::unique_ptr;
57using std::vector;
58
59namespace google_breakpad {
60
// Windows C runtime shims: map the POSIX/C99 names used in this file onto the
// equivalents named here. strtok_r is only remapped for MSVC; strtoull is
// remapped for every _WIN32 build.
#if defined(_WIN32)
#if defined(_MSC_VER)
#define strtok_r strtok_s
#endif  // defined(_MSC_VER)
#define strtoull _strtoui64
#endif  // defined(_WIN32)
67
68namespace {
69
70// Utility function to tokenize given the presence of an optional initial
71// field. In this case, optional_field is the expected string for the optional
72// field, and max_tokens is the maximum number of tokens including the optional
73// field. Refer to the documentation for Tokenize for descriptions of the other
74// arguments.
75bool TokenizeWithOptionalField(char* line,
76 const char* optional_field,
77 const char* separators,
78 int max_tokens,
79 vector<char*>* tokens) {
80 // First tokenize assuming the optional field is not present. If we then see
81 // the optional field, additionally tokenize the last token into two tokens.
82 if (!Tokenize(line, separators, max_tokens - 1, tokens)) {
83 return false;
84 }
85
86 if (strcmp(tokens->front(), optional_field) == 0) {
87 // The optional field is present. Split the last token in two to recover the
88 // field prior to the last.
89 vector<char*> last_tokens;
90 if (!Tokenize(tokens->back(), separators, 2, &last_tokens)) {
91 return false;
92 }
93 // Replace the previous last token with the two new tokens.
94 tokens->pop_back();
95 tokens->push_back(last_tokens[0]);
96 tokens->push_back(last_tokens[1]);
97 }
98
99 return true;
100}
101
102} // namespace
103
// Characters that separate fields within a symbol-file record.
// Both the pointee and the pointer are const so the separator set cannot be
// re-pointed at run time.
static const char* const kWhitespace = " \r\n";
// Only the first kMaxErrorsPrinted parse errors are written to the log.
static const int kMaxErrorsPrinted = 5;
// Parsing of a symbol file stops once more than this many errors were counted.
static const int kMaxErrorsBeforeBailing = 100;
107
108BasicSourceLineResolver::BasicSourceLineResolver() :
109 SourceLineResolverBase(new BasicModuleFactory) { }
110
111// static
112void BasicSourceLineResolver::Module::LogParseError(
113 const string& message,
114 int line_number,
115 int* num_errors) {
116 if (++(*num_errors) <= kMaxErrorsPrinted) {
117 if (line_number > 0) {
118 BPLOG(ERROR) << "Line " << line_number << ": " << message;
119 } else {
120 BPLOG(ERROR) << message;
121 }
122 }
123}
124
125bool BasicSourceLineResolver::Module::LoadMapFromMemory(
126 char* memory_buffer,
127 size_t memory_buffer_size) {
128 linked_ptr<Function> cur_func;
129 int line_number = 0;
130 int num_errors = 0;
131 int inline_num_errors = 0;
132 char* save_ptr;
133
134 // If the length is 0, we can still pretend we have a symbol file. This is
135 // for scenarios that want to test symbol lookup, but don't necessarily care
136 // if certain modules do not have any information, like system libraries.
137 if (memory_buffer_size == 0) {
138 return true;
139 }
140
141 // Make sure the last character is null terminator.
142 size_t last_null_terminator = memory_buffer_size - 1;
143 if (memory_buffer[last_null_terminator] != '\0') {
144 memory_buffer[last_null_terminator] = '\0';
145 }
146
147 // Skip any null terminators at the end of the memory buffer, and make sure
148 // there are no other null terminators in the middle of the memory buffer.
149 bool has_null_terminator_in_the_middle = false;
150 while (last_null_terminator > 0 &&
151 memory_buffer[last_null_terminator - 1] == '\0') {
152 last_null_terminator--;
153 }
154 for (size_t i = 0; i < last_null_terminator; i++) {
155 if (memory_buffer[i] == '\0') {
156 memory_buffer[i] = '_';
157 has_null_terminator_in_the_middle = true;
158 }
159 }
160 if (has_null_terminator_in_the_middle) {
161 LogParseError(
162 "Null terminator is not expected in the middle of the symbol data",
163 line_number,
164 &num_errors);
165 }
166
167 char* buffer;
168 buffer = strtok_r(memory_buffer, "\r\n", &save_ptr);
169
170 while (buffer != NULL) {
171 ++line_number;
172
173 if (strncmp(buffer, "FILE ", 5) == 0) {
174 if (!ParseFile(buffer)) {
175 LogParseError("ParseFile on buffer failed", line_number, &num_errors);
176 }
177 } else if (strncmp(buffer, "STACK ", 6) == 0) {
178 if (!ParseStackInfo(buffer)) {
179 LogParseError("ParseStackInfo failed", line_number, &num_errors);
180 }
181 } else if (strncmp(buffer, "FUNC ", 5) == 0) {
182 cur_func.reset(ParseFunction(buffer));
183 if (!cur_func.get()) {
184 LogParseError("ParseFunction failed", line_number, &num_errors);
185 } else {
186 // StoreRange will fail if the function has an invalid address or size.
187 // We'll silently ignore this, the function and any corresponding lines
188 // will be destroyed when cur_func is released.
189 functions_.StoreRange(cur_func->address, cur_func->size, cur_func);
190 }
191 } else if (strncmp(buffer, "PUBLIC ", 7) == 0) {
192 // Clear cur_func: public symbols don't contain line number information.
193 cur_func.reset();
194
195 if (!ParsePublicSymbol(buffer)) {
196 LogParseError("ParsePublicSymbol failed", line_number, &num_errors);
197 }
198 } else if (strncmp(buffer, "MODULE ", 7) == 0) {
199 // Ignore these. They're not of any use to BasicSourceLineResolver,
200 // which is fed modules by a SymbolSupplier. These lines are present to
201 // aid other tools in properly placing symbol files so that they can
202 // be accessed by a SymbolSupplier.
203 //
204 // MODULE <guid> <age> <filename>
205 } else if (strncmp(buffer, "INFO ", 5) == 0) {
206 // Ignore these as well, they're similarly just for housekeeping.
207 //
208 // INFO CODE_ID <code id> <filename>
209 } else if (strncmp(buffer, "INLINE ", 7) == 0) {
210 linked_ptr<Inline> in = ParseInline(buffer);
211 if (!in.get())
212 LogParseError("ParseInline failed", line_number, &inline_num_errors);
213 else
214 cur_func->AppendInline(in);
215 } else if (strncmp(buffer, "INLINE_ORIGIN ", 14) == 0) {
216 if (!ParseInlineOrigin(buffer)) {
217 LogParseError("ParseInlineOrigin failed", line_number,
218 &inline_num_errors);
219 }
220 } else {
221 if (!cur_func.get()) {
222 LogParseError("Found source line data without a function",
223 line_number, &num_errors);
224 } else {
225 Line* line = ParseLine(buffer);
226 if (!line) {
227 LogParseError("ParseLine failed", line_number, &num_errors);
228 } else {
229 cur_func->lines.StoreRange(line->address, line->size,
230 linked_ptr<Line>(line));
231 }
232 }
233 }
234 if (num_errors > kMaxErrorsBeforeBailing) {
235 break;
236 }
237 buffer = strtok_r(NULL, "\r\n", &save_ptr);
238 }
239 is_corrupt_ = num_errors > 0;
240 return true;
241}
242
243void BasicSourceLineResolver::Module::ConstructInlineFrames(
244 StackFrame* frame,
245 MemAddr address,
246 const ContainedRangeMap<uint64_t, linked_ptr<Inline>>& inline_map,
247 deque<unique_ptr<StackFrame>>* inlined_frames) const {
248 vector<const linked_ptr<Inline>*> inlines;
249 if (!inline_map.RetrieveRanges(address, inlines)) {
250 return;
251 }
252
253 for (const linked_ptr<Inline>* const in : inlines) {
254 unique_ptr<StackFrame> new_frame =
255 unique_ptr<StackFrame>(new StackFrame(*frame));
256 auto origin = inline_origins_.find(in->get()->origin_id);
257 if (origin != inline_origins_.end()) {
258 new_frame->function_name = origin->second->name;
259 } else {
260 new_frame->function_name = "<name omitted>";
261 }
262
263 // Store call site file and line in current frame, which will be updated
264 // later.
265 new_frame->source_line = in->get()->call_site_line;
266 if (in->get()->has_call_site_file_id) {
267 auto file = files_.find(in->get()->call_site_file_id);
268 if (file != files_.end()) {
269 new_frame->source_file_name = file->second;
270 }
271 }
272
273 // Use the starting address of the inlined range as inlined function base.
274 new_frame->function_base = new_frame->module->base_address();
275 for (const auto& range : in->get()->inline_ranges) {
276 if (address >= range.first && address < range.first + range.second) {
277 new_frame->function_base += range.first;
278 break;
279 }
280 }
281 new_frame->trust = StackFrame::FRAME_TRUST_INLINE;
282
283 // The inlines vector has an order from innermost entry to outermost entry.
284 // By push_back, we will have inlined_frames with the same order.
285 inlined_frames->push_back(std::move(new_frame));
286 }
287
288 // Update the source file and source line for each inlined frame.
289 if (!inlined_frames->empty()) {
290 string parent_frame_source_file_name = frame->source_file_name;
291 int parent_frame_source_line = frame->source_line;
292 frame->source_file_name = inlined_frames->back()->source_file_name;
293 frame->source_line = inlined_frames->back()->source_line;
294 for (unique_ptr<StackFrame>& inlined_frame : *inlined_frames) {
295 std::swap(inlined_frame->source_file_name, parent_frame_source_file_name);
296 std::swap(inlined_frame->source_line, parent_frame_source_line);
297 }
298 }
299}
300
301void BasicSourceLineResolver::Module::LookupAddress(
302 StackFrame* frame,
303 deque<unique_ptr<StackFrame>>* inlined_frames) const {
304 MemAddr address = frame->instruction - frame->module->base_address();
305
306 // First, look for a FUNC record that covers address. Use
307 // RetrieveNearestRange instead of RetrieveRange so that, if there
308 // is no such function, we can use the next function to bound the
309 // extent of the PUBLIC symbol we find, below. This does mean we
310 // need to check that address indeed falls within the function we
311 // find; do the range comparison in an overflow-friendly way.
312 linked_ptr<Function> func;
313 linked_ptr<PublicSymbol> public_symbol;
314 MemAddr function_base;
315 MemAddr function_size;
316 MemAddr public_address;
317 if (functions_.RetrieveNearestRange(address, &func, &function_base,
318 NULL /* delta */, &function_size) &&
319 address >= function_base && address - function_base < function_size) {
320 frame->function_name = func->name;
321 frame->function_base = frame->module->base_address() + function_base;
322 frame->is_multiple = func->is_multiple;
323
324 linked_ptr<Line> line;
325 MemAddr line_base;
326 if (func->lines.RetrieveRange(address, &line, &line_base, NULL /* delta */,
327 NULL /* size */)) {
328 FileMap::const_iterator it = files_.find(line->source_file_id);
329 if (it != files_.end()) {
330 frame->source_file_name = files_.find(line->source_file_id)->second;
331 }
332 frame->source_line = line->line;
333 frame->source_line_base = frame->module->base_address() + line_base;
334 }
335
336 // Check if this is inlined function call.
337 if (inlined_frames) {
338 ConstructInlineFrames(frame, address, func->inlines, inlined_frames);
339 }
340 } else if (public_symbols_.Retrieve(address,
341 &public_symbol, &public_address) &&
342 (!func.get() || public_address > function_base)) {
343 frame->function_name = public_symbol->name;
344 frame->function_base = frame->module->base_address() + public_address;
345 frame->is_multiple = public_symbol->is_multiple;
346 }
347}
348
349WindowsFrameInfo* BasicSourceLineResolver::Module::FindWindowsFrameInfo(
350 const StackFrame* frame) const {
351 MemAddr address = frame->instruction - frame->module->base_address();
352 scoped_ptr<WindowsFrameInfo> result(new WindowsFrameInfo());
353
354 // We only know about WindowsFrameInfo::STACK_INFO_FRAME_DATA and
355 // WindowsFrameInfo::STACK_INFO_FPO. Prefer them in this order.
356 // WindowsFrameInfo::STACK_INFO_FRAME_DATA is the newer type that
357 // includes its own program string.
358 // WindowsFrameInfo::STACK_INFO_FPO is the older type
359 // corresponding to the FPO_DATA struct. See stackwalker_x86.cc.
360 linked_ptr<WindowsFrameInfo> frame_info;
361 if ((windows_frame_info_[WindowsFrameInfo::STACK_INFO_FRAME_DATA]
362 .RetrieveRange(address, &frame_info))
363 || (windows_frame_info_[WindowsFrameInfo::STACK_INFO_FPO]
364 .RetrieveRange(address, &frame_info))) {
365 result->CopyFrom(*frame_info.get());
366 return result.release();
367 }
368
369 // Even without a relevant STACK line, many functions contain
370 // information about how much space their parameters consume on the
371 // stack. Use RetrieveNearestRange instead of RetrieveRange, so that
372 // we can use the function to bound the extent of the PUBLIC symbol,
373 // below. However, this does mean we need to check that ADDRESS
374 // falls within the retrieved function's range; do the range
375 // comparison in an overflow-friendly way.
376 linked_ptr<Function> function;
377 MemAddr function_base, function_size;
378 if (functions_.RetrieveNearestRange(address, &function, &function_base,
379 NULL /* delta */, &function_size) &&
380 address >= function_base && address - function_base < function_size) {
381 result->parameter_size = function->parameter_size;
382 result->valid |= WindowsFrameInfo::VALID_PARAMETER_SIZE;
383 return result.release();
384 }
385
386 // PUBLIC symbols might have a parameter size. Use the function we
387 // found above to limit the range the public symbol covers.
388 linked_ptr<PublicSymbol> public_symbol;
389 MemAddr public_address;
390 if (public_symbols_.Retrieve(address, &public_symbol, &public_address) &&
391 (!function.get() || public_address > function_base)) {
392 result->parameter_size = public_symbol->parameter_size;
393 }
394
395 return NULL;
396}
397
398CFIFrameInfo* BasicSourceLineResolver::Module::FindCFIFrameInfo(
399 const StackFrame* frame) const {
400 MemAddr address = frame->instruction - frame->module->base_address();
401 MemAddr initial_base, initial_size;
402 string initial_rules;
403
404 // Find the initial rule whose range covers this address. That
405 // provides an initial set of register recovery rules. Then, walk
406 // forward from the initial rule's starting address to frame's
407 // instruction address, applying delta rules.
408 if (!cfi_initial_rules_.RetrieveRange(address, &initial_rules, &initial_base,
409 NULL /* delta */, &initial_size)) {
410 return NULL;
411 }
412
413 // Create a frame info structure, and populate it with the rules from
414 // the STACK CFI INIT record.
415 scoped_ptr<CFIFrameInfo> rules(new CFIFrameInfo());
416 if (!ParseCFIRuleSet(initial_rules, rules.get()))
417 return NULL;
418
419 // Find the first delta rule that falls within the initial rule's range.
420 map<MemAddr, string>::const_iterator delta =
421 cfi_delta_rules_.lower_bound(initial_base);
422
423 // Apply delta rules up to and including the frame's address.
424 while (delta != cfi_delta_rules_.end() && delta->first <= address) {
425 ParseCFIRuleSet(delta->second, rules.get());
426 delta++;
427 }
428
429 return rules.release();
430}
431
432bool BasicSourceLineResolver::Module::ParseFile(char* file_line) {
433 long index;
434 char* filename;
435 if (SymbolParseHelper::ParseFile(file_line, &index, &filename)) {
436 files_.insert(make_pair(index, string(filename)));
437 return true;
438 }
439 return false;
440}
441
442bool BasicSourceLineResolver::Module::ParseInlineOrigin(
443 char* inline_origin_line) {
444 bool has_file_id;
445 long origin_id;
446 long source_file_id;
447 char* origin_name;
448 if (SymbolParseHelper::ParseInlineOrigin(inline_origin_line, &has_file_id,
449 &origin_id, &source_file_id,
450 &origin_name)) {
451 inline_origins_.insert(make_pair(
452 origin_id,
453 new InlineOrigin(has_file_id, source_file_id, origin_name)));
454 return true;
455 }
456 return false;
457}
458
459linked_ptr<BasicSourceLineResolver::Inline>
460BasicSourceLineResolver::Module::ParseInline(char* inline_line) {
461 bool has_call_site_file_id;
462 long inline_nest_level;
463 long call_site_line;
464 long call_site_file_id;
465 long origin_id;
466 vector<std::pair<MemAddr, MemAddr>> ranges;
467 if (SymbolParseHelper::ParseInline(inline_line, &has_call_site_file_id,
468 &inline_nest_level, &call_site_line,
469 &call_site_file_id, &origin_id, &ranges)) {
470 return linked_ptr<Inline>(new Inline(has_call_site_file_id,
471 inline_nest_level, call_site_line,
472 call_site_file_id, origin_id, ranges));
473 }
474 return linked_ptr<Inline>();
475}
476
477BasicSourceLineResolver::Function*
478BasicSourceLineResolver::Module::ParseFunction(char* function_line) {
479 bool is_multiple;
480 uint64_t address;
481 uint64_t size;
482 long stack_param_size;
483 char* name;
484 if (SymbolParseHelper::ParseFunction(function_line, &is_multiple, &address,
485 &size, &stack_param_size, &name)) {
486 return new Function(name, address, size, stack_param_size, is_multiple);
487 }
488 return NULL;
489}
490
491BasicSourceLineResolver::Line* BasicSourceLineResolver::Module::ParseLine(
492 char* line_line) {
493 uint64_t address;
494 uint64_t size;
495 long line_number;
496 long source_file;
497
498 if (SymbolParseHelper::ParseLine(line_line, &address, &size, &line_number,
499 &source_file)) {
500 return new Line(address, size, source_file, line_number);
501 }
502 return NULL;
503}
504
505bool BasicSourceLineResolver::Module::ParsePublicSymbol(char* public_line) {
506 bool is_multiple;
507 uint64_t address;
508 long stack_param_size;
509 char* name;
510
511 if (SymbolParseHelper::ParsePublicSymbol(public_line, &is_multiple, &address,
512 &stack_param_size, &name)) {
513 // A few public symbols show up with an address of 0. This has been seen
514 // in the dumped output of ntdll.pdb for symbols such as _CIlog, _CIpow,
515 // RtlDescribeChunkLZNT1, and RtlReserveChunkLZNT1. They would conflict
516 // with one another if they were allowed into the public_symbols_ map,
517 // but since the address is obviously invalid, gracefully accept them
518 // as input without putting them into the map.
519 if (address == 0) {
520 return true;
521 }
522
523 linked_ptr<PublicSymbol> symbol(new PublicSymbol(name, address,
524 stack_param_size,
525 is_multiple));
526 return public_symbols_.Store(address, symbol);
527 }
528 return false;
529}
530
531bool BasicSourceLineResolver::Module::ParseStackInfo(char* stack_info_line) {
532 // Skip "STACK " prefix.
533 stack_info_line += 6;
534
535 // Find the token indicating what sort of stack frame walking
536 // information this is.
537 while (*stack_info_line == ' ')
538 stack_info_line++;
539 const char* platform = stack_info_line;
540 while (!strchr(kWhitespace, *stack_info_line))
541 stack_info_line++;
542 *stack_info_line++ = '\0';
543
544 // MSVC stack frame info.
545 if (strcmp(platform, "WIN") == 0) {
546 int type = 0;
547 uint64_t rva, code_size;
548 linked_ptr<WindowsFrameInfo>
549 stack_frame_info(WindowsFrameInfo::ParseFromString(stack_info_line,
550 type,
551 rva,
552 code_size));
553 if (stack_frame_info == NULL)
554 return false;
555
556 // TODO(mmentovai): I wanted to use StoreRange's return value as this
557 // method's return value, but MSVC infrequently outputs stack info that
558 // violates the containment rules. This happens with a section of code
559 // in strncpy_s in test_app.cc (testdata/minidump2). There, problem looks
560 // like this:
561 // STACK WIN 4 4242 1a a 0 ... (STACK WIN 4 base size prolog 0 ...)
562 // STACK WIN 4 4243 2e 9 0 ...
563 // ContainedRangeMap treats these two blocks as conflicting. In reality,
564 // when the prolog lengths are taken into account, the actual code of
565 // these blocks doesn't conflict. However, we can't take the prolog lengths
566 // into account directly here because we'd wind up with a different set
567 // of range conflicts when MSVC outputs stack info like this:
568 // STACK WIN 4 1040 73 33 0 ...
569 // STACK WIN 4 105a 59 19 0 ...
570 // because in both of these entries, the beginning of the code after the
571 // prolog is at 0x1073, and the last byte of contained code is at 0x10b2.
572 // Perhaps we could get away with storing ranges by rva + prolog_size
573 // if ContainedRangeMap were modified to allow replacement of
574 // already-stored values.
575
576 windows_frame_info_[type].StoreRange(rva, code_size, stack_frame_info);
577 return true;
578 } else if (strcmp(platform, "CFI") == 0) {
579 // DWARF CFI stack frame info
580 return ParseCFIFrameInfo(stack_info_line);
581 } else {
582 // Something unrecognized.
583 return false;
584 }
585}
586
587bool BasicSourceLineResolver::Module::ParseCFIFrameInfo(
588 char* stack_info_line) {
589 char* cursor;
590
591 // Is this an INIT record or a delta record?
592 char* init_or_address = strtok_r(stack_info_line, " \r\n", &cursor);
593 if (!init_or_address)
594 return false;
595
596 if (strcmp(init_or_address, "INIT") == 0) {
597 // This record has the form "STACK INIT <address> <size> <rules...>".
598 char* address_field = strtok_r(NULL, " \r\n", &cursor);
599 if (!address_field) return false;
600
601 char* size_field = strtok_r(NULL, " \r\n", &cursor);
602 if (!size_field) return false;
603
604 char* initial_rules = strtok_r(NULL, "\r\n", &cursor);
605 if (!initial_rules) return false;
606
607 MemAddr address = strtoul(address_field, NULL, 16);
608 MemAddr size = strtoul(size_field, NULL, 16);
609 cfi_initial_rules_.StoreRange(address, size, initial_rules);
610 return true;
611 }
612
613 // This record has the form "STACK <address> <rules...>".
614 char* address_field = init_or_address;
615 char* delta_rules = strtok_r(NULL, "\r\n", &cursor);
616 if (!delta_rules) return false;
617 MemAddr address = strtoul(address_field, NULL, 16);
618 cfi_delta_rules_[address] = delta_rules;
619 return true;
620}
621
622bool BasicSourceLineResolver::Function::AppendInline(linked_ptr<Inline> in) {
623 // This happends if in's parent wasn't added due to a malformed INLINE record.
624 if (in->inline_nest_level > last_added_inline_nest_level + 1)
625 return false;
626
627 last_added_inline_nest_level = in->inline_nest_level;
628
629 // Store all ranges into current level of inlines.
630 for (auto range : in->inline_ranges)
631 inlines.StoreRange(range.first, range.second, in);
632 return true;
633}
634
635// static
636bool SymbolParseHelper::ParseFile(char* file_line, long* index,
637 char** filename) {
638 // FILE <id> <filename>
639 assert(strncmp(file_line, "FILE ", 5) == 0);
640 file_line += 5; // skip prefix
641
642 vector<char*> tokens;
643 if (!Tokenize(file_line, kWhitespace, 2, &tokens)) {
644 return false;
645 }
646
647 char* after_number;
648 *index = strtol(tokens[0], &after_number, 10);
649 if (!IsValidAfterNumber(after_number) || *index < 0 ||
650 *index == std::numeric_limits<long>::max()) {
651 return false;
652 }
653
654 *filename = tokens[1];
655 if (!*filename) {
656 return false;
657 }
658
659 return true;
660}
661
662// static
663bool SymbolParseHelper::ParseInlineOrigin(char* inline_origin_line,
664 bool* has_file_id,
665 long* origin_id,
666 long* file_id,
667 char** name) {
668 // Old INLINE_ORIGIN format:
669 // INLINE_ORIGIN <origin_id> <file_id> <name>
670 // New INLINE_ORIGIN format:
671 // INLINE_ORIGIN <origin_id> <name>
672 assert(strncmp(inline_origin_line, "INLINE_ORIGIN ", 14) == 0);
673 inline_origin_line += 14; // skip prefix
674 vector<char*> tokens;
675 // Split the line into two parts so that the first token is "<origin_id>", and
676 // second token is either "<file_id> <name>"" or "<name>"" depending on the
677 // format version.
678 if (!Tokenize(inline_origin_line, kWhitespace, 2, &tokens)) {
679 return false;
680 }
681
682 char* after_number;
683 *origin_id = strtol(tokens[0], &after_number, 10);
684 if (!IsValidAfterNumber(after_number) || *origin_id < 0 ||
685 *origin_id == std::numeric_limits<long>::max()) {
686 return false;
687 }
688
689 // If the field after origin_id is a number, then it's old format.
690 char* remaining_line = tokens[1];
691 *has_file_id = true;
692 for (size_t i = 0;
693 i < strlen(remaining_line) && remaining_line[i] != ' ' && *has_file_id;
694 ++i) {
695 // If the file id is -1, it might be an artificial function that doesn't
696 // have file id. So, we consider -1 as a valid special case.
697 if (remaining_line[i] == '-' && i == 0) {
698 continue;
699 }
700 *has_file_id = isdigit(remaining_line[i]);
701 }
702
703 if (*has_file_id) {
704 // If it's old format, split "<file_id> <name>" to {"<field_id>", "<name>"}.
705 if (!Tokenize(remaining_line, kWhitespace, 2, &tokens)) {
706 return false;
707 }
708 *file_id = strtol(tokens[0], &after_number, 10);
709 // If the file id is -1, it might be an artificial function that doesn't
710 // have file id. So, we consider -1 as a valid special case.
711 if (!IsValidAfterNumber(after_number) || *file_id < -1 ||
712 *file_id == std::numeric_limits<long>::max()) {
713 return false;
714 }
715 }
716
717 *name = tokens[1];
718 if (!*name) {
719 return false;
720 }
721
722 return true;
723}
724
725// static
726bool SymbolParseHelper::ParseInline(
727 char* inline_line,
728 bool* has_call_site_file_id,
729 long* inline_nest_level,
730 long* call_site_line,
731 long* call_site_file_id,
732 long* origin_id,
733 vector<std::pair<MemAddr, MemAddr>>* ranges) {
734 // Old INLINE format:
735 // INLINE <inline_nest_level> <call_site_line> <origin_id> [<address> <size>]+
736 // New INLINE format:
737 // INLINE <inline_nest_level> <call_site_line> <call_site_file_id> <origin_id>
738 // [<address> <size>]+
739 assert(strncmp(inline_line, "INLINE ", 7) == 0);
740 inline_line += 7; // skip prefix
741
742 vector<char*> tokens;
743 // Increase max_tokens if necessary.
744 Tokenize(inline_line, kWhitespace, 512, &tokens);
745
746 // Determine the version of INLINE record by parity of the vector length.
747 *has_call_site_file_id = tokens.size() % 2 == 0;
748
749 // The length of the vector should be at least 5.
750 if (tokens.size() < 5) {
751 return false;
752 }
753
754 char* after_number;
755 size_t next_idx = 0;
756
757 *inline_nest_level = strtol(tokens[next_idx++], &after_number, 10);
758 if (!IsValidAfterNumber(after_number) || *inline_nest_level < 0 ||
759 *inline_nest_level == std::numeric_limits<long>::max()) {
760 return false;
761 }
762
763 *call_site_line = strtol(tokens[next_idx++], &after_number, 10);
764 if (!IsValidAfterNumber(after_number) || *call_site_line < 0 ||
765 *call_site_line == std::numeric_limits<long>::max()) {
766 return false;
767 }
768
769 if (*has_call_site_file_id) {
770 *call_site_file_id = strtol(tokens[next_idx++], &after_number, 10);
771 // If the file id is -1, it might be an artificial function that doesn't
772 // have file id. So, we consider -1 as a valid special case.
773 if (!IsValidAfterNumber(after_number) || *call_site_file_id < -1 ||
774 *call_site_file_id == std::numeric_limits<long>::max()) {
775 return false;
776 }
777 }
778
779 *origin_id = strtol(tokens[next_idx++], &after_number, 10);
780 if (!IsValidAfterNumber(after_number) || *origin_id < 0 ||
781 *origin_id == std::numeric_limits<long>::max()) {
782 return false;
783 }
784
785 while (next_idx < tokens.size()) {
786 MemAddr address = strtoull(tokens[next_idx++], &after_number, 16);
787 if (!IsValidAfterNumber(after_number) ||
788 address == std::numeric_limits<unsigned long long>::max()) {
789 return false;
790 }
791 MemAddr size = strtoull(tokens[next_idx++], &after_number, 16);
792 if (!IsValidAfterNumber(after_number) ||
793 size == std::numeric_limits<unsigned long long>::max()) {
794 return false;
795 }
796 ranges->push_back({address, size});
797 }
798
799 return true;
800}
801
802// static
803bool SymbolParseHelper::ParseFunction(char* function_line, bool* is_multiple,
804 uint64_t* address, uint64_t* size,
805 long* stack_param_size, char** name) {
806 // FUNC [<multiple>] <address> <size> <stack_param_size> <name>
807 assert(strncmp(function_line, "FUNC ", 5) == 0);
808 function_line += 5; // skip prefix
809
810 vector<char*> tokens;
811 if (!TokenizeWithOptionalField(function_line, "m", kWhitespace, 5, &tokens)) {
812 return false;
813 }
814
815 *is_multiple = strcmp(tokens[0], "m") == 0;
816 int next_token = *is_multiple ? 1 : 0;
817
818 char* after_number;
819 *address = strtoull(tokens[next_token++], &after_number, 16);
820 if (!IsValidAfterNumber(after_number) ||
821 *address == std::numeric_limits<unsigned long long>::max()) {
822 return false;
823 }
824 *size = strtoull(tokens[next_token++], &after_number, 16);
825 if (!IsValidAfterNumber(after_number) ||
826 *size == std::numeric_limits<unsigned long long>::max()) {
827 return false;
828 }
829 *stack_param_size = strtol(tokens[next_token++], &after_number, 16);
830 if (!IsValidAfterNumber(after_number) ||
831 *stack_param_size == std::numeric_limits<long>::max() ||
832 *stack_param_size < 0) {
833 return false;
834 }
835 *name = tokens[next_token++];
836
837 return true;
838}
839
840// static
841bool SymbolParseHelper::ParseLine(char* line_line, uint64_t* address,
842 uint64_t* size, long* line_number,
843 long* source_file) {
844 // <address> <size> <line number> <source file id>
845 vector<char*> tokens;
846 if (!Tokenize(line_line, kWhitespace, 4, &tokens)) {
847 return false;
848 }
849
850 char* after_number;
851 *address = strtoull(tokens[0], &after_number, 16);
852 if (!IsValidAfterNumber(after_number) ||
853 *address == std::numeric_limits<unsigned long long>::max()) {
854 return false;
855 }
856 *size = strtoull(tokens[1], &after_number, 16);
857 if (!IsValidAfterNumber(after_number) ||
858 *size == std::numeric_limits<unsigned long long>::max()) {
859 return false;
860 }
861 *line_number = strtol(tokens[2], &after_number, 10);
862 if (!IsValidAfterNumber(after_number) ||
863 *line_number == std::numeric_limits<long>::max()) {
864 return false;
865 }
866 *source_file = strtol(tokens[3], &after_number, 10);
867 if (!IsValidAfterNumber(after_number) || *source_file < 0 ||
868 *source_file == std::numeric_limits<long>::max()) {
869 return false;
870 }
871
872 // Valid line numbers normally start from 1, however there are functions that
873 // are associated with a source file but not associated with any line number
874 // (block helper function) and for such functions the symbol file contains 0
875 // for the line numbers. Hence, 0 should be treated as a valid line number.
876 // For more information on block helper functions, please, take a look at:
877 // http://clang.llvm.org/docs/Block-ABI-Apple.html
878 if (*line_number < 0) {
879 return false;
880 }
881
882 return true;
883}
884
885// static
886bool SymbolParseHelper::ParsePublicSymbol(char* public_line, bool* is_multiple,
887 uint64_t* address,
888 long* stack_param_size,
889 char** name) {
890 // PUBLIC [<multiple>] <address> <stack_param_size> <name>
891 assert(strncmp(public_line, "PUBLIC ", 7) == 0);
892 public_line += 7; // skip prefix
893
894 vector<char*> tokens;
895 if (!TokenizeWithOptionalField(public_line, "m", kWhitespace, 4, &tokens)) {
896 return false;
897 }
898
899 *is_multiple = strcmp(tokens[0], "m") == 0;
900 int next_token = *is_multiple ? 1 : 0;
901
902 char* after_number;
903 *address = strtoull(tokens[next_token++], &after_number, 16);
904 if (!IsValidAfterNumber(after_number) ||
905 *address == std::numeric_limits<unsigned long long>::max()) {
906 return false;
907 }
908 *stack_param_size = strtol(tokens[next_token++], &after_number, 16);
909 if (!IsValidAfterNumber(after_number) ||
910 *stack_param_size == std::numeric_limits<long>::max() ||
911 *stack_param_size < 0) {
912 return false;
913 }
914 *name = tokens[next_token++];
915
916 return true;
917}
918
919// static
920bool SymbolParseHelper::IsValidAfterNumber(char* after_number) {
921 if (after_number != NULL && strchr(kWhitespace, *after_number) != NULL) {
922 return true;
923 }
924 return false;
925}
926
927} // namespace google_breakpad
928