1// SPDX-FileCopyrightText: 2023 UnionTech Software Technology Co., Ltd.
2//
3// SPDX-License-Identifier: GPL-3.0-or-later
4
5#include <assert.h>
6#include <math.h>
7#include <fnmatch.h>
8#include <limits.h>
9#include <linux/net.h>
10#include <linux/seccomp.h>
11#include <linux/if_tun.h>
12#include <linux/mman.h>
13#include <sys/socket.h>
14#include <sys/types.h>
15#include <sys/ptrace.h>
16
17#if defined(__aarch64__)
18#include "./aarch64/syscall.h"
19#else
20#include <sys/syscall.h>
21#endif
22
23#include <sys/mman.h>
24#include <sys/wait.h>
25#include <sys/un.h>
26
27#include "cpu.h"
28#include "debug.h"
29#include "session.h"
30#include "utils.h"
31#include "event_man.h"
32#include "WaitStatus.h"
33#include "easylogging++.h"
34#include "syscall_name.h"
35#include "syscall_filter.h"
36
37using namespace std;
38
// File-local counter that starts at zero.
// NOTE(review): nothing in this part of the file reads or writes it; confirm
// its users further down before relying on its meaning.
static int g_nonce = 0;
40
41///////////////////////////////////////////////////////////////////////
42// syscall filter utility
43#if defined(__x86_64__)
44#include "./x86_64/syscall_param.h"
45#elif defined(__mips64)
46#include "./mips64/syscall_param.h"
47#elif defined(__sw_64)
48#include "./sw64/syscall_param.h"
49#elif defined(__aarch64__)
50#include "./aarch64/syscall_param.h"
51#else
52#error need define new arch implement
53#endif
54
// System call description table for the single supported personality.
// The initializer list is pulled in verbatim from the architecture-specific
// syscallent header selected at compile time; building for any other
// architecture is a hard error. The qualify_syscall_* helpers below walk this
// table by index and read each entry's sys_name and sys_flags.
const struct_sysent g_sysent0[] = {
#if defined(__x86_64__)
#include "./x86_64/syscallent.h"
#elif defined(__mips64)
#include "./mips64/syscallent-n64.h"
#elif defined(__sw_64)
#include "./sw64/syscallent.h"
#elif defined(__aarch64__)
#include "./aarch64/syscallent.h"
#else
#error need define new arch implement
#endif
};
// Number of entries in g_sysent0, exposed as a compile-time constant.
enum SyscallCount{
    nsyscalls0 = ARRAY_SIZE(g_sysent0)
};
71
// Per-personality table sizes: g_nsyscall_vec[p] is the number of entries in
// g_sysent_vec[p]. Only the first slot is explicitly initialized here.
const unsigned int g_nsyscall_vec[SUPPORTED_PERSONALITIES] = {
    nsyscalls0
};
75
// Per-personality syscall tables, parallel to g_nsyscall_vec.
const struct_sysent *const g_sysent_vec[SUPPORTED_PERSONALITIES] = {
    g_sysent0,
};
79
// The syscall filter itself: one number set per personality. It is filled in
// by set_syscall_filter() via qualify_syscall_tokens().
static struct number_set g_trace_set[SUPPORTED_PERSONALITIES];
81
82static void number_setbit(const unsigned int i, number_slot_t *const vec)
83{
84 vec[i / BITS_PER_SLOT] |= (number_slot_t) 1 << (i % BITS_PER_SLOT);
85}
86
87inline bool number_isset(const unsigned int i, const number_slot_t *const vec)
88{
89 return vec[i / BITS_PER_SLOT] & ((number_slot_t) 1 << (i % BITS_PER_SLOT));
90}
91
92static void reallocate_number_set(struct number_set *const set,
93 const unsigned int new_nslots)
94{
95 if (new_nslots <= set->nslots)
96 return;
97 set->vec = (number_slot_t*)realloc(set->vec, new_nslots*sizeof(*set->vec));
98 memset(set->vec + set->nslots, 0,
99 sizeof(*set->vec) * (new_nslots - set->nslots));
100 set->nslots = new_nslots;
101}
102
103static void add_number_to_set(const unsigned int number,
104 struct number_set *const set)
105{
106 reallocate_number_set(set, number / BITS_PER_SLOT + 1);
107 number_setbit(number, set->vec);
108}
109
110static bool is_number_in_set(const unsigned int number, struct number_set* set)
111{
112 if (0 == set[0].nslots) return set[0].not_flag;
113 unsigned int no = number - __NR_Linux;
114 return (((no / BITS_PER_SLOT < set[0].nslots) &&
115 number_isset(no, set[0].vec)) ^ set[0].not_flag);
116}
117
118static bool qualify_syscall_number(const char *s, struct number_set *set)
119{
120 int n = atoi(s);
121 if (n < 0)
122 return false;
123
124 unsigned int p;
125 bool done = false;
126
127 for (p = 0; p < SUPPORTED_PERSONALITIES; ++p) {
128 if ((unsigned) n >= g_nsyscall_vec[p]) {
129 continue;
130 }
131 add_number_to_set(n, &set[p]);
132 done = true;
133 }
134
135 return done;
136}
137
138static unsigned int lookup_class(const char *s)
139{
140 static const struct {
141 const char *name;
142 unsigned int value;
143 } syscall_class[] = {
144 { "desc", TRACE_DESC},
145 { "file", TRACE_FILE},
146 { "memory", TRACE_MEMORY},
147 { "process", TRACE_PROCESS},
148 { "signal", TRACE_SIGNAL},
149 { "ipc", TRACE_IPC},
150 { "network", TRACE_NETWORK},
151 };
152
153 unsigned int i;
154 for (i = 0; i < ARRAY_SIZE(syscall_class); ++i) {
155 if (strcmp(s, syscall_class[i].name) == 0) {
156 return syscall_class[i].value;
157 }
158 }
159
160 return 0;
161}
162
163static bool qualify_syscall_class(const char *s, struct number_set *set)
164{
165 const unsigned int n = lookup_class(s);
166 if (!n)
167 return false;
168
169 unsigned int p;
170 for (p = 0; p < SUPPORTED_PERSONALITIES; ++p) {
171 unsigned int i;
172
173 for (i = 0; i < g_nsyscall_vec[p]; ++i) {
174 if (!g_sysent_vec[p][i].sys_name
175 || (g_sysent_vec[p][i].sys_flags & n) != n) {
176 continue;
177 }
178 add_number_to_set(i, &set[p]);
179 }
180 }
181
182 return true;
183}
184
185static bool qualify_syscall_name(const char *s, struct number_set *set)
186{
187 unsigned int p;
188 bool found = false;
189
190 for (p = 0; p < SUPPORTED_PERSONALITIES; ++p) {
191 unsigned int i;
192
193 for (i = 0; i < g_nsyscall_vec[p]; ++i) {
194 if (!g_sysent_vec[p][i].sys_name
195 || strcmp(s, g_sysent_vec[p][i].sys_name)) {
196 continue;
197 }
198 add_number_to_set(i, &set[p]);
199 found = true;
200 }
201 }
202
203 return found;
204}
205
206static bool qualify_syscall(const char *token, struct number_set *set)
207{
208 if (*token >= '0' && *token <= '9')
209 return qualify_syscall_number(token, set);
210 return qualify_syscall_class(token, set) ||
211 qualify_syscall_name(token, set);
212}
213
214/*
215 * Add syscall numbers to SETs for each supported personality
216 * according to STR specification.
217 */
218static void qualify_syscall_tokens(const char *const str,
219 struct number_set *const set, const char *const name, bool reset)
220{
221 /* Clear all sets. */
222 unsigned int p;
223 if (reset) {
224 for (p = 0; p < SUPPORTED_PERSONALITIES; ++p) {
225 if (set[p].nslots)
226 memset(set[p].vec, 0,
227 sizeof(*set[p].vec) * set[p].nslots);
228 set[p].not_flag = false;
229 }
230 }
231
232 /*
233 * Each leading ! character means inversion
234 * of the remaining specification.
235 */
236 const char *s = str;
237handle_inversion:
238 while (*s == '!') {
239 for (p = 0; p < SUPPORTED_PERSONALITIES; ++p) {
240 set[p].not_flag = !set[p].not_flag;
241 }
242 ++s;
243 }
244
245 if (strcmp(s, "none") == 0) {
246 /*
247 * No syscall numbers are added to sets.
248 * Subsequent is_number_in_set invocations
249 * will return set[p]->not_flag.
250 */
251 return;
252 } else if (strcmp(s, "all") == 0) {
253 s = "!none";
254 goto handle_inversion;
255 }
256
257 /*
258 * Split the string into comma separated tokens.
259 * For each token, call qualify_syscall that will take care
260 * if adding appropriate syscall numbers to sets.
261 * The absence of tokens or a negative return code
262 * from qualify_syscall is a fatal error.
263 */
264 char *copy = strdup(s);
265 char *saveptr = NULL;
266 const char *token;
267 bool done = false;
268
269 for (token = strtok_r(copy, ",", &saveptr); token;
270 token = strtok_r(NULL, ",", &saveptr)) {
271 done = qualify_syscall(token, set);
272 if (!done) {
273 LOG(ERROR) << "invalid " << name << ":" << token;
274 }
275 }
276
277 free(copy);
278
279 if (!done) {
280 LOG(ERROR) << "invalid " << name << ":" << token;
281 }
282}
283
284void set_syscall_filter(const char* filter, bool reset)
285{
286 qualify_syscall_tokens(filter, g_trace_set, "system call", reset);
287}
288
289static void link_exec_file(string& parent_dir, int pid)
290{
291 char exe_path[500];
292 char linkname[500];
293 snprintf(linkname, sizeof(linkname), "/proc/%d/exe", pid);
294 int len = readlink(linkname, exe_path, sizeof(exe_path));
295 if (len < 0) {
296 LOG(ERROR) << "failed to readlink for tracee " << pid
297 << ", errno=" << errno;
298 return;
299 }
300 exe_path[len] = 0;
301
302 snprintf(linkname, sizeof(linkname), "%s%s%d",
303 parent_dir.data(), EXEC_FILE_NAME, pid);
304 FILE* pf = fopen(linkname, "wb");
305 if (pf) {
306 fprintf(pf, "%s", exe_path);
307 fclose(pf);
308 }
309}
310
311///////////////////////////////////////////////////////////////////////
312//ptrace utility
313
// Expands to a switch label that returns the spelling of its own argument,
// e.g. CASE(FOO) becomes `case FOO: return "FOO";`.
#define CASE(id) \
    case id: \
        return #id;
317
318const char* ptrace_event_name(int event) {
319 switch (event) {
320 CASE(PTRACE_EVENT_FORK);
321 CASE(PTRACE_EVENT_VFORK);
322 CASE(PTRACE_EVENT_CLONE);
323 CASE(PTRACE_EVENT_EXEC);
324 CASE(PTRACE_EVENT_VFORK_DONE);
325 CASE(PTRACE_EVENT_EXIT);
326 /* XXX Ubuntu 12.04 defines a "PTRACE_EVENT_STOP", but that
327 * has the same value as the newer EVENT_SECCOMP, so we'll
328 * ignore STOP. */
329 CASE(PTRACE_EVENT_SECCOMP_OBSOLETE);
330 CASE(PTRACE_EVENT_SECCOMP);
331 CASE(PTRACE_EVENT_STOP);
332 /* Special-case this so we don't need to sprintf in this common case.
333 * This case is common because we often pass ptrace_event_name(event) to
334 * assertions when event is 0.
335 */
336 case 0:
337 return "PTRACE_EVENT(0)";
338 default: {
339 static char buf[100];
340 sprintf(buf, "PTRACE_EVENT(%d)", event);
341 return (buf);
342 }
343 }
344}
345
346static int ReadProcess(void* dest, TraceStream* stream, const void* src, int length)
347{
348 if (src == nullptr) return 0;
349
350 int got = stream->read((uintptr_t)src, dest, length);
351 if (got > 0) {
352 return got;
353 }
354
355 return read_mem(stream->get_pid(), (uintptr_t)src, dest, length);
356}
357
358bool inline my_ptrace(const char* reason,
359 enum __ptrace_request request, pid_t pid, void *addr, void *data)
360{
361 int ret = ptrace(request, pid, addr, data);
362 if (ret < 0) {
363 LOG(ERROR) << ptrace_cmd_name(request)
364 << " failed for tracee " << pid
365 << ", reason=" << reason
366 << ", errno=" << errno;
367
368 return false;
369 }
370
371#ifdef _DEBUG
372 LOG(DEBUG) << ptrace_cmd_name(request) << " ok for tracee " << pid
373 << ", reason=" << reason;
374#endif
375
376 return true;
377}
378
379inline long get_syscall_no(USER_REGS* regs)
380{
381#if defined(__x86_64__)
382 return regs->orig_rax;
383#elif defined(__mips__) || defined(__mips64)
384 return regs->v0;
385#elif defined(__sw_64)
386 return regs->v0;
387#elif defined(__aarch64__)
388 return regs->x8;
389#else
390#error Not implment
391#endif
392}
393
394inline long get_syscall_result(USER_REGS* regs)
395{
396#if defined(__x86_64__)
397 return regs->rax;
398#elif defined(__mips__) || defined(__mips64)
399 // $a3 set to 0/1 for success/error
400 return regs->v0;
401#elif defined(__sw_64)
402 // $a3 set to 0/1 for success/error
403 return regs->v0;
404#elif defined(__aarch64__)
405 return regs->x0;
406#else
407#error Not implment
408#endif
409}
410
411static void get_syscall_args(USER_REGS* regs, uintptr_t* args)
412{
413#if defined(__x86_64__)
414 // The kernel interface uses: %rdi, %rsi, %rdx, %r10, %r8 and %r9."
415 args[0] = regs->rdi;
416 args[1] = regs->rsi;
417 args[2] = regs->rdx;
418 args[3] = regs->r10;
419 args[4] = regs->r8;
420 args[5] = regs->r9;
421#elif defined(__mips__) || defined(__mips64)
422 //FIXME: The mips/o32 system call convention passes arguments 5 through 8 on the user
423 //stack.
424 args[0] = regs->a0;
425 args[1] = regs->a1;
426 args[2] = regs->a2;
427 args[3] = regs->a3;
428 args[4] = regs->a4;
429 args[5] = regs->a5;
430#elif defined(__sw_64)
431 args[0] = regs->a0;
432 args[1] = regs->a1;
433 args[2] = regs->a2;
434 args[3] = regs->a3;
435 args[4] = regs->a4;
436 args[5] = regs->a5;
437#elif defined(__aarch64__)
438 args[0] = regs->x0;
439 args[1] = regs->x1;
440 args[2] = regs->x2;
441 args[3] = regs->x3;
442 args[4] = regs->x4;
443 args[5] = regs->x5;
444#else
445#error Not implment
446#endif
447}
448
449///////////////////////////////////////////////////////////////////////////////
450// auxv and elf helper function
451
452#include "elf_parse.h"
453
454bool is_elf(const char* filename)
455{
456 MemoryMappedFile mapped_file(filename, O_RDONLY);
457 if (!mapped_file.data() ||
458 mapped_file.size() < SELFMAG) {
459 return false;
460 }
461 if (!IsValidElf(mapped_file.data())) {
462 return false;
463 }
464
465 int cls = ElfClass(mapped_file.data());
466 if (cls == ELFCLASS32) {
467 return false;
468 }
469 else if (cls == ELFCLASS64) {
470 return true;
471 }
472
473 return false;
474}
475
476static bool GetElfSectionRanges(const char* elf_mapped_base,
477 const char** section_names, int* section_types, int* ranges, int count)
478{
479 const char* addr = NULL;
480 size_t size = 0;
481 for (int i = 0; i<count; ++i) {
482 if (!FindElfSection(elf_mapped_base, section_names[i], section_types[i],
483 (const void**)&addr, &size)) {
484 return false;
485 }
486 ranges[2*i+0] = (int)(addr - elf_mapped_base);
487 ranges[2*i+1] = ranges[2*i] + (int)size;
488 }
489
490 return true;
491}
492
493// Attempt to locate a .note.gnu.build-id section in an ELF binary
494// and copy it into |identifier|.
495static bool FindElfBuildIDNote(const void* elf_mapped_base,
496 uint8_t* identifier)
497{
498 // lld normally creates 2 PT_NOTEs, gold normally creates 1.
499 vector<ElfSegment> segs;
500 if (FindElfSegments(elf_mapped_base, PT_NOTE, &segs)) {
501 for (ElfSegment& seg : segs) {
502 if (ElfClassBuildIDNoteIdentifier(seg.start, seg.size, identifier)) {
503 return true;
504 }
505 }
506 }
507
508 void* note_section;
509 size_t note_size;
510 if (FindElfSection(elf_mapped_base, ".note.gnu.build-id", SHT_NOTE,
511 (const void**)&note_section, &note_size)) {
512 return ElfClassBuildIDNoteIdentifier(note_section, note_size, identifier);
513 }
514
515 return false;
516}
517
518// Attempt to locate the .text section of an ELF binary and generate
519// a simple hash by XORing the first page worth of bytes into |identifier|.
520static bool HashElfTextSection(const void* elf_mapped_base,
521 uint8_t* identifier)
522{
523 void* text_section;
524 size_t text_size;
525 if (!FindElfSection(elf_mapped_base, ".text", SHT_PROGBITS,
526 (const void**)&text_section, &text_size) ||
527 text_size == 0) {
528 return false;
529 }
530
531 // Only provide |kMDGUIDSize| bytes to keep identifiers produced by this
532 // function backwards-compatible.
533 memset(identifier, 0, kMDGUIDSize);
534 const uint8_t* ptr = reinterpret_cast<const uint8_t*>(text_section);
535 const uint8_t* ptr_end = ptr + std::min(text_size, static_cast<size_t>(4096));
536 while (ptr < ptr_end) {
537 for (unsigned i = 0; i < kMDGUIDSize; i++)
538 identifier[i] ^= ptr[i];
539 ptr += kMDGUIDSize;
540 }
541 return true;
542}
543
544bool ElfFileIdentifierFromMappedFile(const void* base, uint8_t* identifier)
545{
546 // Look for a build id note first.
547 if (FindElfBuildIDNote(base, identifier))
548 return true;
549
550 // Fall back on hashing the first page of the text section.
551 return HashElfTextSection(base, identifier);
552}
553
554static void get_system_info(MDRawSystemInfo* info)
555{
556 memset(info, 0, sizeof(*info));
557
558#if defined(__i386__)
559 info->processor_architecture = MD_CPU_ARCHITECTURE_X86;
560#elif defined(__x86_64__)
561 info->processor_architecture = MD_CPU_ARCHITECTURE_AMD64;
562#elif defined(__arm__)
563 info->processor_architecture = MD_CPU_ARCHITECTURE_ARM;
564#elif defined(__aarch64__)
565 info->processor_architecture = MD_CPU_ARCHITECTURE_ARM64;
566#elif defined(__mips__)
567# if _MIPS_SIM == _ABIO32
568 info->processor_architecture = MD_CPU_ARCHITECTURE_MIPS;
569# elif _MIPS_SIM == _ABI64
570 info->processor_architecture = MD_CPU_ARCHITECTURE_MIPS64;
571# else
572# error "This mips ABI is currently not supported (n32)"
573# endif
574#elif defined(__sw_64)
575 info->processor_architecture = MD_CPU_ARCHITECTURE_SW64;
576#else
577#error "This code has not been ported to your platform yet"
578#endif
579
580 // dump deepin version and Linux kernel version
581 MemoryMappedFile lsb_release("/etc/lsb-release", 0);
582 uint32_t length = lsb_release.size();
583 memcpy(info->lsb_release, lsb_release.data(),
584 (length > sizeof(info->lsb_release)) ?
585 sizeof(info->lsb_release) : length);
586
587 system("uname -a > /tmp/.uname.nux");
588 MemoryMappedFile uname("/tmp/.uname.nux", 0);
589 length = uname.size();
590 memcpy(info->uname, uname.data(),
591 (length > sizeof(info->lsb_release)) ?
592 sizeof(info->lsb_release) : length);
593}
594
595bool EnumerateMappings(vector< shared_ptr<MappingInfo> >& mappings,
596 vector<elf_aux_val_t>& auxv, int pid, VmSegment* stack, VmSegment* heap)
597{
598 char maps_path[NAME_MAX];
599 snprintf(maps_path, sizeof(maps_path), "/proc/%d/maps", pid);
600
601 // linux_gate_loc is the beginning of the kernel's mapping of
602 // linux-gate.so in the process. It doesn't actually show up in the
603 // maps list as a filename, but it can be found using the AT_SYSINFO_EHDR
604 // aux vector entry, which gives the information necessary to special
605 // case its entry when creating the list of mappings.
606 // See http://www.trilithium.com/johan/2005/08/linux-gate/ for more
607 // information.
608 const void* linux_gate_loc =
609 reinterpret_cast<void *>(auxv[AT_SYSINFO_EHDR]);
610 // Although the initial executable is usually the first mapping, it's not
611 // guaranteed (see http://crosbug.com/25355); therefore, try to use the
612 // actual entry point to find the mapping.
613 const void* entry_point_loc = reinterpret_cast<void *>(auxv[AT_ENTRY]);
614
615 const int fd = open(maps_path, O_RDONLY, 0);
616 if (fd < 0)
617 return false;
618
619 unique_ptr<LineReader> line_reader = make_unique<LineReader>(fd);
620
621 const char* line = nullptr;
622 unsigned int line_len = 0;
623 while (line_reader->GetNextLine(&line, &line_len)) {
624 int offset;
625 uintptr_t start_addr, end_addr;
626
627 char* i1 = nullptr;
628 start_addr = strtoul(line, &i1, 16);
629 if (*i1 == '-') {
630 char* i2 = nullptr;
631 end_addr = strtoul(i1+1, &i2, 16);
632 if (*i2 == ' ') {
633 // bool write = (*(i2 + 2) == 'w');
634 bool exec = (*(i2 + 3) == 'x');
635 char* i3 = nullptr;
636 offset = strtol(i2 + 6, &i3, 16/* skip ' rwxp ' */);
637 if (*i3 == ' ') {
638 const char* name = nullptr;
639 // Only copy name if the name is a valid path name, or if
640 // it's the VDSO image.
641 if ((name = strchr(i3, '/')) == nullptr){
642 if (linux_gate_loc &&
643 reinterpret_cast<void*>(start_addr) == linux_gate_loc) {
644 name = kLinuxGateLibraryName;
645 offset = 0;
646 }
647 else if (strstr(i3, "[heap]") != nullptr) {
648 heap->start = start_addr;
649 heap->end = end_addr;
650 }
651 else if (strstr(i3, "[stack]") != nullptr) {
652 stack->start = start_addr;
653 stack->end = end_addr;
654 }
655 }
656 // Merge adjacent mappings into one module, assuming they're a single
657 // library mapped by the dynamic linker. Do this only if their name
658 // matches and either they have the same +x protection flag, or if the
659 // previous mapping is not executable and the new one is, to handle
660 // lld's output (see crbug.com/716484).
661 if (name && !mappings.empty()) {
662 MappingInfo* module = mappings.back().get();
663 if ((start_addr == module->start_addr + module->size) &&
664 (strlen(name) == strlen(module->name)) &&
665 (strncmp(name, module->name, strlen(name)) == 0) &&
666 (offset > module->offset) &&
667 ((exec == module->exec) || (!module->exec && exec))) {
668 module->size = end_addr - module->start_addr;
669 module->exec |= exec;
670 line_reader->PopLine(line_len);
671 continue;
672 }
673 }
674 shared_ptr<MappingInfo> module = make_shared<MappingInfo>();
675 mappings.push_back(module);
676 memset(module.get(), 0, sizeof(MappingInfo));
677 module->start_addr = start_addr;
678 module->size = end_addr - start_addr;
679 module->offset = offset;
680 module->exec = exec;
681 if (name != nullptr) {
682 const unsigned int l = strlen(name);
683 assert(l < sizeof(module->name));
684 memcpy(module->name, name, l);
685 }
686 }
687 }
688 }
689 line_reader->PopLine(line_len);
690 }
691
692 if (entry_point_loc) {
693 for (size_t i = 0; i < mappings.size(); ++i) {
694 auto module = mappings[i];
695
696 // If this module contains the entry-point, and it's not already the first
697 // one, then we need to make it be first. This is because the minidump
698 // format assumes the first module is the one that corresponds to the main
699 // executable (as codified in
700 // processor/minidump.cc:MinidumpModuleList::GetMainModule()).
701 if ((entry_point_loc >= reinterpret_cast<void*>(module->start_addr)) &&
702 (entry_point_loc <
703 reinterpret_cast<void*>(module->start_addr + module->size))) {
704 for (size_t j = i; j > 0; j--) {
705 mappings[j] = mappings[j - 1];
706 }
707 mappings[0] = module;
708 break;
709 }
710 }
711 }
712
713 close(fd);
714
715 return !mappings.empty();
716}
717
718static bool ShouldIncludeMapping(const MappingInfo& mapping)
719{
720 // TODO: should exlucde /etc/ld.so.cache
721 if (mapping.name[0] == 0 || // only want modules with filenames.
722 // Only want to include one mapping per shared lib.
723 // Avoid filtering executable mappings.
724 (mapping.offset != 0 && !mapping.exec) ||
725 mapping.size < 4096) { // too small to get a signature for.
726 return false;
727 }
728
729 return true;
730}
731
732inline void GetMappingEffectivePath(const MappingInfo& mapping,
733 char* file_path, size_t file_path_size)
734{
735 strncpy(file_path, mapping.name, file_path_size);
736
737 // If an executable is mapped from a non-zero offset, this is likely because
738 // the executable was loaded directly from inside an archive file (e.g., an
739 // apk on Android). We try to find the name of the shared object (SONAME) by
740 // looking in the file for ELF sections.
741#if 0
742 bool mapped_from_archive = false;
743 if (mapping.exec && mapping.offset != 0) {
744 LOG(DEBUG) << "Maybe load from an archive file, offset="
745 << mapping.offset << ", file=" << file_path;
746 }
747#endif
748}
749
750/**
751 * Returns the name of the first dynamic library that |exe_file| depends on
752 * that starts with |prefix|, or an empty string if there isn't one or
753 * anything fails.
754 */
755string find_needed_library_starting_with(const string& exe_file,
756 const string& prefix)
757{
758 // load elf file!
759 MemoryMappedFile mapped_file(exe_file.c_str(), O_RDONLY);
760 if (!mapped_file.data() ||
761 mapped_file.size() < SELFMAG) {
762 return string();
763 }
764 if (!IsValidElf(mapped_file.data())) {
765 return string();
766 }
767
768 const char* name = nullptr;
769 int cls = ElfClass(mapped_file.data());
770 if (cls == ELFCLASS32) {
771 name = FindLibClassStartWith<ElfClass32>(mapped_file.data(),
772 mapped_file.size(), prefix.data());
773 }
774 else if (cls == ELFCLASS64) {
775 name = FindLibClassStartWith<ElfClass64>(mapped_file.data(),
776 mapped_file.size(), prefix.data());
777 }
778 if (name) return string(name);
779 return string();
780}
781
// Per-file identification data cached by FillRawModule().
typedef struct tagElfIdInfo{
    string guid;   // raw identifier bytes (kMDGUIDSize of them), not text
    int rw_offset; // first rw_offset; FillRawModule() sets it from the second
                   // PT_LOAD segment's offset within the mapped file
    int addrs[4];  // .data and .bss: [start, end) offsets of each, in that order
}ElfIdInfo;
// Cache of identification data keyed by mapping path name, so that each file
// is identified only once (see FillRawModule()).
static map<string, ElfIdInfo> g_guid_maps;
788
789// Fill the MDRawModule |mod| with information about the provided
790// |mapping|. If |identifier| is non-nullptr, use it instead of calculating
791// a file ID from the mapping.
792bool FillRawModule(const MappingInfo& mapping,
793 ElfSymbolFiles& modules, TraceStream* stream, MDRawModule* mod)
794{
795 memset(mod, 0xcc, sizeof(MDRawModule));
796
797 mod->base_of_image = mapping.start_addr;
798 mod->size_of_image = mapping.size;
799 GetMappingEffectivePath(mapping, mod->file_path, sizeof(mod->file_path));
800
801 if (mapping.exec && modules.find(mapping.name) == modules.end()) {
802 // not cached in current process, but maybe has cached in other process.
803 // FIXME: some module has more than one execute segment,e.g.:
804 /*
805 7ffff7bbc000-7ffff7bd4000 r-xp 00000000 08:02 1449013 /usr/lib/x86_64-linux-gnu/libpthread-2.24.so
806 7ffff7bd4000-7ffff7dd3000 ---p 00018000 08:02 1449013 /usr/lib/x86_64-linux-gnu/libpthread-2.24.so
807 7ffff7dd3000-7ffff7dd9000 r-xp 00217000 08:02 1449013 /usr/lib/x86_64-linux-gnu/libpthread-2.24.so
808 */
809 std::shared_ptr<SymbolFile> helper = make_shared<SymbolFile>(
810 mapping.start_addr, mapping.name);
811 modules.insert(pair<string, shared_ptr<SymbolFile>>(
812 string(&mapping.name[0]), helper));
813 }
814
815 auto it = g_guid_maps.find(mapping.name);
816 if (it != g_guid_maps.end()) {
817 memcpy(mod->guid, it->second.guid.data(), kMDGUIDSize);
818 }
819 else {
820 ElfIdInfo idinfo;
821 // Special-case linux-gate because it's not a real file.
822 // Look for a build id note first.
823 // Fall back on hashing the first page of the text section.
824 if (strcmp(mapping.name, kLinuxGateLibraryName) == 0) {
825 // load linux_gate context.
826 unique_ptr<char []> linux_gate = make_unique<char []>(mapping.size);
827 ReadProcess(linux_gate.get(), stream,
828 reinterpret_cast<const void*>(mapping.start_addr),
829 mapping.size);
830 ElfFileIdentifierFromMappedFile(linux_gate.get(), mod->guid);
831 }
832 // skip kMappedFileUnsafePrefix and kDeletedSuffix file
833 else if ( strstr(mapping.name, kMappedFileUnsafePrefix) ||
834 strstr(mapping.name, kDeletedSuffix)) {
835 return false;
836 }
837 else {
838 // load the whole elf file!
839 MemoryMappedFile mapped_file(mapping.name, 0);
840 if (!mapped_file.data() ||
841 mapped_file.size() < SELFMAG) {
842 return false;
843 }
844 if (!IsValidElf(mapped_file.data())) {
845 return false;
846 }
847 ElfFileIdentifierFromMappedFile(mapped_file.data(), mod->guid);
848
849 int section_types[] = {SHT_PROGBITS, SHT_NOBITS};
850 const char* section_names[] = {".data", ".bss"};
851 memset(&idinfo.addrs[0], 0, sizeof(idinfo.addrs));
852 GetElfSectionRanges((const char*)mapped_file.data(),
853 section_names, section_types, &idinfo.addrs[0],
854 sizeof(section_types)/sizeof(int));
855
856 vector<ElfSegment> segs;
857 if (FindElfSegments(mapped_file.data(), PT_LOAD, &segs)) {
858 idinfo.rw_offset = (int)((const char*)segs[1].start - (char*)mapped_file.data());
859 }
860 else {
861 idinfo.rw_offset = 0;
862 }
863 }
864
865 idinfo.guid = string((char*)(mod->guid), kMDGUIDSize);
866 g_guid_maps.insert(pair<string, ElfIdInfo>(string(&mapping.name[0]), idinfo));
867 }
868
869 return true;
870}
871
872// Write information about the mappings in effect. Because we are using the
873// minidump format, the information about the mappings is pretty limited.
874// Because of this, we also include the full, unparsed, /proc/$x/maps file in
875// another stream in the file.
876bool WriteMappings(ZstdWriter& file,
877 const vector< shared_ptr<MappingInfo> >& mappings,
878 ElfSymbolFiles& modules, TraceStream* stream)
879{
880 const unsigned int num_mappings = mappings.size();
881 unsigned int num_output_mappings = 0;
882
883 for (unsigned int i = 0; i < mappings.size(); ++i) {
884 const MappingInfo& mapping = *mappings[i];
885 if (ShouldIncludeMapping(mapping))
886 num_output_mappings++;
887 }
888
889#ifdef _DEBUG2
890 LOG(DEBUG) << "module count:" << num_output_mappings;
891#endif
892
893 file.write(&num_output_mappings, sizeof(num_output_mappings));
894
895 for (unsigned int i = 0; i < num_mappings; ++i) {
896 const MappingInfo& mapping = *mappings[i];
897 if (!ShouldIncludeMapping(mapping))
898 continue;
899
900 MDRawModule mod;
901 FillRawModule(mapping, modules, stream, &mod);
902 file.write(&mod, sizeof(mod));
903#ifdef _DEBUG2
904 LOG(DEBUG) << "write module:" << mod.file_path
905 << " at " << HEX(mod.base_of_image)
906 << ", size=" << HEX(mod.size_of_image);
907#endif
908 }
909
910 return true;
911}
912
913bool WriteDSODebugStream(ZstdWriter& file, vector<elf_aux_val_t>& auxv, TraceStream* stream)
914{
915 vector<MDRawLinkMap> dso_list;
916 MDRawDebug debug;
917 char* base = nullptr;
918 int phnum = auxv[AT_PHNUM];
919 int size = 0;
920 ElfW(Phdr)* phdr = reinterpret_cast<ElfW(Phdr) *>(auxv[AT_PHDR]);
921 if (!phnum || !phdr) {
922 file.write(&size, sizeof(size));
923 return false;
924 }
925
926 // Assume the program base is at the beginning of the same page as the PHDR
927 base = reinterpret_cast<char *>(reinterpret_cast<uintptr_t>(phdr) & ~0xfff);
928
929 // Search for the program PT_DYNAMIC segment
930 ElfW(Addr) dyn_addr = 0;
931 for (; phnum >= 0; phnum--, phdr++) {
932 ElfW(Phdr) ph;
933 if (sizeof(ph) != ReadProcess(&ph, stream, phdr, sizeof(ph))) {
934 file.write(&size, sizeof(size));
935 return false;
936 }
937
938 // Adjust base address with the virtual address of the PT_LOAD segment
939 // corresponding to offset 0
940 if (ph.p_type == PT_LOAD && ph.p_offset == 0) {
941 base -= ph.p_vaddr;
942 }
943 if (ph.p_type == PT_DYNAMIC) {
944 dyn_addr = ph.p_vaddr;
945 }
946 }
947 if (!dyn_addr) {
948 file.write(&size, sizeof(size));
949 return false;
950 }
951
952 ElfW(Dyn) *dynamic = reinterpret_cast<ElfW(Dyn) *>(dyn_addr + base);
953
954 // The dynamic linker makes information available that helps gdb find all
955 // DSOs loaded into the program. If this information is indeed available,
956 // dump it to a MD_LINUX_DSO_DEBUG stream.
957 // see readelf -d for more information
958 uint32_t dynamic_length = 0;
959 struct r_debug* r_debug_addr = nullptr;
960
961 for (int i = 0; ; ++i) {
962 ElfW(Dyn) dyn;
963 dynamic_length += sizeof(dyn);
964 if (sizeof(dyn) != ReadProcess(&dyn, stream, dynamic + i, sizeof(dyn))) {
965 file.write(&size, sizeof(size));
966 return false;
967 }
968
969 // in gdb: see `info address _r_debug`
970#ifdef __mips__
971 if (dyn.d_tag == DT_MIPS_RLD_MAP) {
972 r_debug_addr = reinterpret_cast<struct r_debug*>(dyn.d_un.d_ptr);
973 ReadProcess(&r_debug_addr, stream, r_debug_addr, sizeof(r_debug_addr));
974 continue;
975 }
976 else if (dyn.d_tag == DT_MIPS_RLD_MAP_REL) {
977 /* NOTE:
978 start from deepin 15.5 SP3, DT_MIPS_RLD_MAP has been changed to DT_MIPS_RLD_MAP_REL
979 And add a new .rld_map section.
980 An alternative description of the classic MIPS RLD_MAP that is usable
981 in a PIE as it stores a relative offset from the address of the tag
982 rather than an absolute address.
983 */
984 char* rld_map = (char*)(dynamic + i) + dyn.d_un.d_ptr;
985 ReadProcess(&r_debug_addr, stream, rld_map, sizeof(r_debug_addr));
986 debug.rld_map = (uint64_t)rld_map;
987 continue;
988#else
989 if (dyn.d_tag == DT_DEBUG) {
990 r_debug_addr = reinterpret_cast<struct r_debug*>(dyn.d_un.d_ptr);
991 continue;
992#endif
993 } else if (dyn.d_tag == DT_NULL) {
994 break;
995 }
996 }
997
998 // The "r_map" field of that r_debug struct contains a linked list of all
999 // loaded DSOs.
1000 // Our list of DSOs potentially is different from the ones in the crashing
1001 // process. So, we have to be careful to never dereference pointers
1002 // directly. Instead, we use ReadProcess() everywhere.
1003 // See <link.h> for a more detailed discussion of the how the dynamic
1004 // loader communicates with debuggers.
1005
1006 // Count the number of loaded DSOs
1007 struct r_debug debug_entry;
1008 if (sizeof(debug_entry) != ReadProcess(&debug_entry,
1009 stream, r_debug_addr, sizeof(debug_entry))) {
1010 file.write(&size, sizeof(size));
1011 return false;
1012 }
1013
1014 LOG(DEBUG) << "load r_debug at: " << r_debug_addr
1015 << ", r_version=" << debug_entry.r_version
1016 << ", r_map=" << debug_entry.r_map
1017 << ", r_brk=" << HEX(debug_entry.r_brk)
1018 << ", r_state=" << HEX(debug_entry.r_state)
1019 << ", loader base :" << HEX(debug_entry.r_ldbase)
1020 << ", _DYNAMIC address :" << dynamic << "," << dynamic_length;
1021
1022 for (struct link_map* ptr = debug_entry.r_map; ptr; ) {
1023 struct link_map map;
1024 if (sizeof(map) != ReadProcess(&map, stream, ptr, sizeof(map))) {
1025 file.write(&size, sizeof(size));
1026 return false;
1027 }
1028
1029 MDRawLinkMap entry;
1030 if (map.l_name) {
1031 ReadProcess(entry.name, stream, map.l_name, sizeof(entry.name) - 1);
1032 }
1033 entry.name[sizeof(entry.name) -1] = 0;
1034 entry.addr = map.l_addr;
1035 entry.ld = reinterpret_cast<uintptr_t>(map.l_ld);
1036 dso_list.push_back(entry);
1037
1038#ifdef _DEBUG2
1039 LOG(DEBUG) << "\tdso:" << HEX(map.l_addr) << ", " << entry.name;
1040#endif
1041
1042 ptr = map.l_next;
1043 }
1044
1045 // Write MD_LINUX_DSO_DEBUG record
1046 debug.version = debug_entry.r_version;
1047 debug.dso_count = dso_list.size();
1048 debug.brk = debug_entry.r_brk;
1049 debug.ldbase = debug_entry.r_ldbase;
1050 debug.dynamic = reinterpret_cast<uintptr_t>(dynamic);
1051
1052 size = sizeof(debug);
1053 file.write(&size, sizeof(size));
1054 file.write(&debug, sizeof(debug));
1055
1056 for (uint32_t i = 0; i<debug.dso_count; ++i) {
1057 file.write(&dso_list[i], sizeof(MDRawLinkMap));
1058 }
1059
1060 vector<char> dso_data;
1061 // The passed-in size to the constructor (above) is only a hint.
1062 // Must call .resize() to do actual initialization of the elements.
1063 dso_data.resize(dynamic_length);
1064 ReadProcess(&dso_data[0], stream, dynamic, dynamic_length);
1065 file.write(&dynamic_length, sizeof(dynamic_length));
1066 file.write(&dso_data[0], dynamic_length);
1067
1068 return debug.dso_count > 0;
1069}
1070
1071///////////////////////////////////////////////////////////////////////////////
1072
1073static void copy_variables(vector<Variable>& src,
1074 vector<TraceProcess::VariableEx>& dest)
1075{
1076 for (auto& v : src) {
1077 TraceProcess::VariableEx v2;
1078 v2.address = 0;
1079 v2.address_ptr = 0;
1080 v2.max_size = v.max_size;
1081 v2.is_pointer = v.is_pointer;
1082 v2.is_loaded = false;
1083 v2.is_pointer_loaded = false;
1084 v2.sym_name = v.sym_name;
1085 dest.push_back(v2);
1086 }
1087}
1088
// Round `len` up to the next multiple of four and zero-fill the bytes that
// were added at the end of `buf`. The caller must provide room for up to
// three extra bytes behind `buf + len`.
static void align_4bytes (char* buf, int& len)
{
    const int remainder = len & 3;
    if (0 == remainder) {
        return;     // already aligned, nothing to pad
    }

    const int padding = 4 - remainder;
    memset(buf + len, 0, padding);
    len += padding;
}
1097
1098void TraceProcess::process_magic_syscall(pid_t tid,
1099 int syscall_no, uintptr_t* syscall_args)
1100{
1101 int len = 0;
1102 char detail[EVENT_EXTRA_INFO_SIZE];
1103
1104 switch (syscall_no) {
1105 case SYS_dump_x11: {
1106 detail[0] = 0;
1107 if (syscall_args[1] && syscall_args[2] < sizeof(detail)-4) {
1108 len = ReadProcess(&detail[0], &m_ctx_stream,
1109 reinterpret_cast<const void*>(syscall_args[1]), syscall_args[2]);
1110 align_4bytes(detail, len);
1111 }
1112
1113 dump_event(DUMP_REASON_x11 + (char)(syscall_args[0]), tid, detail, len);
1114 LOG(DEBUG) << "X11-" << syscall_args[0] << ":" << &detail[0];
1115 }
1116 break;
1117
1118 case SYS_dump_dbus: {
1119 detail[0] = 0;
1120 if (syscall_args[1] && syscall_args[2] < sizeof(detail)-4) {
1121 len = ReadProcess(&detail[0], &m_ctx_stream,
1122 reinterpret_cast<const void*>(syscall_args[1]), syscall_args[2]);
1123 align_4bytes(detail, len);
1124 }
1125
1126 dump_event(DUMP_REASON_dbus + (char)(syscall_args[0]), tid, detail, len);
1127 LOG(DEBUG) << "DBUS-" << syscall_args[0] << ":" << &detail[0];
1128 }
1129 break;
1130 default:
1131 break;
1132 }
1133}
1134
1135// @return: true, trace syscall-exit-stop;
1136// false, skip syscall-exit-stop;
1137bool TraceProcess::process_syscall_enter(pid_t tid,
1138 int syscall_no, uintptr_t* syscall_args)
1139{
1140 if (SYS_restart_syscall == syscall_no) return true;
1141 LOG(DEBUG) << "(SYSCALL " << syscall_name(syscall_no)
1142 << ") enter for tracee " << tid;
1143
1144 ++m_counter.syscall_enter;
1145
1146 if (syscall_no >= SYS_init_buffers) {
1147 process_magic_syscall(tid, syscall_no, syscall_args);
1148 return true;
1149 }
1150
1151 switch (syscall_no) {
1152 case SYS_fork:
1153#if defined(__x86_64__)
1154 case SYS_vfork:
1155#endif
1156 case SYS_clone:
1157 return prepare_clone(tid, syscall_no, syscall_args) >= 0;
1158 default:
1159 if (syscall_no < __NR_Linux) {
1160 if (89 == syscall_no) {
1161 // on 3.10.0 #1 SMP PREEMPT Fri Dec 14 15:20:30 CST 2018 00022-g5b64ed3 mips64
1162 // SYS_dump_x11 and SYS_dump_dbus will got 89
1163 // and syscall_args[3] always set to 1, indicate syscall failed.
1164 syscall_no = (1 == syscall_args[4]) ? SYS_dump_x11 : SYS_dump_dbus;
1165 process_magic_syscall(tid, syscall_no, syscall_args);
1166 }
1167 else {
1168 LOG(WARNING) << "(SYSCALL " << syscall_name(syscall_no)
1169 << " < __NR_Linux";
1170 }
1171 return false;
1172 }
1173 else if (is_number_in_set(syscall_no, &g_trace_set[0])) {
1174 dump_event(syscall_no, tid, syscall_args, 0);
1175 }
1176 return true;
1177 }
1178}
1179
1180bool TraceProcess::process_syscall_exit(pid_t tid,
1181 int syscall_no, uintptr_t* syscall_args, long syscall_result)
1182{
1183 bool dump = true;
1184 if (SYS_restart_syscall == syscall_no) return false;
1185 ++m_counter.syscall_exit;
1186 LOG(DEBUG) << "(SYSCALL " << syscall_name(syscall_no)
1187 << ") exit for tracee " << tid
1188 << ", result=" << syscall_result
1189 << ", continue=" << m_cont_type;
1190
1191 if (syscall_no >= SYS_init_buffers) {
1192 // ignore magic syscall
1193 ptrace(PTRACE_POKEUSER, tid, sizeof(long)*SYSCALL_RESULT_INDEX, 0);
1194 return false;
1195 }
1196
1197 switch (syscall_no) {
1198 case SYS_clone:
1199 // FIXME: It seems no need call PTRACE_SYSCALL after SYS_clone?
1200 /*
1201 if (syscall_result > 0) {
1202 my_ptrace("SYS_clone_succeed", PTRACE_SYSCALL, syscall_result, 0, 0);
1203 }*/
1204 break;
1205 case SYS_execve:
1206 // see PTRACE_EVENT_EXEC
1207 dump = false;
1208 break;
1209 case SYS_brk:
1210 dump_maps(syscall_no, 0);
1211 break;
1212 case SYS_mmap:
1213 if (syscall_result >= 0) {
1214 // NOTE: here we can't ignore anonymous map, or will cause call stack
1215 // back-trace not work in core file, because some thread stack or parameter
1216 // buffer in anonymous map;
1217 dump_maps(syscall_no, syscall_args[2]);
1218 }
1219 break;
1220 case SYS_mremap:
1221 // case SYS_munmap:
1222 if (syscall_result >= 0) {
1223 dump_maps(syscall_no, 0);
1224 }
1225 break;
1226 case SYS_mprotect:
1227 break;
1228 default:
1229 if (syscall_no < __NR_Linux) {
1230 dump = false;
1231 }
1232 break;
1233 }
1234
1235 if (dump && is_number_in_set(syscall_no, &g_trace_set[0])) {
1236 dump_event(DUMP_REASON_syscall_exit+syscall_no,
1237 tid, &syscall_result, sizeof(syscall_result));
1238 }
1239
1240 return dump;
1241}
1242
1243int TraceProcess::prepare_clone(pid_t tid, int syscall_no, uintptr_t* syscall_args)
1244{
1245 uintptr_t flags;
1246 int ptrace_event;
1247 int termination_signal = SIGCHLD;
1248
1249 if (syscall_no == SYS_clone) {
1250 flags = syscall_args[0];
1251 if (flags & CLONE_UNTRACED) {
1252 assert(0 && "should remove CLONE_UNTRACED");
1253 }
1254 termination_signal = flags & 0xff;
1255 if (flags & CLONE_VFORK) {
1256 ptrace_event = PTRACE_EVENT_VFORK;
1257 } else if (termination_signal == SIGCHLD) {
1258 ptrace_event = PTRACE_EVENT_FORK;
1259 } else {
1260 ptrace_event = PTRACE_EVENT_CLONE;
1261 }
1262#if defined(__x86_64__)
1263 } else if (syscall_no == SYS_vfork) {
1264 ptrace_event = PTRACE_EVENT_VFORK;
1265 flags = CLONE_VM | CLONE_VFORK | SIGCHLD;
1266#endif
1267 } else {
1268 ptrace_event = PTRACE_EVENT_FORK;
1269 flags = SIGCHLD;
1270 }
1271
1272 if (is_number_in_set(syscall_no, &g_trace_set[0])) {
1273 dump_event(syscall_no, tid, syscall_args, 0);
1274 }
1275
1276 if (!my_ptrace("SYS_clone_execute", PTRACE_SYSCALL, tid, 0, 0)) {
1277 ++m_counter.ptrace_error;
1278 return -1;
1279 }
1280
1281 WaitStatus status;
1282 int raw_status = 0;
1283 for (;;) {
1284 pid_t ret = waitpid(tid, &raw_status, __WALL);
1285 if (tid == ret) {
1286 status = WaitStatus(raw_status);
1287
1288 if (WaitStatus::SYSCALL_STOP == status.type()) {
1289 // clone failed
1290 long syscall_result = ptrace(PTRACE_PEEKUSER,
1291 tid, sizeof(long)*SYSCALL_RESULT_INDEX, NULL);
1292 process_syscall_exit(tid, syscall_no, syscall_args, syscall_result);
1293 LOG(ERROR) << "clone failed for tracee " << tid;
1294 return -2;
1295 }
1296
1297 //NOTE: PTRACE_EVENT_SECCOMP will arrived before PTRACE_EVENT_CLONE
1298 // (PTRACE_EVENT_VFORK, PTRACE_EVENT_FORK) if enable seccom.
1299 if (PTRACE_EVENT_SECCOMP != status.ptrace_event()) {
1300 break;
1301 }
1302 } else if (tid < 0) {
1303 int wait_errno = errno;
1304 if ((wait_errno == ECHILD) && m_syscall_state.empty()) {
1305 break;
1306 }
1307 continue;
1308 }
1309
1310 assert(m_syscall_state.find(ret) != m_syscall_state.end());
1311 if (!process_status(raw_status, ret))
1312 return -3;
1313 }
1314
1315 if (WaitStatus::PTRACE_EVENT != status.type()) {
1316 LOG(ERROR) << "Not found PTRACE_EVENT for tracee " << tid
1317 << ", status=" << HEX(raw_status)
1318 << ", type=" <<status.type();
1319 return -4;
1320 }
1321
1322 if (ptrace_event != status.ptrace_event()) {
1323 LOG(WARNING) << "Got " << ptrace_event_name(status.ptrace_event())
1324 << " for " << tid
1325 << " but require " << ptrace_event_name(ptrace_event);
1326 assert(0);
1327 }
1328
1329 // Ideally we'd just use t->get_ptrace_eventmsg_pid() here, but
1330 // kernels failed to translate that value from other pid namespaces to
1331 // our pid namespace until June 2014:
1332 // https://github.com/torvalds/linux/commit/4e52365f279564cef0ddd41db5237f0471381093
1333 pid_t new_tid = 0;
1334 my_ptrace("query_cloned_tid", PTRACE_GETEVENTMSG, tid, nullptr, &new_tid);
1335 if (new_tid > 0) {
1336 if (flags & CLONE_THREAD) {
1337 if (m_syscall_state.find(new_tid) == m_syscall_state.end()) {
1338 add_thread(new_tid);
1339 LOG(INFO) << new_tid << " newborn thread after clone >>>";
1340 }
1341 } else {
1342 if (nullptr == m_parent->get_process(new_tid)) {
1343 shared_ptr<TraceProcess> child = make_shared<TraceProcess>(this);
1344 m_parent->add_process(child, new_tid);
1345 LOG(INFO) << new_tid << " newborn process after clone >>>";
1346 }
1347 }
1348 }
1349
1350 return 0;
1351}
1352
1353void TraceProcess::dump_debugger_count(void)
1354{
1355 LOG(INFO) << "\tNumber of ptrace error:" << m_counter.ptrace_error;
1356 LOG(INFO) << "\tNumber of pread error:" << m_ctx_stream.error_count();
1357 LOG(INFO) << "\tNumber of seccomp stop:" << m_counter.seccomp_stop;
1358 LOG(INFO) << "\tNumber of syscall-enter stop:" << m_counter.syscall_enter;
1359 LOG(INFO) << "\tNumber of syscall-exit stop:" << m_counter.syscall_exit;
1360 LOG(INFO) << "\tNumber of syscall-exit miss:" << m_counter.syscall_exit_miss;
1361 LOG(INFO) << "\tNumber of maps changed:" << m_counter.maps_change;
1362 if (m_syscall_dumper) {
1363 LOG(INFO) << "\tNumber of flush buffer:" << m_counter.flush_buffer;
1364 LOG(INFO) << "\tNumber of syscall hooked:" << m_syscall_dumper->syscall_count;
1365 LOG(INFO) << "\tNumber of events dumped:"
1366 << m_syscall_dumper->syscall_count + m_counter.event_dumped;
1367 if (m_counter.hook_dumped > 1024*1024) {
1368 LOG(INFO) << "\tSyscall hook bytes:"
1369 << m_counter.hook_dumped/(1024.0*1024.0) << "MB";
1370 }
1371 else {
1372 LOG(INFO) << "\tSyscall hook bytes:"
1373 << m_counter.hook_dumped/(1024.0) << "KB";
1374 }
1375 }
1376 else {
1377 LOG(INFO) << "\tNumber of events dumped:" << m_counter.event_dumped;
1378 }
1379
1380 if (m_counter.total_dumped > 1024*1024) {
1381 LOG(INFO) << "\tTotal dump bytes:"
1382 << m_counter.total_dumped/(1024.0*1024.0) << "MB";
1383 }
1384 else {
1385 LOG(INFO) << "\tTotal dump bytes:"
1386 << m_counter.total_dumped/(1024.0) << "KB";
1387 }
1388}
1389
1390void TraceProcess::reset_debugger_count(void)
1391{
1392 m_counter.syscall_enter = 0;
1393 m_counter.syscall_exit = 0;
1394 m_counter.syscall_exit_miss = 0;
1395 m_counter.seccomp_stop = 0;
1396 m_counter.flush_buffer = 0;
1397 m_counter.maps_change = 0;
1398 m_counter.ptrace_error = 0;
1399 m_counter.event_dumped = 0;
1400 m_counter.hook_dumped = 0;
1401 m_counter.total_dumped = 0;
1402}
1403
1404TraceProcess::TraceProcess(DumpConfig* config)
1405{
1406 m_parent = this;
1407 m_can_dump = false;
1408 m_exec_stop = false;
1409 m_syscall_dumper = nullptr;
1410 m_breakpoint.address = 0;
1411
1412 reset_debugger_count();
1413
1414 m_cfg = config;
1415 copy_variables(m_cfg->vars, m_global_vars);
1416
1417 m_page_size = sysconf(_SC_PAGESIZE);
1418
1419 m_send_socket = 0;
1420
1421 LOG(INFO) << "create root process " << this;
1422}
1423
1424TraceProcess::TraceProcess(TraceProcess* parent)
1425{
1426 m_parent = parent;
1427 m_can_dump = false;
1428 m_exec_stop = false;
1429 m_syscall_dumper = nullptr;
1430 m_breakpoint.address = 0;
1431
1432 reset_debugger_count();
1433
1434 m_cfg = parent->get_config();
1435 copy_variables(m_cfg->vars, m_global_vars);
1436
1437 m_page_size = sysconf(_SC_PAGESIZE);
1438
1439 m_send_socket = -1;
1440 m_recv_socket = -1;
1441 m_server_socket = -1;
1442 m_recv_fd = -1;
1443
1444 LOG(INFO) << "create child process " << this;
1445}
1446
1447TraceProcess::~TraceProcess(void)
1448{
1449 if (m_recv_socket > 0) {
1450 close(m_recv_socket);
1451 }
1452 if (m_send_socket > 0) {
1453 close(m_send_socket);
1454 }
1455
1456 LOG(INFO) << "destroy process " << this << ", for tracee " << m_pid;
1457}
1458
1459void TraceProcess::get_share_name(uintptr_t name)
1460{
1461 char path[256];
1462 int len = snprintf(path, sizeof(path)-1,
1463 SHARED_FILE_NAME, m_pid, m_nonce);
1464
1465 write_mem(m_pid, name, &path[0], len + 1);
1466 LOG(INFO) << "pass shared file:" << path;
1467}
1468
1469int TraceProcess::get_shared_buffers(char** ptr)
1470{
1471 if (m_syscall_dumper) {
1472 *ptr = BUFFER_HEAD(m_syscall_dumper);
1473 return m_syscall_dumper->current;
1474 }
1475
1476 return 0;
1477}
1478
1479void TraceProcess::init_shared_buffers(int size)
1480{
1481 int fd = -1;
1482 int flags = MAP_SHARED;
1483 char shared_file[256];
1484 snprintf(shared_file, sizeof(shared_file) - 1,
1485 SHARED_FILE_NAME, m_pid, m_nonce);
1486 fd = open(shared_file, O_RDWR | O_CLOEXEC, 0600);
1487 if (fd <= 0) {
1488 LOG(FATAL) << "Failed to open shmem " << shared_file
1489 << ", errno=" << errno
1490 << ", for tracee " << m_pid;
1491 return;
1492 }
1493 if (ftruncate(fd, size)) {
1494 close(fd);
1495 LOG(FATAL) << "Failed to resize shmem to " << size
1496 << ", errno=" << errno
1497 << ", for tracee " << m_pid;
1498 return;
1499 }
1500
1501 void* map_addr = mmap(NULL, size,
1502 PROT_READ | PROT_WRITE, flags, fd, 0);
1503 if (MAP_FAILED == map_addr) {
1504 close(fd);
1505 LOG(FATAL) << "Failed to mmap shmem region, errno=" << errno
1506 << ", for tracee " << m_pid;
1507 return;
1508 }
1509
1510 m_syscall_dumper = reinterpret_cast<MemoryDumper *>(map_addr);
1511 m_syscall_dumper->size = size - sizeof(MemoryDumper);
1512 m_syscall_dumper->current = 0;
1513 m_syscall_dumper->page_size = m_page_size;
1514 m_syscall_dumper->max_stack_size = m_cfg->max_stack_size;
1515 m_syscall_dumper->stack_begin = 0;
1516 m_syscall_dumper->stack_end = 0;
1517 m_syscall_dumper->max_param_size = m_cfg->max_param_size;
1518
1519 // pass syscall filter set to syscall preload
1520 m_syscall_dumper->syscall.nslots = g_trace_set[0].nslots;
1521 m_syscall_dumper->syscall.not_flag = g_trace_set[0].not_flag;
1522 char* data = BUFFER_HEAD(m_syscall_dumper);
1523 if (g_trace_set[0].nslots > 0) {
1524 int size = sizeof(number_slot_t) * g_trace_set[0].nslots;
1525 memcpy(data, g_trace_set[0].vec, size);
1526 data += size;
1527 }
1528
1529 // pass syscall parameters to syscall preload
1530 *(int*)data = sizeof(syscall_param_flags);
1531 memcpy(data + sizeof(int), syscall_param_flags, sizeof(syscall_param_flags));
1532 close(fd);
1533
1534 LOG(INFO) << "init_shared_buffers create dumper=" << m_syscall_dumper
1535 << " for tracee " << m_pid;
1536}
1537
1538void TraceProcess::post_exec_syscall(pid_t tid)
1539{
1540 (void)tid;
1541 if (m_cfg->mode != DRY_RUN) {
1542 m_can_dump = true;
1543 link_exec_file(m_cfg->dump_dir, m_pid);
1544 dump_maps(SYS_execve, PROT_EXEC|PROT_WRITE);
1545 if (!m_cfg->break_function.empty()) {
1546 //wait the specified function resolved and called!
1547 m_can_dump = false;
1548 }
1549 }
1550}
1551
// Handle passed to td_ta_new() and handed back to the ps_* callbacks in
// this file.
struct ps_prochandle
{
    pid_t pid;                  // returned by ps_getpid()
    TraceProcess* process;      // used by ps_pglobal_lookup() to resolve symbols
    TraceStream* stream;        // source for ps_pdread()
    vector<VmSegment>* segs;    // ps_pdread() appends every range it read
};
1559
// Result codes returned by the ps_* callbacks in this file.
typedef enum tag_ps_err_e
{
  PS_OK,            /* Generic "call succeeded". */
  PS_ERR,           /* Generic error. */
  PS_BADPID,        /* Bad process handle. */
  PS_BADLID,        /* Bad LWP identifier. */
  PS_BADADDR,       /* Bad address. */
  PS_NOSYM,         /* Could not find given symbol. */
  PS_NOFREGS        /* FPU register set not available for given LWP. */
} ps_err_e;
1570
1571extern "C" {
1572#include <thread_db.h>
// These ps_xxx callbacks are needed by libthread_db.so.1
1574ps_err_e ps_pdread(struct ps_prochandle *ph,
1575 psaddr_t addr, void *buf, size_t size)
1576{
1577 if (ReadProcess(buf, ph->stream, addr, size) != (int)size) {
1578 LOG(ERROR) << "ps_pdread failed to read " << size
1579 << " bytes from " << (addr);
1580 return PS_ERR;
1581 }
1582
1583 /* whatever td_ta_thr_iter() reads, dump to core */
1584 VmSegment vm;
1585 vm.start = reinterpret_cast<uintptr_t>(addr);
1586 vm.end = reinterpret_cast<uintptr_t>(addr) + size;
1587 ph->segs->push_back(vm);
1588
1589 return PS_OK;
1590}
1591
1592ps_err_e ps_pdwrite(struct ps_prochandle *ph,
1593 psaddr_t addr, const void *buf, size_t size)
1594{
1595 (void)ph;
1596 (void)addr;
1597 (void)buf;
1598 (void)size;
1599 /* NOP */
1600 return PS_OK;
1601}
1602
1603ps_err_e ps_lgetregs(struct ps_prochandle *ph,
1604 lwpid_t lwpid, prgregset_t prgregset)
1605{
1606 (void)ph;
1607 (void)lwpid;
1608 (void)prgregset;
1609 /* NOP */
1610 return PS_OK;
1611}
1612
1613ps_err_e ps_lsetregs(struct ps_prochandle *ph,
1614 lwpid_t lwpid, const prgregset_t prgregset)
1615{
1616 (void)ph;
1617 (void)lwpid;
1618 (void)prgregset;
1619 /* NOP */
1620 return PS_OK;
1621}
1622
1623ps_err_e ps_lgetfpregs(struct ps_prochandle *ph,
1624 lwpid_t lwpid, prfpregset_t *prfpregset)
1625{
1626 (void)ph;
1627 (void)lwpid;
1628 (void)prfpregset;
1629 /* NOP */
1630 return PS_OK;
1631}
1632
1633ps_err_e ps_lsetfpregs(struct ps_prochandle *ph,
1634 lwpid_t lwpid, const prfpregset_t *prfpregset)
1635{
1636 (void)ph;
1637 (void)lwpid;
1638 (void)prfpregset;
1639 /* NOP */
1640 return PS_OK;
1641}
1642
1643pid_t ps_getpid(struct ps_prochandle *ph)
1644{
1645 return ph->pid;
1646}
1647
1648ps_err_e ps_pglobal_lookup(struct ps_prochandle *ph,
1649 const char *object_name, const char *sym_name, psaddr_t *sym_addr)
1650{
1651 unsigned long addr;
1652
1653 if (!ph->process->get_sym_address(object_name, sym_name, &addr)) {
1654 LOG(DEBUG) << __FUNCTION__ << " failed for:" << object_name << ", " << sym_name;
1655 return PS_NOSYM;
1656 }
1657 LOG(DEBUG) << __FUNCTION__ << " ok for:" << object_name << ", " << sym_name;
1658
1659 *sym_addr = (psaddr_t)addr;
1660
1661 return PS_OK;
1662}
1663} /* end extern "C" */
1664
1665static int find_pthreads_cb(const td_thrhandle_t *th, void *cb_data)
1666{
1667 (void)cb_data;
1668 /* Get thread info, in order to access (and dump) data that
1669 gdb/libthread_db needs. */
1670 td_thrinfo_t thinfo;
1671 td_thr_get_info(th, &thinfo);
1672
1673 return TD_OK;
1674}
1675
// Per-architecture path of the pthread library. dump_thread_list() looks
// this exact path up in m_symbols and returns early when it is absent.
// NOTE(review): the paths hard-code a library version (2.24 / 2.23).
#if defined(__x86_64__)
#define PTHREAD_LIB "/usr/lib/x86_64-linux-gnu/libpthread-2.24.so"
#elif defined(__mips64)
#define PTHREAD_LIB "/lib/mips64el-linux-gnuabi64/libpthread-2.23.so"
#elif defined(__sw_64)
#define PTHREAD_LIB "/lib/libpthread-2.23.so"
#elif defined(__aarch64__)
#define PTHREAD_LIB "/lib/libpthread-2.23.so"
#else
#error need define PTHREAD_LIB
#endif
1687
1688int TraceProcess::dump_thread_list(pid_t tid)
1689{
1690 struct ps_prochandle ph;
1691 td_thragent_t *ta;
1692 td_err_e err;
1693
1694 (void)tid;
1695
1696 if (m_symbols.find(PTHREAD_LIB) == m_symbols.end()) {
1697 return 0;
1698 }
1699
1700 ph.pid = m_pid;
1701 ph.process = this;
1702 ph.stream = &m_ctx_stream;
1703 ph.segs = &m_syscall_memblks;
1704
1705 err = td_ta_new(&ph, &ta);
1706 if (err == TD_OK) {
1707 err = td_ta_thr_iter(ta, find_pthreads_cb, NULL,
1708 TD_THR_ANY_STATE, TD_THR_LOWEST_PRIORITY,
1709 TD_SIGNO_MASK, TD_THR_ANY_USER_FLAGS);
1710
1711 td_ta_delete(ta);
1712
1713 LOG(DEBUG) << "dump_thread_list count:" << ph.segs->size();
1714 }
1715
1716 if (err == TD_NOLIBTHREAD) {
1717 LOG(DEBUG) << "target does not appear to be multi-threaded for tracee " << m_pid;
1718 } else if (err != TD_OK) {
1719 LOG(WARNING) << "FIXME: libthread_db not found, using fallback: " << err
1720 << " for tracee " << m_pid;
1721 // TODO: fallback to call get_pthread_list_fallback(di);
1722 }
1723
1724 return 0;
1725}
1726
1727int TraceProcess::dump_thread_status(pid_t tid)
1728{
1729 char maps_path[NAME_MAX];
1730 snprintf(maps_path, sizeof(maps_path), "/proc/%d/status", tid);
1731
1732 const int fd = open(maps_path, O_RDONLY, 0);
1733 if (fd < 0)
1734 return false;
1735
1736 unique_ptr<LineReader> line_reader = make_unique<LineReader>(fd);
1737
1738 const char* line = nullptr;
1739 unsigned int line_len = 0;
1740 int i = 0;
1741 while ((i < 8) && line_reader->GetNextLine(&line, &line_len)) {
1742 LOG(DEBUG) << "\t" << line;
1743 ++i;
1744 line_reader->PopLine(line_len);
1745 }
1746
1747 close(fd);
1748
1749 return true;
1750}
1751
// Currently a no-op that ignores `ctx` and returns 0: the TLS dumping code
// below is compiled out by the "#if 1" switch.
int TraceProcess::dump_thread_context(ThreadContext* ctx)
{
#if 1
    (void)ctx;
#else
    // Disabled implementation: collect the thread's TLS descriptors.
    // NOTE(review): this branch references `tid`, which is not declared in
    // this function, so it would not compile if enabled as-is.
    struct user_desc desc;

    ctx->tls.clear();

    errno = 0;

#if defined(__x86_64__)
#define GDT_ENTRY_TLS_MIN 12
#define GDT_ENTRY_TLS_MAX 14
#else
#error need to define new GDT_ENTRY_TLS_MAX, GDT_ENTRY_TLS_MIN
#endif

    // TODO: reading TLS through ptrace does not work; the following code only
    // reads the user_desc entries.
    // We should dump the .tbss section.
    // [19] .tbss NOBITS 0000000000201dc8 001dc8 000004 00 WAT 0 0 4
    for (int i = GDT_ENTRY_TLS_MIN; i <= GDT_ENTRY_TLS_MAX; ++i) {
        long ret = ptrace(
            static_cast<__ptrace_request>(PTRACE_GET_THREAD_AREA),
            tid, i, &desc);
        if (ret < 0) {
            LOG(DEBUG) << "Failed to get tls for tracee " << tid
                << ", nth=" << i
                << ", errno=" << errno;
            break;
        }
        ctx->tls.push_back(desc);
    }

    // NOTE(review): this pushes `desc` once more after the loop - looks like
    // a duplicate entry; confirm before re-enabling.
    ctx->tls.push_back(desc);
#endif

    return 0;
}
1791
1792/* parse the 28th field of /proc/#tid/stat or use PTRACE_PEEKUSER.
1793 * then, search current stack point in /proc/#pid/maps to locate stack segment
1794 */
1795int TraceProcess::dump_thread_stack(ThreadContext* ctx)
1796{
1797 VmSegment vm;
1798 uintptr_t stack_pointer;
1799
1800#if defined(__i386__)
1801 stack_pointer = ctx->regs.esp;
1802#elif defined(__x86_64__)
1803 stack_pointer = ctx->regs.rsp;
1804#elif defined(__mips64) || defined(__sw_64) || defined(__aarch64__)
1805 stack_pointer = ctx->regs.sp;
1806#else
1807#error need to implement new method to read stack pointer
1808#endif
1809
1810 vm.start = stack_pointer & (~(m_page_size-1));
1811 vm.end = vm.start + m_cfg->max_stack_size;
1812
1813 // NOTE: some child thread's stack is in an anonymous region not in [stack] region,
1814 // need search stack_pointer in maps file to detect the real stack range
1815 for (auto& m:m_mappings) {
1816 MappingInfo* mp = m.get();
1817 uintptr_t end = mp->start_addr + mp->size;
1818 if (vm.start >= mp->start_addr &&
1819 vm.start < end) {
1820 if (vm.end > end) vm.end = end;
1821 break;
1822 }
1823 }
1824
1825 ctx->stack = vm;
1826
1827 return 0;
1828}
1829
1830void TraceProcess::check_global_pointer_var_is_assigned(void)
1831{
1832 VmSegment seg;
1833 vector<VmSegment> new_rw_segs;
1834
1835 for (auto& var: m_global_vars) {
1836 if (!var.address ||
1837 !var.is_pointer ||
1838 var.is_pointer_loaded) {
1839 continue;
1840 }
1841
1842 if (sizeof(uintptr_t) == ReadProcess(&var.address_ptr,
1843 &m_ctx_stream,
1844 reinterpret_cast<const void*>(var.address),
1845 sizeof(uintptr_t)) && (var.address_ptr > 0)) {
1846 const uintptr_t mask = ~3;
1847 seg.start = var.address_ptr & mask;
1848 seg.end = (var.address_ptr + var.max_size + 4) & mask;
1849 assert(seg.start > 0xffff);
1850 new_rw_segs.push_back(seg);
1851 var.is_pointer_loaded = true;
1852 }
1853 else {
1854 var.address_ptr = 0;
1855 }
1856 }
1857
1858 if (!new_rw_segs.empty()) {
1859 merge_heap(new_rw_segs);
1860 }
1861}
1862
1863void TraceProcess::merge_heap(vector<VmSegment>& segs)
1864{
1865 VmSegment seg;
1866
1867 segs.insert(segs.end(), m_heaps.begin(), m_heaps.end());
1868 sort(segs.begin(), segs.end(),
1869 [](auto& a, auto& b){return a.start < b.start;});
1870
1871 seg.start = 0;
1872 seg.end = 0;
1873
1874 m_heaps.clear();
1875
1876 for (auto& heap: segs) {
1877 assert(heap.start > 0xffff);
1878
1879 if (0 == seg.start) {
1880 // first
1881 seg.start = heap.start;
1882 seg.end = heap.end;
1883 continue;
1884 }
1885
1886 if (seg.start <= heap.start && heap.start <= seg.end) {
1887 // overlap
1888 if (seg.end < heap.end) seg.end = heap.end;
1889 }
1890 else {
1891 // new range
1892 m_heaps.push_back(seg);
1893 seg.start = heap.start;
1894 seg.end = heap.end;
1895 }
1896 }
1897
1898 m_heaps.push_back(seg);
1899
1900 return;
1901}
1902
/* Record the tracee's current memory layout and update the list of ranges
 * (m_heaps) that later dumps should include.
 *
 * maps.bin layout:
 * time(struct timespec), [data-size, data], ...
 *
 * reason:   only used in the debug log line below.
 * map_prot: PROT_EXEC enables the symbol search (global variables and the
 *           break function); PROT_WRITE enables the search for writable
 *           segments of the configured modules.
 * Always returns 0.
 * */
int TraceProcess::dump_maps(int reason, int map_prot)
{
    // Nothing more is recorded once the configured byte budget is used up.
    if (m_counter.total_dumped >= m_cfg->max_dump_bytes) {
        return 0;
    }

    if (m_auxv.empty()) {
        // NOTE: here we only dump auxv once!
        dump_auxv(m_pid);
    }

    // Record header: timestamp followed by the content of /proc/<pid>/maps.
    struct timespec current;
    clock_gettime(CLOCK_REALTIME, &current);
    m_maps_file.write(&current, sizeof(current));
    dump_proc_file(m_pid, "maps");

    // dump module list, especially linux-gate.so,
    VmSegment seg;
    VmSegment heap = {0,0};
    VmSegment stack = {0,0};
    vector<VmSegment> new_rw_segs;
    m_mappings.clear();
    EnumerateMappings(m_mappings, m_auxv, m_pid, &stack, &heap);
    if (m_syscall_dumper) {
        // publish the stack range through the shared buffer header
        m_syscall_dumper->stack_begin = stack.start;
        m_syscall_dumper->stack_end = stack.end;
    }

    // NOTE: [heap] is created and adjusted after every brk syscall;
    // and the first part of the heap may be used by libc, libpthread, ...
    // so we dump around the center point.
    if (m_cfg->max_heap_size && heap.start > 0 && heap.end > heap.start) {
        uintptr_t mid = heap.start + (heap.end - heap.start)/2;
        seg.start = mid - m_cfg->max_heap_size/2;
        seg.end = mid + m_cfg->max_heap_size/2;
        // clip the window to the heap itself
        if (seg.start < heap.start) seg.start = heap.start;
        if (seg.end > heap.end) seg.end = heap.end;
        new_rw_segs.push_back(seg);
    }

    WriteMappings(m_maps_file, m_mappings, m_symbols, &m_ctx_stream);

    // dump DSO Debug, should have PT_DYNAMIC, see readelf -d xxx-elf
    bool dso_found = WriteDSODebugStream(m_maps_file, m_auxv, &m_ctx_stream);
    LOG(DEBUG) << "dump_maps for tracee " << m_pid
        << ", dso_found=" << dso_found << ", reason=" << reason;

    if (map_prot & PROT_EXEC) {
        // search unresolved symbol
        // The break function is only looked up while no breakpoint address
        // is known yet.
        const char* break_function = NULL;
        if ((0 == m_breakpoint.address) && !m_cfg->break_function.empty()) {
            break_function = m_cfg->break_function.data();
        }
        for (auto& kv : m_symbols) {
            SymbolFile* helper = kv.second.get();
            // each valid symbol file is searched at most once
            if (!helper->m_valid || helper->m_searched) continue;
            helper->m_searched = true;

            // search global variables address
            for (auto& var: m_global_vars) {
                if (!var.address &&
                    helper->get_sym_address(var.sym_name.data(),
                        &var.address, elf::stt::object)) {
                    if (var.is_pointer) {
                        // read the pointer's current value; 0 when unreadable
                        if (sizeof(uintptr_t) != ReadProcess(&var.address_ptr,
                                    &m_ctx_stream,
                                    reinterpret_cast<const void*>(var.address),
                                    sizeof(uintptr_t))) {
                            var.address_ptr = 0;
                        }
                    }
                    LOG(DEBUG) << "Resolved variable:" << var.sym_name.data()
                        << " in module " << kv.first.data()
                        << ", at " << HEX(var.address);
                }
            }

            // search break function address
            if (break_function && helper->get_sym_address(break_function,
                        &m_breakpoint.address, elf::stt::func)) {
                break_at_function(m_pid);
            }
        }
    }

    // check if there are some new global variables loaded
    const uintptr_t mask = ~3;
    for (auto& var: m_global_vars) {
        if (!var.address || var.is_loaded) {
            continue;
        }

        // TODO: remove a global variable if its module has been unloaded!
        // The range is 4-byte aligned and covers max_size bytes from the
        // variable's address.
        seg.start = var.address & mask;
        seg.end = (var.address + var.max_size + 4) & mask;
        assert(seg.start > 0xffff);
        new_rw_segs.push_back(seg);
        var.is_loaded = true;

        if (var.is_pointer) {
            if (var.address_ptr > 0) {
                // also queue the range the pointer refers to
                seg.start = var.address_ptr & mask;
                seg.end = (var.address_ptr + var.max_size + 4) & mask;
                assert(seg.start > 0xffff);
                new_rw_segs.push_back(seg);
                var.is_pointer_loaded = true;
            }
            else {
                continue; // the pointer is not assigned !
            }
        }
    }

    // check if new writable segments
    if (map_prot & PROT_WRITE) {
        // Every configured module pattern is matched against the current
        // mappings; a pattern is erased from the list once it was handled.
        std::list<string>::iterator n = m_cfg->modules.begin();
        while (n != m_cfg->modules.end()) {
            bool found = false;
            for (auto& m: m_mappings) {
                MappingInfo* mp = m.get();
                // skip unnamed mappings and mappings at file offset 0
                if (!mp->name[0] || !mp->offset) {
                    continue;
                }

                // fnmatch() returns non-zero when the name does not match
                if (fnmatch(n->data(), mp->name, FNM_PATHNAME)) {
                    continue;
                }

                // no entry in g_guid_maps: nothing to queue, pattern is done
                auto it = g_guid_maps.find(mp->name);
                if (it == g_guid_maps.end()) {
                    found = true;
                    break;
                }

                /*NOTE: libc is strange
7ffff7813000-7ffff79a8000 r-xp 00000000 08:02 1447866 /usr/lib/x86_64-linux-gnu/libc-2.24.so
7ffff79a8000-7ffff7ba7000 ---p 00195000 08:02 1447866 /usr/lib/x86_64-linux-gnu/libc-2.24.so
7ffff7ba7000-7ffff7bad000 rw-p 00194000 08:02 1447866 /usr/lib/x86_64-linux-gnu/libc-2.24.so
*/
                if (abs(mp->offset - it->second.rw_offset) > m_page_size) {
                    // the segment's offset should page size align
                    continue; // try next segment
                }
                //NOTE: here we just need dump .data and .bss
                // addrs[] is consumed as two (start, end) pairs that are
                // rebased from file offsets to the mapping's address.
                for (int i = 0; i < 4; i += 2) {
                    seg.start = it->second.addrs[i] - mp->offset + mp->start_addr;
                    seg.end = it->second.addrs[i+1]- mp->offset + mp->start_addr;
                    // NOTE(review): this comparator is not a strict weak
                    // ordering (both orders can compare "less"), so the
                    // binary_search result may be unreliable - confirm.
                    if (!std::binary_search(m_heaps.begin(), m_heaps.end(), seg,
                            [](const VmSegment& a, const VmSegment& b){
                            return (a.start < b.start) || (a.end < b.end);})) {
                        new_rw_segs.push_back(seg);
                    }
                }

                found = true;
                break; // OK
            }
            if (!found)
                ++n;
            else
                n = m_cfg->modules.erase(n);
        }
    }

    if (!new_rw_segs.empty()) {
        merge_heap(new_rw_segs);
    }
    ++m_counter.maps_change;

    return 0;
}
2078
2079/*status, stat, maps, smaps, cmdline, environ, ... etc*/
2080int TraceProcess::dump_proc_file(pid_t pid, const char* name)
2081{
2082 char path[NAME_MAX];
2083 snprintf(path, sizeof(path), "/proc/%d/%s", pid, name);
2084
2085 MemoryMappedFile mapped_file(path, 0);
2086 int size = mapped_file.size();
2087 assert(size > 0);
2088 m_maps_file.write(&size, sizeof(size));
2089 m_maps_file.write(mapped_file.data(), size);
2090 LOG(DEBUG) << "dump " << path << "," << size;
2091
2092 return 0;
2093}
2094
2095int TraceProcess::dump_auxv(pid_t pid)
2096{
2097 uint32_t size = 0;
2098 char path[NAME_MAX];
2099
2100#define AT_MAX AT_SYSINFO_EHDR
2101
2102 struct timespec current;
2103 clock_gettime(CLOCK_REALTIME, &current);
2104 m_maps_file.write(&current, sizeof(current));
2105
2106 m_auxv.resize(AT_MAX+1);
2107 for (auto& i : m_auxv) {
2108 i = 0;
2109 }
2110 snprintf(path, sizeof(path), "/proc/%d/auxv", pid);
2111 MemoryMappedFile mapped_file(path, 0);
2112 size = mapped_file.size();
2113 assert(size > 0);
2114 m_maps_file.write(&size, sizeof(size));
2115 m_maps_file.write(mapped_file.data(), size);
2116 LOG(DEBUG) << "dump auxv " << size;
2117
2118 dump_proc_file(pid, "cmdline");
2119 dump_proc_file(pid, "environ");
2120
2121 const elf_aux_entry* aux_entry = reinterpret_cast<const elf_aux_entry*>(mapped_file.data());
2122 for (uint32_t i=0; i < size/sizeof(elf_aux_entry); ++i) {
2123 if (aux_entry->a_type != AT_NULL) {
2124 if (aux_entry->a_type <= AT_MAX) {
2125 m_auxv[aux_entry->a_type] = aux_entry->a_un.a_val;
2126 }
2127 }
2128 ++aux_entry;
2129 }
2130
2131 // dump vdso
2132 const void* linux_gate_loc = reinterpret_cast<const void*>(
2133 m_auxv[AT_SYSINFO_EHDR]);
2134 if (linux_gate_loc) {
2135 // FIXME: how to get size of vdso
2136#if defined(__mips__)
2137 size = m_page_size;
2138#else
2139 size = m_page_size * 2;
2140#endif
2141 unique_ptr<char[]> buf = make_unique<char[]>(size);
2142 int len = ReadProcess(buf.get(), &m_ctx_stream, linux_gate_loc, size);
2143 size += sizeof(uintptr_t);
2144 m_maps_file.write(&size, sizeof(size));
2145 m_maps_file.write(buf.get(), size - sizeof(uintptr_t));
2146 m_maps_file.write(&linux_gate_loc, sizeof(linux_gate_loc));
2147 LOG(DEBUG) << "dump vdso " << linux_gate_loc << ", " << len;
2148 }
2149 else {
2150 int size = 0;
2151 m_maps_file.write(&size, sizeof(size));
2152 // NOTE: vdso is not implemented in SunWay!
2153#if !defined(__sw_64)
2154 LOG(WARNING) << "failed to dump vdso " << linux_gate_loc;
2155#endif
2156 }
2157
2158 return 0;
2159}
2160
2161static bool is_in_stack(VmSegment* seg, vector<ThreadContext>& ctx, int count)
2162{
2163 for (int i = 0; i < count; ++i) {
2164 if (seg->start >= ctx[i].stack.start &&
2165 seg->end <= ctx[i].stack.end) {
2166 return true;
2167 }
2168 }
2169
2170 return false;
2171}
2172
2173int TraceProcess::dump_clone(pid_t cur_tid, pid_t newtid) //only used in fast mode
2174{
2175 EventHead head;
2176 uint32_t heap_num = 0;
2177 long syscall_result = newtid;
2178 ThreadContext context, *ctx = &context;
2179#if defined(__aarch64__)
2180 static struct iovec io;
2181#endif
2182
2183 if (!m_can_dump) {
2184 return 0;
2185 }
2186
2187 clock_gettime(CLOCK_REALTIME, &head.cur_time);
2188 head.reason = SYS_clone;
2189 head.thread_num = 1;
2190 head.current_tid = cur_tid;
2191 head.extra_size = sizeof(syscall_result);
2192
2193 ctx->tid = cur_tid;
2194#if defined(__aarch64__)
2195 io.iov_base = &ctx->regs;
2196 io.iov_len = sizeof(ctx->regs);
2197 ptrace(PTRACE_GETREGSET, ctx->tid, NT_PRSTATUS, &io);
2198
2199 io.iov_base = &ctx->fpregs;
2200 io.iov_len = sizeof(ctx->fpregs);
2201 ptrace(PTRACE_GETREGSET, ctx->tid, NT_FPREGSET, &io);
2202#else
2203 ptrace(PTRACE_GETREGS, ctx->tid, nullptr, &ctx->regs);
2204 ptrace(PTRACE_GETFPREGS, ctx->tid, nullptr, &ctx->fpregs);
2205#endif
2206 dump_thread_context(ctx);
2207 dump_thread_stack(ctx);
2208 m_ctx_stream2.write(&head, &syscall_result);
2209 m_ctx_stream2.write(ctx);
2210 m_ctx_stream2.write(&heap_num, sizeof(heap_num));
2211
2212 return 0;
2213}
2214
2215int TraceProcess::dump_event(int reason, pid_t cur_tid, void* extra_data, int extra_data_size)
2216{
2217 int ret = 0;
2218 uint8_t flags = 0;
2219 uint8_t args = 0;
2220 bool is_sig = false;
2221 uint32_t heap_num = 0;
2222 EventHead head;
2223 long begin = 0;
2224 uint32_t syscall_no = 0;
2225
2226#if defined(__aarch64__)
2227 static struct iovec io;
2228#endif
2229
2230 if (!m_can_dump || m_counter.total_dumped >= m_cfg->max_dump_bytes) {
2231 return 0;
2232 }
2233
2234 check_global_pointer_var_is_assigned();
2235
2236 clock_gettime(CLOCK_REALTIME, &head.cur_time);
2237 head.reason = reason;
2238 head.thread_num = 0;
2239 head.current_tid = cur_tid;
2240 head.extra_size = extra_data_size;
2241
2242 begin = m_counter.total_dumped;
2243 if (reason < __NR_Linux) {
2244 m_counter.total_dumped += m_ctx_stream.write(&head, extra_data);
2245
2246 //TODO: how to dump errno in target tracee with pread?
2247 goto dump_end;
2248 }
2249 ++m_counter.event_dumped;
2250
2251 if (m_thread_ctx.size() < m_syscall_state.size()) {
2252 m_thread_ctx.resize(m_syscall_state.size());
2253 }
2254
2255 is_sig = (reason >= DUMP_REASON_signal && reason < DUMP_REASON_dbus);
2256 for (auto& kv : m_syscall_state) {
2257 if (!is_sig) {
2258 if (m_cfg->current_thread_only) {
2259 if (kv.first != cur_tid)
2260 continue;
2261 }
2262 // FIXME: skip sleeping thread which has dumped is reasonable?
2263 // The Stack Data is redundancy if not do that.
2264 else if (kv.second.state & SYSCALL_ENTER_MASK) {
2265 continue;
2266 }
2267 }
2268
2269 ThreadContext* ctx = &m_thread_ctx[head.thread_num];
2270 ctx->tid = kv.first;
2271 ctx->interrupted = false;
2272
2273#if defined(__aarch64__)
2274 io.iov_base = &ctx->regs;
2275 io.iov_len = sizeof(ctx->regs);
2276 ret = ptrace(PTRACE_GETREGSET, ctx->tid, NT_PRSTATUS, &io);
2277#else
2278 ret = ptrace(PTRACE_GETREGS, ctx->tid, nullptr, &ctx->regs);
2279#endif
2280 if (ret < 0) {
2281 if (errno == ESRCH) {
2282 // current thread is not at trace-STOP (running or sleeping)
2283 /*
2284 * If the tracee is running or sleeping in kernel space and PTRACE_SYSCALL
2285 * is in effect, the system call is interrupted and syscall-exit-stop is reported.
2286 * (The interrupted system call is restarted when the tracee is restarted.)
2287 *
2288 * 根据以上 PTRACE_INTERRUPT 的说明, 此时如果使用 PTRACE_INTERRUPT 强行中断在
2289 * running 或 sleeping 的线程:
2290 * 1) 处于 sleeping 的线程会返回一个失败的系统调用,干扰分析,不应该记录这种线程.
2291 * 2) 处于 running 的线程的执行过程不是立即被打断,而是有一个延时, 在其进入系统调用
2292 * 或时间片结束的时候才会停下来,执行性能严重降低,会干扰多线程程序的重现条件.
2293 * 如果问题出在 running 的线程上,运行一段时间后一般会触发一个崩溃事件或死锁事件;
2294 * 从解决问题角度和记录性能上考虑,此时强行中断 running 的线程其实意义不大,除非要
2295 * 防止 dump 全局变量和堆内存时候的数据访问竞争.
2296 *
2297 * 另外一个方法是利用 cpu affinity set API 限定目标进程在一个单核上运行.
2298 */
2299 continue; //国产平台 PTRACE_INTERRUPT 性能低.
2300 }
2301 else {
2302 ++m_counter.ptrace_error;
2303 LOG(DEBUG) << "PTRACE_GETREGS failed for tracee " << ctx->tid
2304 << ", ret=" << ret << ", errno=" << errno;
2305 continue;
2306 }
2307 }
2308
2309 ++head.thread_num;
2310#if defined(__aarch64__)
2311 io.iov_base = &ctx->fpregs;
2312 io.iov_len = sizeof(ctx->fpregs);
2313 ptrace(PTRACE_GETREGSET, ctx->tid, NT_FPREGSET, &io);
2314#else
2315 ptrace(PTRACE_GETFPREGS, ctx->tid, nullptr, &ctx->fpregs);
2316#endif
2317 dump_thread_context(ctx);
2318 dump_thread_stack(ctx);
2319 }
2320 assert(head.thread_num > 0);
2321
2322 m_counter.total_dumped += m_ctx_stream.write(&head, extra_data);
2323
2324 for (int i = 0; i < head.thread_num; ++i) {
2325 m_counter.total_dumped += m_ctx_stream.write(&m_thread_ctx[i]);
2326 }
2327
2328 if (reason >= DUMP_REASON_signal) {
2329 goto dump_heap;
2330 }
2331
2332 // Auto dump if current syscall has pass memory pointer?
2333 syscall_no = reason - __NR_Linux;
2334 flags = syscall_param_flags[2*syscall_no];
2335 if ((flags != 0) && (m_cfg->max_param_size > 0)) {
2336 VmSegment seg;
2337 const uintptr_t mask = ~3;
2338 uintptr_t* syscall_args = (uintptr_t*)extra_data;
2339
2340 // parameter is void*, size indicate in next parameter
2341 bool size_indicate_by_next = flags & 0x80;
2342
2343 args = syscall_param_flags[2*syscall_no + 1];
2344 assert(m_syscall_memblks.empty());
2345 for (unsigned char i = 0; i < args; ++i) {
2346 if (flags & 1) {
2347 uintptr_t addr = syscall_args[i];
2348 if ((addr > 0) ) {
2349 seg.start = addr & mask;
2350 if (size_indicate_by_next) {
2351 if (syscall_args[i+1] > 0) {
2352 seg.end = (addr + syscall_args[i+1] + 3) & mask;
2353 if (!is_in_stack(&seg, m_thread_ctx, head.thread_num))
2354 m_syscall_memblks.push_back(seg);
2355 }
2356 break;
2357 }
2358 seg.end = (addr + m_cfg->max_param_size + 3) & mask;
2359 if (!is_in_stack(&seg, m_thread_ctx, head.thread_num))
2360 m_syscall_memblks.push_back(seg);
2361 }
2362 }
2363 flags >>= 1;
2364 }
2365
2366 if (!m_syscall_memblks.empty()) {
2367 // memory pointer parameter only dump once,
2368 // so store heaps first, then restore.
2369 m_heaps_temp = m_heaps;
2370 merge_heap(m_syscall_memblks);
2371 }
2372 }
2373
2374dump_heap:
2375 heap_num = m_heaps.size();
2376 // heap_count(4 byte)[, sizeof(long)+sizeof(int)+data, ...]
2377 m_counter.total_dumped += m_ctx_stream.write(&heap_num, sizeof(heap_num));
2378 for (auto& heap: m_heaps) {
2379 m_counter.total_dumped += m_ctx_stream.write(&heap);
2380 }
2381
2382 if (!m_syscall_memblks.empty()) {
2383 m_syscall_memblks.clear();
2384 m_heaps_temp.swap(m_heaps);
2385 }
2386
2387 // resume all interrupted threads
2388 for (int i = 0; i < head.thread_num; ++i) {
2389 if (m_thread_ctx[i].interrupted) {
2390 my_ptrace("dump_event", PTRACE_SYSCALL, m_thread_ctx[i].tid, 0, 0);
2391 }
2392 }
2393
2394dump_end:
2395 LOG(DEBUG) << "dump_event reason:" << reason
2396 << " tid " << cur_tid
2397 << " threads " << head.thread_num << "/" << m_syscall_state.size()
2398 << " at [" << begin
2399 << ", " << m_counter.total_dumped << "]";
2400
2401 return 0;
2402}
2403
2404int TraceProcess::flush_shared_buffers(void)
2405{
2406 char* buf = nullptr;
2407 int size = get_shared_buffers(&buf);
2408 if (size > 0 ) {
2409 LOG(DEBUG) << "flush_shared_buffers offset=" << m_counter.total_dumped
2410 << ", size=" << size << " for tracee " << m_pid;
2411 m_syscall_dumper->current = 0;
2412
2413 ++m_counter.flush_buffer;
2414 m_counter.hook_dumped += size;
2415 if (m_counter.total_dumped < m_cfg->max_dump_bytes) {
2416 m_counter.total_dumped += size;
2417 m_ctx_stream.write(buf, size);
2418 }
2419 }
2420
2421 return size;
2422}
2423
2424void TraceProcess::stop_record(void)
2425{
2426 m_can_dump = false;
2427 for (auto i = m_childs.begin(); i != m_childs.end(); ++i) {
2428 i->get()->stop_record();
2429 }
2430
2431 m_end_time = time(NULL);
2432 LOG(INFO) << "close process " << this
2433 << " duration=" << (m_end_time - m_begin_time)
2434 <<"s for tracee " << m_pid;
2435
2436 flush_shared_buffers();
2437 m_ctx_file.close();
2438
2439 if (m_cfg->mode == FAST) {
2440 m_ctx_file2.close();
2441 // NOTE: the handle_connect thread maybe in dump_maps,
2442 // so simple sleep 1s to wait dump_maps finish.
2443 sleep(1);
2444 }
2445
2446 m_maps_file.close();
2447
2448 dump_debugger_count();
2449}
2450
2451static void* handle_connect(void* param)
2452{
2453 reinterpret_cast<TraceProcess*>(param)->handle_connect();
2454 return nullptr;
2455}
2456
2457int TraceProcess::handle_connect(void)
2458{
2459 char buf[256];
2460
2461 while (m_maps_file.valid()) {
2462 int fd = accept(m_server_socket, NULL, NULL);
2463 if (fd < 0) {
2464 LOG(FATAL) << "failed to accept:" << strerror(errno)
2465 << ", for tracee " << m_pid;
2466 break;
2467 }
2468
2469 // blocking recv
2470 int nbytes = read(fd, buf, sizeof(int) + sizeof(uintptr_t));
2471 if (-1 == nbytes) {
2472 LOG(FATAL) << "failed to read:" << strerror(errno)
2473 << ", for tracee " << m_pid;
2474 break;
2475 }
2476
2477 // TODO: add sync protect to avoid conflict to the waitpid thread
2478 LOG(DEBUG) << "recvmsg:" << *(int*)&buf[0]
2479 << "," << HEX(*(uintptr_t*)&buf[4])
2480 << ", for tracee " << m_pid;
2481
2482 switch (*(int*)&buf[0]) {
2483 case SYS_init_buffers:
2484 init_shared_buffers(*(int*)&buf[4]);
2485 break;
2486
2487 case SYS_flush_buffers:
2488 // NOTE: SYS_flush_buffers maybe receive before SYS_init_buffers
2489 // for child create by fork before execve,e.g. firefox-esr 52.0
2490 flush_shared_buffers();
2491 break;
2492
2493 case SYS_share_name:
2494 *(int*)&buf[0] = m_nonce;
2495 break;
2496
2497 case SYS_enable_dump: {
2498 // Here we can dump dso link map now!
2499 m_can_dump = true;
2500 dump_maps(SYS_enable_dump, PROT_EXEC|PROT_WRITE);
2501 if (!m_cfg->break_function.empty()) {
2502 //wait the specified function resolved and called!
2503 m_can_dump = false;
2504 }
2505 }
2506 break;
2507
2508 case SYS_update_maps:
2509 dump_maps(SYS_update_maps, *(int*)&buf[4]);
2510 break;
2511
2512 default:
2513 break;
2514 }
2515
2516 nbytes = write(fd, buf, sizeof(int));
2517 if (-1 == nbytes) {
2518 LOG(FATAL) << "failed to write:" << strerror(errno)
2519 << ", for tracee " << m_pid;
2520 break;
2521 }
2522 close(fd);
2523 }
2524
2525 close(m_server_socket);
2526 m_server_socket = -1;
2527
2528 snprintf(buf, sizeof(buf) - 1, SHARED_SOCKET_NAME, m_pid);
2529 unlink(buf);
2530
2531 LOG(INFO) << "disconnect socket:" << buf << ", for tracee " << m_pid;
2532
2533 return 0;
2534}
2535
2536int TraceProcess::setup_listener(void)
2537{
2538 struct sockaddr_un addr;
2539 int sc, rc = -1;
2540
2541 int sock_fd = socket(AF_UNIX, SOCK_STREAM, 0);
2542 if (sock_fd == -1) {
2543 LOG(ERROR) << "failed to create socket:" << strerror(errno)
2544 << ", for tracee " << m_pid;
2545 goto done;
2546 }
2547
2548 memset(&addr, 0, sizeof(addr));
2549 addr.sun_family = AF_UNIX;
2550 snprintf(addr.sun_path, sizeof(addr.sun_path) - 1, SHARED_SOCKET_NAME, m_pid);
2551
2552 sc = bind(sock_fd, (struct sockaddr*)&addr, sizeof(addr));
2553 if (sc == -1) {
2554 LOG(ERROR) << "failed to bind:" << strerror(errno)
2555 << ", for tracee " << m_pid;
2556 goto done;
2557 }
2558
2559 sc = listen(sock_fd, 5);
2560 if (sc == -1) {
2561 LOG(ERROR) << "failed to listen:" << strerror(errno)
2562 << ", for tracee " << m_pid;
2563 goto done;
2564 }
2565 LOG(INFO) << "listen " << addr.sun_path << ", for tracee " << m_pid;
2566
2567 rc = 0;
2568
2569 m_server_socket = sock_fd;
2570 pthread_t thread;
2571 pthread_create(&thread, nullptr, ::handle_connect, this);
2572
2573done:
2574 return rc;
2575}
2576
2577int TraceProcess::setup_socket(int* pfd_number)
2578{
2579 int sockets[2];
2580 long ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, sockets);
2581 if (ret < 0) {
2582 LOG(FATAL) << "socketpair failed";
2583 if (pfd_number) *pfd_number = 0;
2584
2585 return -1;
2586 }
2587 m_recv_socket = sockets[0];
2588
2589 // Find a usable FD number to dup to in the child. RESERVED_SOCKET_FD
2590 // might already be used by an outer rr.
2591 int fd_number = RESERVED_SOCKET_FD;
2592 // We assume no other thread is mucking with this part of the fd address space.
2593 while (true) {
2594 ret = fcntl(fd_number, F_GETFD);
2595 if (ret < 0) {
2596 if (errno != EBADF) {
2597 LOG(FATAL) << "Error checking fd";
2598 }
2599 break;
2600 }
2601 ++fd_number;
2602 }
2603 if (pfd_number) *pfd_number = fd_number;
2604
2605 LOG(INFO) << "socketpair " << sockets[0]
2606 << ", " << sockets[1] << ", " << fd_number;
2607
2608 return sockets[1];
2609}
2610
2611static void mark_crash(const char* dir, int pid, int sig)
2612{
2613 char path[256];
2614 int len = snprintf(path, sizeof(path), "%s/crash.txt", dir);
2615 FILE* pf = fopen(path, "wb");
2616 if (pf) {
2617 fprintf(pf, "%d,%d\n", pid, sig);
2618 fclose(pf);
2619 }
2620 else {
2621 path[len-1] = 0;
2622 LOG(ERROR) << "Failed to mark crash:" << len << "," << path;
2623 }
2624}
2625
2626bool TraceProcess::process_signal(pid_t tid)
2627{
2628 bool exit_record = true;
2629
2630 switch (m_sig) {
2631 case SIGHUP:
2632 case SIGINT:
2633 case SIGKILL:
2634 case SIGPIPE:
2635 case SIGALRM:
2636 case SIGTERM:
2637 case SIGPOLL:
2638 case SIGPROF:
2639 {
2640 // term signal
2641 // increase max dump bytes first, or dump_maps or dump_event will ignore
2642 m_cfg->max_dump_bytes += 4*1024*1024*1024LL;
2643 if (!m_can_dump) {
2644 m_can_dump = true;
2645 link_exec_file(m_cfg->dump_dir, m_pid);
2646 dump_maps(-m_sig, PROT_EXEC|PROT_WRITE);
2647 }
2648 m_cfg->current_thread_only = false;
2649 dump_event(DUMP_REASON_signal+m_sig, tid, nullptr, 0);
2650 }
2651 break;
2652
2653 case SIGQUIT:
2654 case SIGILL:
2655 case SIGABRT:
2656 case SIGFPE:
2657 case SIGSEGV:
2658 case SIGBUS:
2659 {
2660 // core signal
2661 // increase max dump bytes first, or dump_maps or dump_event will ignore
2662 m_cfg->max_dump_bytes += 4*1024*1024*1024LL;
2663 if (!m_can_dump) {
2664 m_can_dump = true;
2665 link_exec_file(m_cfg->dump_dir, m_pid);
2666 dump_maps(-m_sig, PROT_EXEC|PROT_WRITE);
2667 }
2668 m_cfg->current_thread_only = false;
2669 dump_event(DUMP_REASON_signal+m_sig, tid, nullptr, 0);
2670 }
2671 break;
2672
2673 case SIGCHLD:
2674 case SIGCONT:
2675 case SIGSTOP:
2676 case SIGTSTP:
2677 case SIGTTIN:
2678 case SIGTTOU:
2679 case SIGSYS:
2680 case SIGTRAP:
2681 if (std::find(m_cfg->sigs.begin(), m_cfg->sigs.end(), m_sig) !=
2682 m_cfg->sigs.end()) {
2683 if (!m_can_dump) {
2684 m_can_dump = true;
2685 link_exec_file(m_cfg->dump_dir, m_pid);
2686 dump_maps(-m_sig, PROT_EXEC|PROT_WRITE);
2687 }
2688 dump_event(DUMP_REASON_signal+m_sig, tid, nullptr, 0);
2689 }
2690 else {
2691 m_sig = 0; // ignore
2692 exit_record = false;
2693 }
2694 break;
2695
2696 case SIGVTALRM: //firefox-esr 52.0 got SIGVTALRM
2697 exit_record = false;
2698 break;
2699
2700 default:
2701 // other signal, forward it to the kernel!
2702 exit_record = false;
2703 break;
2704 }
2705
2706 if (exit_record) {
2707#ifdef _DEBUG
2708 gdb_bt(tid);
2709#endif
2710
2711 mark_crash(m_cfg->dump_dir.data(), m_pid, m_sig);
2712 }
2713
2714 return exit_record;
2715}
2716
2717bool TraceProcess::process_status(int raw_status, pid_t tid)
2718{
2719 USER_REGS regs;
2720 WaitStatus status = WaitStatus(raw_status);
2721
2722 switch (status.type()) {
2723 case WaitStatus::EXIT:
2724 {
2725 remove_thread(tid);
2726 LOG(INFO) << "(EXIT-" << status.exit_code() << ") for tracee " << tid;
2727
2728 goto has_restart;
2729 }
2730 break;
2731 case WaitStatus::FATAL_SIGNAL:
2732 {
2733 LOG(INFO) << "(FATAL-" << signal_name(status.fatal_sig())
2734 << ") for tracee " << tid;
2735 siginfo_t pending_siginfo;
2736 ptrace(PTRACE_GETSIGINFO, tid, 0, &pending_siginfo);
2737 m_sig = pending_siginfo.si_signo;
2738
2739 dump_event(DUMP_REASON_signal+m_sig, tid, nullptr, 0);
2740 goto fatal_error;
2741 }
2742 break;
2743 case WaitStatus::SIGNAL_STOP:
2744 {
2745 siginfo_t pending_siginfo;
2746 LOG(INFO) << "(SIGNAL-STOP-" << signal_name(status.stop_sig())
2747 << ") for tracee " << tid;
2748 if (status.stop_sig() == SIGTRAP && remove_break_function(tid)) {
2749 goto has_restart;
2750 }
2751
2752 ptrace(PTRACE_GETSIGINFO, tid, 0, &pending_siginfo);
2753 m_sig = pending_siginfo.si_signo;
2754 if (process_signal(tid)) {
2755 goto fatal_error;
2756 }
2757 }
2758 break;
2759 case WaitStatus::GROUP_STOP:
2760 {
2761 int stop = status.group_stop();
2762 LOG(DEBUG) << "(GROUP-STOP-" << signal_name(stop) << ") for tracee " << tid;
2763
2764 /* If the PTRACE_O_TRACEEXEC option is not in effect, all successful
2765 calls to execve(2) by the traced process will cause it to be sent a
2766 SIGTRAP signal, giving the parent a chance to gain control before the
2767 new program begins execution.
2768 */
2769 if (stop == SIGTRAP) {
2770 if (!my_ptrace("skip_SIGTRAP", PTRACE_SYSCALL, tid, 0, 0))
2771 goto fatal_error;
2772 goto has_restart;
2773 }
2774
2775 /*
2776 * This ends ptrace-stop, but does *not* end group-stop.
2777 * This makes stopping signals work properly on straced process
2778 * (that is, process really stops. It used to continue to run).
2779 */
2780 if (!my_ptrace("skip_GROUPSTOP", PTRACE_LISTEN, tid, 0, 0))
2781 goto fatal_error;
2782 goto has_restart;
2783 }
2784 break;
2785 case WaitStatus::SYSCALL_STOP:
2786 {
2787 /**
2788 * Syscall events track syscalls through entry into the kernel,
2789 * processing in the kernel, and exit from the kernel.
2790 *
2791 * This also models interrupted syscalls. During recording, only
2792 * descheduled buffered syscalls /push/ syscall interruptions; all
2793 * others are detected at exit time and transformed into syscall
2794 * interruptions from the original, normal syscalls.
2795 *
2796 * Normal system calls (interrupted or not) record two events: ENTERING_SYSCALL
2797 * and EXITING_SYSCALL. If the process exits before the syscall exit (because
2798 * this is an exit/exit_group syscall or the process gets SIGKILL), there's no
2799 * syscall exit event.
2800 *
2801 * When PTRACE_SYSCALL is used, there will be three events:
2802 * ENTERING_SYSCALL_PTRACE to run the process until it gets into the kernel,
2803 * then ENTERING_SYSCALL and EXITING_SYSCALL. We need three events to handle
2804 * PTRACE_SYSCALL with clone/fork/vfork and execve. The tracee must run to
2805 * the ENTERING_SYSCALL_PTRACE state, allow a context switch so the ptracer
2806 * can modify tracee registers, then perform ENTERING_SYSCALL (which actually
2807 * creates the new task or does the exec), allow a context switch so the
2808 * ptracer can modify the new task or post-exec state in a PTRACE_EVENT_EXEC/
2809 * CLONE/FORK/VFORK, then perform EXITING_SYSCALL to get into the correct
2810 * post-syscall state.
2811 *
2812 * When PTRACE_SYSEMU is used, there will only be one event: an
2813 * ENTERING_SYSCALL_PTRACE.
2814 */
2815 /*TODO: HOW to handle interrupted syscall was restart?*/
2816#if defined(__aarch64__)
2817 static struct iovec io = {
2818 .iov_base = &regs,
2819 .iov_len = sizeof(regs)
2820 };
2821 int ok = my_ptrace("syscall_stop", PTRACE_GETREGSET, tid, (void*)NT_PRSTATUS, &io);
2822#else
2823 int ok = my_ptrace("syscall_stop", PTRACE_GETREGS, tid, nullptr, &regs);
2824#endif
2825 if (!ok) {
2826 goto do_restart;
2827 }
2828
2829 auto state = get_thread_state(tid);
2830#if defined(__mips64) || defined(__mips__) || defined(__sw_64) || defined(__aarch64__)
2831 if (state->second.state >= 0) {
2832 int syscall_no = get_syscall_no(&regs);
2833 get_syscall_args(&regs, state->second.args);
2834 if (process_syscall_enter(tid, syscall_no, state->second.args)) {
2835 state->second.state = syscall_no | SYSCALL_ENTER_MASK;
2836 }
2837 else {
2838 state->second.state = 0; //skip syscall exit-stop
2839 }
2840 }
2841 else {
2842 // ON MIPS: regs.v0 always return 0 when syscall exit-stop
2843 // FIXME: regs.a3=1 indicate failed of current syscall
2844 process_syscall_exit(tid, state->second.state & SYSCALL_NO_MASK,
2845 state->second.args, get_syscall_result(&regs));
2846 state->second.state = 0;
2847 }
2848#else
2849 int syscall_no = get_syscall_no(&regs);
2850 if (syscall_no < 0) {
2851 LOG(WARNING) << "syscall-number is negative for tracee " << tid;
2852 // negative syscall are treated as skip this call
2853 // for some syscall: SYS_rt_sigreturn, SYS_exit_group never return.
2854 state->second.state = 0; //skip syscall exit-stop
2855 goto do_restart;
2856 }
2857
2858 if ((state->second.state < 0) &&
2859 (syscall_no != (state->second.state & SYSCALL_NO_MASK))) {
2860 long previous = (state->second.state & SYSCALL_NO_MASK);
2861 LOG(WARNING) << "syscall-exit missing for tracee " << tid
2862 << ", current=" << syscall_name(syscall_no)
2863 << ", previous=" << syscall_name(previous);
2864 state->second.state = 0;
2865 ++m_counter.syscall_exit_miss;
2866
2867 // fake a EINTR result
2868 if (is_number_in_set(previous, &g_trace_set[0])) {
2869 long syscall_result = -EINTR;
2870 dump_event(DUMP_REASON_syscall_exit+previous,
2871 tid, &syscall_result, sizeof(syscall_result));
2872 }
2873 }
2874
2875 if (state->second.state >= 0) {
2876 get_syscall_args(&regs, state->second.args);
2877 if (process_syscall_enter(tid, syscall_no, state->second.args)) {
2878 state->second.state = syscall_no | SYSCALL_ENTER_MASK;
2879 }
2880 else {
2881 state->second.state = 0; //skip syscall exit-stop
2882 }
2883 }
2884 else {
2885 process_syscall_exit(tid, syscall_no, state->second.args,
2886 get_syscall_result(&regs));
2887 state->second.state = 0;
2888 }
2889#endif
2890 }
2891 break;
2892 case WaitStatus::PTRACE_EVENT:
2893 {
2894 int event = status.ptrace_event();
2895 LOG(DEBUG) << "(" << ptrace_event_name(event) << ") for tracee " << tid;
2896
2897 switch (event) {
2898 case PTRACE_EVENT_EXEC:
2899 {
2900 /*
2901 * Under Linux, execve changes pid to thread leader's pid,
2902 * and we see this changed pid on EVENT_EXEC and later,
2903 * execve sysexit. Leader "disappears" without exit
2904 * notification. Let user know that, drop leader's tcb,
2905 * and fix up pid in execve thread's tcb.
2906 * Effectively, execve thread's tcb replaces leader's tcb.
2907 *
2908 * BTW, leader is 'stuck undead' (doesn't report WIFEXITED
2909 * on exit syscall) in multithreaded programs exactly
2910 * in order to handle this case.
2911 *
2912 * PTRACE_GETEVENTMSG returns old pid starting from Linux 3.0.
2913 * On 2.6 and earlier, it can return garbage.
2914 */
2915 int old_tid = 0;
2916 if (!my_ptrace("PTRACE_EVENT_EXEC",
2917 PTRACE_GETEVENTMSG, tid, NULL, &old_tid)) {
2918 ++m_counter.ptrace_error;
2919 }
2920 else if (old_tid <=0 || old_tid == tid) {
2921 }
2922 else {
2923 remove_thread(old_tid);
2924 add_thread(tid);
2925 LOG(INFO) << "pid has changed from " << old_tid << " to " << tid;
2926 }
2927
2928 // maybe can receive PTRACE_EVENT_EXEC but no receive exec syscall exit
2929 m_exec_stop = true;
2930 post_exec_syscall(tid);
2931 }
2932 break;
2933 case PTRACE_EVENT_CLONE:
2934 case PTRACE_EVENT_FORK:
2935 case PTRACE_EVENT_VFORK:
2936 {
2937 pid_t new_tid = 0;
2938 if (my_ptrace("PTRACE_EVENT_FORK",
2939 PTRACE_GETEVENTMSG, tid, NULL, &new_tid)) {
2940 if (!add_thread(new_tid)) {
2941 shared_ptr<TraceProcess> child = make_shared<TraceProcess>(this);
2942 m_parent->add_process(child, new_tid);
2943 LOG(INFO) << new_tid << " newborn process on PTRACE_EVENT>>>";
2944 }
2945 else {
2946 LOG(INFO) << new_tid << " newborn thread on PTRACE_EVENT>>>";
2947 }
2948 if (m_cfg->mode == FAST) {
2949 if (is_number_in_set(SYS_clone, &g_trace_set[0])) {
2950 dump_clone(tid, new_tid);
2951 }
2952 }
2953 }
2954 else {
2955 ++m_counter.ptrace_error;
2956 }
2957 }
2958 break;
2959 case PTRACE_EVENT_EXIT:
2960 {
2961 /*The tracee is stopped early during process exit, when
2962 registers are still available.
2963 TODO: does it need to call dump_event, dump_maps ?
2964 */
2965 remove_thread(tid);
2966 LOG(INFO) << tid << " thread exited <<<";
2967 /*TODO: how to avoid child process enter zombie ?*/
2968 m_cont_type = CONTINUE;
2969 }
2970 break;
2971 case PTRACE_EVENT_SECCOMP:
2972 {
2973 ++m_counter.seccomp_stop;
2974
2975 /*After a PTRACE_EVENT_SECCOMP stop, seccomp will be rerun, with a
2976 SECCOMP_RET_TRACE rule now functioning the same as a SECCOMP_RET_ALLOW.
2977 Specifically, this means that if registers are not modified during
2978 the PTRACE_EVENT_SECCOMP stop, the system call will then be allowed.
2979 */
2980 }
2981 break;
2982 default:
2983 break;
2984 }
2985 }
2986 break;
2987 default:
2988 LOG(INFO)<< "Unknown status (" << status.type() << ") for tracee " << tid;
2989 break;
2990 }
2991
2992do_restart:
2993 if (!my_ptrace("process_status",
2994 static_cast<__ptrace_request>(m_cont_type),
2995 tid, nullptr, reinterpret_cast<void*>(m_sig))) {
2996 goto fatal_error;
2997 }
2998
2999 if (m_cont_type == CONTINUE && m_cfg->mode != FAST) {
3000 //NOTE: CONTINUE only run once, or all later syscall stop will missing!
3001 m_cont_type = CONTINUE_SYSCALL;
3002 }
3003
3004has_restart:
3005 m_sig = 0;
3006 return true;
3007
3008fatal_error:
3009 ++m_counter.ptrace_error;
3010 remove_thread(tid);
3011 return false;
3012}
3013
3014bool TraceProcess::start_record(pid_t pid2)
3015{
3016 MDRawSystemInfo info;
3017 int size = sizeof(info);
3018
3019 m_pid = pid2;
3020 m_nonce = g_nonce++;
3021 LOG(INFO) << "open process " << this << ", for tracee " << m_pid;
3022
3023 get_system_info(&info);
3024 if (m_cfg->mode == FAST) {
3025 memcpy(info.mode, "fast", 4);
3026 }
3027 else if (m_cfg->mode == NORMAL) {
3028 memcpy(info.mode, "hard", 4);
3029 }
3030
3031 string filename;
3032 filename = m_cfg->dump_dir + CONTEXT_FILE_NAME + std::to_string(m_pid);
3033 m_ctx_file.open(filename.data(), m_cfg->compress_level);
3034 if (!m_ctx_stream.init(m_pid, m_cfg->shared_buffer_size, &m_ctx_file)) {
3035 LOG(FATAL) << "Failed to initialized thread stream for " << m_pid;
3036 }
3037
3038 if (m_cfg->mode == FAST) {
3039 filename += ".clone";
3040 m_ctx_file2.open(filename.data(), m_cfg->compress_level);
3041 if (!m_ctx_stream2.init(m_pid, 1024*1024, &m_ctx_file2)) {
3042 LOG(FATAL) << "Failed to initialized thread stream for " << m_pid;
3043 }
3044 }
3045
3046 // dump system info
3047 m_counter.total_dumped = m_ctx_file.write(&size, sizeof(size));
3048 m_counter.total_dumped += m_ctx_file.write(&info, sizeof(info));
3049
3050 filename = m_cfg->dump_dir + MAP_FILE_NAME + std::to_string(m_pid);
3051 m_maps_file.open(filename.data(), m_cfg->compress_level);
3052
3053 // dump system info
3054 m_maps_file.write(&size, sizeof(size));
3055 m_maps_file.write(&info, sizeof(info));
3056
3057 m_sig = 0;
3058 add_thread(m_pid);
3059
3060 m_cont_type = (FAST == m_cfg->mode) ? CONTINUE : CONTINUE_SYSCALL;
3061
3062 if (m_cfg->mode == FAST) {
3063 setup_listener();
3064 }
3065 m_begin_time = time(NULL);
3066
3067 return true;
3068}
3069
3070bool TraceProcess::get_sym_address(const char* object_name,
3071 const char *name, unsigned long *addr)
3072{
3073 string libname;
3074 if (!strcmp(object_name, "libpthread.so.0")) {
3075 libname = PTHREAD_LIB;
3076 }
3077 else {
3078 libname = "/lib/x86_64-linux-gnu/";
3079 libname += object_name;
3080 // TODO: should parse link(libname)
3081 }
3082 auto it = m_symbols.find(libname);
3083 if (it != m_symbols.end()) {
3084 if (it->second.get()->get_sym_address(name,
3085 addr, elf::stt::object)) {
3086 return true;
3087 }
3088
3089 return it->second.get()->get_sym_address(name,
3090 addr, elf::stt::func);
3091 }
3092 return false;
3093}
3094
3095void TraceProcess::break_at_function(pid_t tid)
3096{
3097 uintptr_t address = m_breakpoint.address;
3098 const char* func = m_cfg->break_function.data();
3099 if (address > 0) {
3100 if (gdb_break(tid, address, &m_breakpoint.value) >= 0) {
3101 m_breakpoint.address = address;
3102 LOG(INFO) << "Set break at " << func
3103 << ":" << HEX(address) << " for tracee " << tid;
3104 }
3105 }
3106 else {
3107 LOG(WARNING) << "Not found function: " << func
3108 << " for tracee " << tid
3109 << ", will try search at next time !" << tid;
3110 }
3111}
3112
3113bool TraceProcess::remove_break_function(pid_t tid)
3114{
3115 USER_REGS regs;
3116
3117#if defined(__aarch64__)
3118 static struct iovec io = {
3119 .iov_base = &regs,
3120 .iov_len = sizeof(regs)
3121 };
3122 ptrace(PTRACE_GETREGSET, tid, NT_PRSTATUS, &io);
3123#else
3124 ptrace(PTRACE_GETREGS, tid, nullptr, &regs);
3125#endif
3126
3127#if defined(__x86_64__)
3128 if (regs.rip == m_breakpoint.address + 1) {
3129 --regs.rip;
3130#elif defined(__mips64) || defined(__aarch64__) || defined(__sw_64)
3131 // ARM: pc + 4 or pc no changed
3132 // MIPS: pc no changed
3133 // SUNWAY: pc + 4
3134 if (regs.pc == m_breakpoint.address || regs.pc == m_breakpoint.address + 4) {
3135 if (regs.pc == m_breakpoint.address + 4) regs.pc -= 4;
3136#else
3137 assert(0);
3138 if (0) {
3139#endif
3140 LOG(INFO) << "hit break at "
3141#if defined(__x86_64__)
3142 << HEX(regs.rip)
3143#elif defined(__mips64) || defined(__sw_64) || defined(__aarch64__)
3144 << HEX(regs.pc)
3145#else
3146 << HEX(regs.ip)
3147#endif
3148 << " for tracee " << tid;
3149
3150 m_can_dump = true;
3151 dump_maps(-SIGTRAP, PROT_EXEC|PROT_WRITE);
3152 gdb_delete(tid, m_breakpoint.address, m_breakpoint.value);
3153 m_breakpoint.address = 0;
3154
3155 // reset pc to the begin of the target function.
3156#if defined(__aarch64__)
3157 ptrace(PTRACE_SETREGSET, tid, NT_PRSTATUS, &io);
3158#else
3159 ptrace(PTRACE_SETREGS, tid, nullptr, &regs);
3160#endif
3161 ptrace(PTRACE_SYSCALL, tid, 0, 0);
3162 return true;
3163 }
3164
3165 return false;
3166}
3167