1/*
2 * fault.c
3 *
4 * Copyright (C) 2008-2014 Aerospike, Inc.
5 *
6 * Portions may be licensed to Aerospike, Inc. under one or more contributor
7 * license agreements.
8 *
9 * This program is free software: you can redistribute it and/or modify it under
10 * the terms of the GNU Affero General Public License as published by the Free
11 * Software Foundation, either version 3 of the License, or (at your option) any
12 * later version.
13 *
14 * This program is distributed in the hope that it will be useful, but WITHOUT
15 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
16 * FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
17 * details.
18 *
19 * You should have received a copy of the GNU Affero General Public License
20 * along with this program. If not, see http://www.gnu.org/licenses/
21 */
22
23#include "fault.h"
24
25#include <errno.h>
26#include <fcntl.h>
27#include <link.h>
28#include <signal.h>
29#include <stdarg.h>
30#include <stdbool.h>
31#include <stddef.h>
32#include <stdint.h>
33#include <stdio.h>
34#include <stdlib.h>
35#include <string.h>
36#include <time.h>
37#include <unistd.h>
38#include <sys/time.h>
39
40#include "aerospike/as_log.h"
41#include "citrusleaf/alloc.h"
42#include "citrusleaf/cf_b64.h"
43
44#include "cf_mutex.h"
45#include "shash.h"
46
47
48/*
49 * Maximum length for logging binary (i.e., hexadecimal or bit string) data.
50 */
51#define MAX_BINARY_BUF_SZ (64 * 1024)
52
53// TODO - do we really need O_NONBLOCK for log sinks?
54#define SINK_OPEN_FLAGS (O_WRONLY | O_CREAT | O_NONBLOCK | O_APPEND)
55#define SINK_OPEN_MODE (S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH)
56
57#define SINK_REOPEN_FLAGS (O_WRONLY | O_CREAT | O_NONBLOCK | O_TRUNC)
58
59/* cf_fault_context_strings, cf_fault_severity_strings, cf_fault_scope_strings
60 * Strings describing fault states */
61
62/* MUST BE KEPT IN SYNC WITH FAULT.H */
63
64char *cf_fault_context_strings[] = {
65 "misc",
66
67 "alloc",
68 "arenax",
69 "hardware",
70 "msg",
71 "rbuffer",
72 "socket",
73 "tls",
74 "vmapx",
75 "xmem",
76
77 "aggr",
78 "appeal",
79 "as",
80 "batch",
81 "bin",
82 "config",
83 "clustering",
84 "drv_ssd",
85 "exchange",
86 "fabric",
87 "flat",
88 "geo",
89 "hb",
90 "health",
91 "hlc",
92 "index",
93 "info",
94 "info-port",
95 "job",
96 "migrate",
97 "mon",
98 "namespace",
99 "nsup",
100 "particle",
101 "partition",
102 "paxos",
103 "predexp",
104 "proto",
105 "proxy",
106 "proxy-divert",
107 "query",
108 "record",
109 "roster",
110 "rw",
111 "rw-client",
112 "scan",
113 "security",
114 "service",
115 "service-list",
116 "sindex",
117 "skew",
118 "smd",
119 "storage",
120 "truncate",
121 "tsvc",
122 "udf",
123 "xdr",
124 "xdr-client",
125 "xdr-http"
126};
127
128COMPILER_ASSERT(sizeof(cf_fault_context_strings) / sizeof(char*) == CF_FAULT_CONTEXT_UNDEF);
129
130static const char *cf_fault_severity_strings[] = {
131 "CRITICAL",
132 "WARNING",
133 "INFO",
134 "DEBUG",
135 "DETAIL"
136};
137
138COMPILER_ASSERT(sizeof(cf_fault_severity_strings) / sizeof(const char*) == CF_FAULT_SEVERITY_UNDEF);
139
140cf_fault_sink cf_fault_sinks[CF_FAULT_SINKS_MAX];
141cf_fault_severity cf_fault_filter[CF_FAULT_CONTEXT_UNDEF];
142int cf_fault_sinks_inuse = 0;
143int num_held_fault_sinks = 0;
144
145cf_shash *g_ticker_hash = NULL;
146#define CACHE_MSG_MAX_SIZE 128
147
148typedef struct cf_fault_cache_hkey_s {
149 // Members most likely to be unique come first:
150 int line;
151 cf_fault_context context;
152 const char *file_name;
153 cf_fault_severity severity;
154 char msg[CACHE_MSG_MAX_SIZE];
155} __attribute__((__packed__)) cf_fault_cache_hkey;
156
157bool g_use_local_time = false;
158
159static bool g_log_millis = false;
160
161#define MAX_BACKTRACE_DEPTH 50
162
163extern char __executable_start;
164extern char __etext;
165
166// Filter stderr logging at this level when there are no sinks:
167#define NO_SINKS_LIMIT CF_WARNING
168
169static inline const char*
170severity_tag(cf_fault_severity severity)
171{
172 return severity == CF_CRITICAL ?
173 "FAILED ASSERTION" : cf_fault_severity_strings[severity];
174}
175
176/* cf_context_at_severity
177 * Return whether the given context is set to this severity level or higher. */
178bool
179cf_context_at_severity(const cf_fault_context context, const cf_fault_severity severity)
180{
181 return (severity <= cf_fault_filter[context]);
182}
183
184static inline void
185cf_fault_set_severity(const cf_fault_context context, const cf_fault_severity severity)
186{
187 cf_fault_filter[context] = severity;
188
189 // UDF logging relies on the common as_log facility.
190 // Set as_log_level whenever AS_UDF severity changes.
191 if (context == AS_UDF && severity < CF_FAULT_SEVERITY_UNDEF) {
192 as_log_set_level((as_log_level)severity);
193 }
194}
195
196static inline uint32_t
197cache_hash_fn(const void *key)
198{
199 return (uint32_t)((const cf_fault_cache_hkey*)key)->line +
200 *(uint32_t*)((const cf_fault_cache_hkey*)key)->msg;
201}
202
203/* cf_fault_init
204 * This code MUST be the first thing executed by main(). */
205void
206cf_fault_init()
207{
208 // Initialize the fault filter.
209 for (int j = 0; j < CF_FAULT_CONTEXT_UNDEF; j++) {
210 // We start with no sinks, so let's be in-sync with that.
211 cf_fault_set_severity(j, NO_SINKS_LIMIT);
212 }
213
214 // Create the ticker hash.
215 g_ticker_hash = cf_shash_create(cache_hash_fn, sizeof(cf_fault_cache_hkey),
216 sizeof(uint32_t), 256, CF_SHASH_MANY_LOCK);
217}
218
219
220/* cf_fault_sink_add
221 * Register an sink for faults */
222cf_fault_sink *
223cf_fault_sink_add(char *path)
224{
225 cf_fault_sink *s;
226
227 if ((CF_FAULT_SINKS_MAX - 1) == cf_fault_sinks_inuse)
228 return(NULL);
229
230 s = &cf_fault_sinks[cf_fault_sinks_inuse++];
231 s->path = cf_strdup(path);
232 if (0 == strncmp(path, "stderr", 6))
233 s->fd = 2;
234 else {
235 if (-1 == (s->fd = open(path, SINK_OPEN_FLAGS, SINK_OPEN_MODE))) {
236 cf_fault_sinks_inuse--;
237 return(NULL);
238 }
239 }
240
241 for (int i = 0; i < CF_FAULT_CONTEXT_UNDEF; i++)
242 s->limit[i] = CF_INFO;
243
244 return(s);
245}
246
247
248/* cf_fault_sink_hold
249 * Register but don't activate a sink for faults - return sink object pointer on
250 * success, NULL on failure. Only use at startup when parsing config file. After
251 * all sinks are registered, activate via cf_fault_sink_activate_all_held(). */
252cf_fault_sink *
253cf_fault_sink_hold(char *path)
254{
255 if (num_held_fault_sinks >= CF_FAULT_SINKS_MAX) {
256 cf_warning(CF_MISC, "too many fault sinks");
257 return NULL;
258 }
259
260 cf_fault_sink *s = &cf_fault_sinks[num_held_fault_sinks];
261
262 s->path = cf_strdup(path);
263
264 // If a context is not added, its runtime default will be CF_INFO.
265 for (int i = 0; i < CF_FAULT_CONTEXT_UNDEF; i++) {
266 s->limit[i] = CF_INFO;
267 }
268
269 num_held_fault_sinks++;
270
271 return s;
272}
273
274
275/* cf_fault_console_is_held
276 * Return whether the console is held.
277 */
278bool
279cf_fault_console_is_held()
280{
281 for (int i = 0; i < num_held_fault_sinks; i++) {
282 cf_fault_sink *s = &cf_fault_sinks[i];
283 if (!strcmp(s->path, "stderr")) {
284 return true;
285 }
286 }
287
288 return false;
289}
290
291
292static void
293fault_filter_adjust(cf_fault_sink *s, cf_fault_context ctx)
294{
295 // Don't adjust filter while adding contexts during config file parsing.
296 if (cf_fault_sinks_inuse == 0) {
297 return;
298 }
299
300 // Fault filter must allow logs at a less critical severity.
301 if (s->limit[ctx] > cf_fault_filter[ctx]) {
302 cf_fault_set_severity(ctx, s->limit[ctx]);
303 }
304 // Fault filter might be able to become stricter - check all sinks.
305 else if (s->limit[ctx] < cf_fault_filter[ctx]) {
306 cf_fault_severity severity = CF_CRITICAL;
307
308 for (int i = 0; i < cf_fault_sinks_inuse; i++) {
309 cf_fault_sink *t = &cf_fault_sinks[i];
310
311 if (t->limit[ctx] > severity) {
312 severity = t->limit[ctx];
313 }
314 }
315
316 cf_fault_set_severity(ctx, severity);
317 }
318}
319
320
321/* cf_fault_sink_activate_all_held
322 * Activate all sinks on hold - return 0 on success, -1 on failure. Only use
323 * once at startup, after parsing config file. On failure there's no cleanup,
324 * assumes caller will stop the process. */
325int
326cf_fault_sink_activate_all_held()
327{
328 for (int i = 0; i < num_held_fault_sinks; i++) {
329 if (cf_fault_sinks_inuse >= CF_FAULT_SINKS_MAX) {
330 // In case this isn't first sink, force logging as if no sinks:
331 cf_fault_sinks_inuse = 0;
332 cf_warning(CF_MISC, "too many fault sinks");
333 return -1;
334 }
335
336 cf_fault_sink *s = &cf_fault_sinks[i];
337
338 // "Activate" the sink.
339 if (0 == strncmp(s->path, "stderr", 6)) {
340 s->fd = 2;
341 }
342 else if (-1 == (s->fd = open(s->path, SINK_OPEN_FLAGS, SINK_OPEN_MODE))) {
343 // In case this isn't first sink, force logging as if no sinks:
344 cf_fault_sinks_inuse = 0;
345 cf_warning(CF_MISC, "can't open %s: %s", s->path, cf_strerror(errno));
346 return -1;
347 }
348
349 cf_fault_sinks_inuse++;
350
351 // Adjust the fault filter to the runtime levels.
352 for (int j = 0; j < CF_FAULT_CONTEXT_UNDEF; j++) {
353 fault_filter_adjust(s, (cf_fault_context)j);
354 }
355 }
356
357 return 0;
358}
359
360
361/* cf_fault_sink_get_fd_list
362 * Fill list with all active sink fds, excluding stderr - return list count. */
363int
364cf_fault_sink_get_fd_list(int *fds)
365{
366 int num_open_fds = 0;
367
368 for (int i = 0; i < cf_fault_sinks_inuse; i++) {
369 cf_fault_sink *s = &cf_fault_sinks[i];
370
371 // Exclude stderr.
372 if (s->fd > 2 && 0 != strncmp(s->path, "stderr", 6)) {
373 fds[num_open_fds++] = s->fd;
374 }
375 }
376
377 return num_open_fds;
378}
379
380
381static int
382cf_fault_sink_addcontext_all(char *context, char *severity)
383{
384 for (int i = 0; i < cf_fault_sinks_inuse; i++) {
385 cf_fault_sink *s = &cf_fault_sinks[i];
386 int rv = cf_fault_sink_addcontext(s, context, severity);
387 if (rv != 0) return(rv);
388 }
389 return(0);
390}
391
392
393int
394cf_fault_sink_addcontext(cf_fault_sink *s, char *context, char *severity)
395{
396 if (s == 0) return(cf_fault_sink_addcontext_all(context, severity));
397
398 cf_fault_context ctx = CF_FAULT_CONTEXT_UNDEF;
399 cf_fault_severity sev = CF_FAULT_SEVERITY_UNDEF;
400
401 for (int i = 0; i < CF_FAULT_SEVERITY_UNDEF; i++) {
402 if (0 == strncasecmp(cf_fault_severity_strings[i], severity, strlen(severity)))
403 sev = (cf_fault_severity)i;
404 }
405 if (CF_FAULT_SEVERITY_UNDEF == sev)
406 return(-1);
407
408 if (0 == strncasecmp(context, "any", 3)) {
409 for (int i = 0; i < CF_FAULT_CONTEXT_UNDEF; i++) {
410 s->limit[i] = sev;
411 fault_filter_adjust(s, (cf_fault_context)i);
412 }
413 } else {
414 for (int i = 0; i < CF_FAULT_CONTEXT_UNDEF; i++) {
415 //strncasecmp only compared the length of context passed in the 3rd argument and as cf_fault_context_strings has info and info port,
416 //So when you try to set info to debug it will set info-port to debug . Just forcing it to check the length from cf_fault_context_strings
417 if (0 == strncasecmp(cf_fault_context_strings[i], context, strlen(cf_fault_context_strings[i])))
418 ctx = (cf_fault_context)i;
419 }
420 if (CF_FAULT_CONTEXT_UNDEF == ctx)
421 return(-1);
422
423 s->limit[ctx] = sev;
424 fault_filter_adjust(s, ctx);
425 }
426
427 return(0);
428}
429
430
431void
432cf_fault_use_local_time(bool val)
433{
434 g_use_local_time = val;
435}
436
437bool
438cf_fault_is_using_local_time()
439{
440 return g_use_local_time;
441}
442
443void
444cf_fault_log_millis(bool log_millis)
445{
446 g_log_millis = log_millis;
447}
448
449bool
450cf_fault_is_logging_millis()
451{
452 return g_log_millis;
453}
454
455int
456cf_sprintf_now(char* mbuf, size_t limit)
457{
458 struct tm nowtm;
459
460 if (cf_fault_is_logging_millis()) {
461 // Logging milli seconds as well.
462 struct timeval curTime;
463 gettimeofday(&curTime, NULL);
464 int millis = curTime.tv_usec / 1000;
465 int pos = 0;
466 if (g_use_local_time) {
467 localtime_r(&curTime.tv_sec, &nowtm);
468 pos = strftime(mbuf, limit, "%b %d %Y %T.", &nowtm);
469 pos +=
470 snprintf(mbuf + pos, limit - pos, "%03d", millis);
471 pos +=
472 strftime(mbuf + pos, limit - pos, " GMT%z: ", &nowtm);
473 return pos;
474 } else {
475 gmtime_r(&curTime.tv_sec, &nowtm);
476 pos = strftime(mbuf, limit, "%b %d %Y %T.", &nowtm);
477 pos +=
478 snprintf(mbuf + pos, limit - pos, "%03d", millis);
479 pos +=
480 strftime(mbuf + pos, limit - pos, " %Z: ", &nowtm);
481 return pos;
482 }
483 }
484
485 // Logging only seconds.
486 time_t now = time(NULL);
487
488 if (g_use_local_time) {
489 localtime_r(&now, &nowtm);
490 return strftime(mbuf, limit, "%b %d %Y %T GMT%z: ", &nowtm);
491 } else {
492 gmtime_r(&now, &nowtm);
493 return strftime(mbuf, limit, "%b %d %Y %T %Z: ", &nowtm);
494 }
495}
496
497/* cf_fault_event
498 * Respond to a fault */
499void
500cf_fault_event(const cf_fault_context context, const cf_fault_severity severity,
501 const char *file_name, const int line, const char *msg, ...)
502{
503 va_list argp;
504 char mbuf[1024];
505 size_t pos;
506
507
508 /* Make sure there's always enough space for the \n\0. */
509 size_t limit = sizeof(mbuf) - 2;
510
511 /* Set the timestamp */
512 pos = cf_sprintf_now(mbuf, limit);
513
514 /* Set the context/scope/severity tag */
515 pos += snprintf(mbuf + pos, limit - pos, "%s (%s): ", severity_tag(severity), cf_fault_context_strings[context]);
516
517 /*
518 * snprintf() and vsnprintf() will not write more than the size specified,
519 * but they return the size that would have been written without truncation.
520 * These checks make sure there's enough space for the final \n\0.
521 */
522 if (pos > limit) {
523 pos = limit;
524 }
525
526 /* Set the location: filename and line number */
527 if (file_name) {
528 pos += snprintf(mbuf + pos, limit - pos, "(%s:%d) ", file_name, line);
529 }
530
531 if (pos > limit) {
532 pos = limit;
533 }
534
535 /* Append the message */
536 va_start(argp, msg);
537 pos += vsnprintf(mbuf + pos, limit - pos, msg, argp);
538 va_end(argp);
539
540 if (pos > limit) {
541 pos = limit;
542 }
543
544 pos += snprintf(mbuf + pos, 2, "\n");
545
546 /* Route the message to the correct destinations */
547 if (0 == cf_fault_sinks_inuse) {
548 /* If no fault sinks are defined, use stderr for important messages */
549 if (severity <= NO_SINKS_LIMIT)
550 fprintf(stderr, "%s", mbuf);
551 } else {
552 for (int i = 0; i < cf_fault_sinks_inuse; i++) {
553 if ((severity <= cf_fault_sinks[i].limit[context]) || (CF_CRITICAL == severity)) {
554 if (0 >= write(cf_fault_sinks[i].fd, mbuf, pos)) {
555 // this is OK for a bit in case of a HUP. It's even better to queue the buffers and apply them
556 // after the hup. TODO.
557 fprintf(stderr, "internal failure in fault message write: %s\n", cf_strerror(errno));
558 }
559 }
560 }
561 }
562
563 /* Critical errors */
564 if (CF_CRITICAL == severity) {
565 fflush(NULL);
566
567 // Our signal handler will log a stack trace.
568 raise(SIGUSR1);
569 }
570} // end cf_fault_event()
571
572
573/**
574 * Generate a Packed Hex String Representation of the binary string.
575 * e.g. 0xfc86e83a6d6d3024659e6fe48c351aaaf6e964a5
576 * The value is preceeded by a "0x" to denote Hex (which allows it to be
577 * used in other contexts as a hex number).
578 */
579int
580generate_packed_hex_string(const void *mem_ptr, uint32_t len, char* output)
581{
582 uint8_t *d = (uint8_t *) mem_ptr;
583 char* p = output;
584 char* startp = p; // Remember where we started.
585
586 *p++ = '0';
587 *p++ = 'x';
588
589 for (uint32_t i = 0; i < len; i++) {
590 sprintf(p, "%02x", d[i]);
591 p += 2;
592 }
593 *p++ = 0; // Null terminate the output buffer.
594 return (int) (p - startp); // show how much space we used.
595} // end generate_packed_hex_string()
596
597
598/**
599 * Generate a Spaced Hex String Representation of the binary string.
600 * e.g. fc 86 e8 3a 6d 6d 30 24 65 9e 6f e4 8c 35 1a aa f6 e9 64 a5
601 */
602int
603generate_spaced_hex_string(const void *mem_ptr, uint32_t len, char* output)
604{
605 uint8_t *d = (uint8_t *) mem_ptr;
606 char* p = output;
607 char* startp = p; // Remember where we started.
608
609 for (uint32_t i = 0; i < len; i++) {
610 sprintf(p, "%02x ", d[i]); // Notice the space after the 02x.
611 p += 3;
612 }
613 *p++ = 0; // Null terminate the output buffer.
614 return (int) (p - startp); // show how much space we used.
615} // end generate_spaced_hex_string()
616
617
618/**
619 * Generate a Column Hex String Representation of the binary string.
620 * The Columns will be four two-byte values, with spaces between the bytes:
621 * fc86 e83a 6d6d 3024
622 * 659e 6fe4 8c35 1aaa
623 * f6e9 64a5
624 */
625int
626generate_column_hex_string(const void *mem_ptr, uint32_t len, char* output)
627{
628 uint8_t *d = (uint8_t *) mem_ptr;
629 char* p = output;
630 uint32_t i;
631 char* startp = p; // Remember where we started.
632
633 *p++ = '\n'; // Start out on a new line
634
635 for (i = 0; i < len; i++) {
636 sprintf(p, "%02x ", d[i]); // Two chars and a space
637 p += 3;
638 if ((i+1) % 8 == 0 && i != 0) {
639 *p++ = '\n'; // add a line return
640 }
641 }
642 *p++ = '\n'; // Finish with a new line
643 *p++ = 0; // Null terminate the output buffer.
644 return (int) (p - startp); // show how much space we used.
645} // end generate_column_hex_string()
646
647
648/**
649 * Generate a Base64 String Representation of the binary string.
650 * Base64 encoding converts three octets into four 6-bit encoded characters.
651 * So, the string 8-bit bytes are broken down into 6 bit values, each of which
652 * is then converted into a base64 value.
653 * So, for example, the string "Man" :: M[77: 0x4d)] a[97(0x61)] n[110(0x6e)]
654 * Bits: (4)0100 (d)1101 (6)0110 (1)0001 (6)0110 (e)1110
655 * Base 64 bits: 010011 010110 000101 101110
656 * Base 64 Rep: 010011(19) 010110(22) 000101(5) 101110(46)
657 * Base 64 Chars: T(19) W(22) F(5) u(46)
658 * and so this string is converted into the Base 64 string: "TWFu"
659 */
660int generate_base64_string(const void *mem_ptr, uint32_t len, char output_buf[])
661{
662 uint32_t encoded_len = cf_b64_encoded_len(len);
663 // TODO - check that output_buf is big enough, and/or truncate.
664
665 cf_b64_encode((const uint8_t*)mem_ptr, len, output_buf);
666
667 output_buf[encoded_len] = 0; // null-terminate
668
669 return (int)(encoded_len + 1); // bytes we used, including null-terminator
670} // end generate_base64_hex_string()
671
672
673/**
674 * Generate a BIT representation with spaces between the four bit groups.
675 * Print the bits left to right (big to small).
676 * This is assuming BIG ENDIAN representation (most significant bit is left).
677 */
678int generate_4spaced_bits_string(const void *mem_ptr, uint32_t len, char* output)
679{
680 uint8_t *d = (uint8_t *) mem_ptr;
681 char* p = output;
682 uint8_t uint_val;
683 uint8_t mask = 0x80; // largest single bit value in a byte
684 char* startp = p; // Remember where we started.
685
686 // For each byte in the string
687 for (uint32_t i = 0; i < len; i++) {
688 uint_val = d[i];
689 for (int j = 0; j < 8; j++) {
690 sprintf(p, "%1d", ((uint_val << j) & mask));
691 p++;
692 // Add a space after every 4th bit
693 if ( (j+1) % 4 == 0 ) *p++ = ' ';
694 }
695 }
696 *p++ = 0; // Null terminate the output buffer.
697 return (int) (p - startp); // show how much space we used.
698} // end generate_4spaced_bits_string()
699
700/**
701 * Generate a BIT representation of columns with spaces between the
702 * four bit groups. Columns will be 8 columns of 4 bits.
703 * (1 32 bit word per row)
704 */
705int generate_column_bits_string(const void *mem_ptr, uint32_t len, char* output)
706{
707 uint8_t *d = (uint8_t *) mem_ptr;
708 char* p = output;
709 uint8_t uint_val;
710 uint8_t mask = 0x80; // largest single bit value in a byte
711 char* startp = p; // Remember where we started.
712
713 // Start on a new line
714 *p++ = '\n';
715
716 // For each byte in the string
717 for (uint32_t i = 0; i < len; i++) {
718 uint_val = d[i];
719 for (int j = 0; j < 8; j++) {
720 sprintf(p, "%1d", ((uint_val << j) & mask));
721 p++;
722 // Add a space after every 4th bit
723 if ((j + 1) % 4 == 0) *p++ = ' ';
724 }
725 // Add a line return after every 4th byte
726 if ((i + 1) % 4 == 0) *p++ = '\n';
727 }
728 *p++ = 0; // Null terminate the output buffer.
729 return (int) (p - startp); // show how much space we used.
730} // end generate_column_bits_string()
731
732
733/* cf_fault_event -- TWO: Expand on the LOG ability by being able to
734 * print the contents of a BINARY array if we're passed a valid ptr (not NULL).
735 * We will print the array according to "format".
736 * Parms:
737 * (*) scope: The module family (e.g. AS_RW, AS_UDF...)
738 * (*) severify: The scope severity (e.g. INFO, DEBUG, DETAIL)
739 * (*) file_name: Ptr to the FILE generating the call
740 * (*) line: The function (really, the FILE) line number of the source call
741 * (*) mem_ptr: Ptr to memory location of binary array (or NULL)
742 * (*) len: Length of the binary string
743 * (*) format: The single char showing the format (e.g. 'D', 'B', etc)
744 * (*) msg: The format msg string
745 * (*) ... : The variable set of parameters the correspond to the msg string.
746 *
747 * NOTE: We will eventually merge this function with the original cf_fault_event()
748 **/
749void
750cf_fault_event2(const cf_fault_context context,
751 const cf_fault_severity severity, const char *file_name, const int line,
752 const void *mem_ptr, size_t len, cf_display_type dt, const char *msg, ...)
753{
754 va_list argp;
755 char mbuf[MAX_BINARY_BUF_SZ];
756 size_t pos;
757
758 char binary_buf[MAX_BINARY_BUF_SZ];
759
760 // Arbitrarily limit output to a fixed maximum length.
761 if (len > MAX_BINARY_BUF_SZ) {
762 len = MAX_BINARY_BUF_SZ;
763 }
764 char * labelp = NULL; // initialize to quiet build warning
765
766 /* Make sure there's always enough space for the \n\0. */
767 size_t limit = sizeof(mbuf) - 2;
768
769 /* Set the timestamp */
770 pos = cf_sprintf_now(mbuf, limit);
771
772 // If we're given a valid MEMORY POINTER for a binary value, then
773 // compute the string that corresponds to the bytes.
774 if (mem_ptr) {
775 switch (dt) {
776 case CF_DISPLAY_HEX_DIGEST:
777 labelp = "Digest";
778 generate_packed_hex_string(mem_ptr, len, binary_buf);
779 break;
780 case CF_DISPLAY_HEX_SPACED:
781 labelp = "HexSpaced";
782 generate_spaced_hex_string(mem_ptr, len, binary_buf);
783 break;
784 case CF_DISPLAY_HEX_PACKED:
785 labelp = "HexPacked";
786 generate_packed_hex_string(mem_ptr, len, binary_buf);
787 break;
788 case CF_DISPLAY_HEX_COLUMNS:
789 labelp = "HexColumns";
790 generate_column_hex_string(mem_ptr, len, binary_buf);
791 break;
792 case CF_DISPLAY_BASE64:
793 labelp = "Base64";
794 generate_base64_string(mem_ptr, len, binary_buf);
795 break;
796 case CF_DISPLAY_BITS_SPACED:
797 labelp = "BitsSpaced";
798 generate_4spaced_bits_string(mem_ptr, len, binary_buf);
799 break;
800 case CF_DISPLAY_BITS_COLUMNS:
801 labelp = "BitsColumns";
802 generate_column_bits_string(mem_ptr, len, binary_buf);
803 break;
804 default:
805 labelp = "Unknown Format";
806 binary_buf[0] = 0; // make sure it's null terminated.
807 break;
808
809 } // end switch
810 } // if binary data is present
811
812 /* Set the context/scope/severity tag */
813 pos += snprintf(mbuf + pos, limit - pos, "%s (%s): ",
814 severity_tag(severity),
815 cf_fault_context_strings[context]);
816
817 /*
818 * snprintf() and vsnprintf() will not write more than the size specified,
819 * but they return the size that would have been written without truncation.
820 * These checks make sure there's enough space for the final \n\0.
821 */
822 if (pos > limit) {
823 pos = limit;
824 }
825
826 /* Set the location: filename and line number */
827 if (file_name) {
828 pos += snprintf(mbuf + pos, limit - pos, "(%s:%d) ", file_name, line);
829 }
830
831 // Check for overflow (see above).
832 if (pos > limit) {
833 pos = limit;
834 }
835
836 /* Append the message */
837 va_start(argp, msg);
838 pos += vsnprintf(mbuf + pos, limit - pos, msg, argp);
839 va_end(argp);
840
841 // Check for overflow (see above).
842 if (pos > limit) {
843 pos = limit;
844 }
845
846 // Append our final BINARY string, if present (some might pass in NULL).
847 if ( mem_ptr ) {
848 pos += snprintf(mbuf + pos, limit - pos, "<%s>:%s", labelp, binary_buf);
849 }
850
851 // Check for overflow (see above).
852 if (pos > limit) {
853 pos = limit;
854 }
855
856 pos += snprintf(mbuf + pos, 2, "\n");
857
858 /* Route the message to the correct destinations */
859 if (0 == cf_fault_sinks_inuse) {
860 /* If no fault sinks are defined, use stderr for critical messages */
861 if (CF_CRITICAL == severity)
862 fprintf(stderr, "%s", mbuf);
863 } else {
864 for (int i = 0; i < cf_fault_sinks_inuse; i++) {
865 if ((severity <= cf_fault_sinks[i].limit[context]) || (CF_CRITICAL == severity)) {
866 if (0 >= write(cf_fault_sinks[i].fd, mbuf, pos)) {
867 // this is OK for a bit in case of a HUP. It's even better to queue the buffers and apply them
868 // after the hup. TODO.
869 fprintf(stderr, "internal failure in fault message write: %s\n", cf_strerror(errno));
870 }
871 }
872 }
873 }
874
875 /* Critical errors */
876 if (CF_CRITICAL == severity) {
877 fflush(NULL);
878
879 // Our signal handler will log a stack trace.
880 raise(SIGUSR1);
881 }
882}
883
884
885void
886cf_fault_event_nostack(const cf_fault_context context,
887 const cf_fault_severity severity, const char *fn, const int line,
888 const char *msg, ...)
889{
890 va_list argp;
891 char mbuf[1024];
892 time_t now;
893 struct tm nowtm;
894 size_t pos;
895
896 /* Make sure there's always enough space for the \n\0. */
897 size_t limit = sizeof(mbuf) - 2;
898
899 /* Set the timestamp */
900 now = time(NULL);
901
902 if (g_use_local_time) {
903 localtime_r(&now, &nowtm);
904 pos = strftime(mbuf, limit, "%b %d %Y %T GMT%z: ", &nowtm);
905 }
906 else {
907 gmtime_r(&now, &nowtm);
908 pos = strftime(mbuf, limit, "%b %d %Y %T %Z: ", &nowtm);
909 }
910
911 /* Set the context/scope/severity tag */
912 pos += snprintf(mbuf + pos, limit - pos, "%s (%s): ", severity_tag(severity), cf_fault_context_strings[context]);
913
914 /*
915 * snprintf() and vsnprintf() will not write more than the size specified,
916 * but they return the size that would have been written without truncation.
917 * These checks make sure there's enough space for the final \n\0.
918 */
919 if (pos > limit) {
920 pos = limit;
921 }
922
923 /* Set the location */
924 if (fn)
925 pos += snprintf(mbuf + pos, limit - pos, "(%s:%d) ", fn, line);
926
927 if (pos > limit) {
928 pos = limit;
929 }
930
931 /* Append the message */
932 va_start(argp, msg);
933 pos += vsnprintf(mbuf + pos, limit - pos, msg, argp);
934 va_end(argp);
935
936 if (pos > limit) {
937 pos = limit;
938 }
939
940 pos += snprintf(mbuf + pos, 2, "\n");
941
942 /* Route the message to the correct destinations */
943 if (0 == cf_fault_sinks_inuse) {
944 /* If no fault sinks are defined, use stderr for important messages */
945 if (severity <= NO_SINKS_LIMIT)
946 fprintf(stderr, "%s", mbuf);
947 } else {
948 for (int i = 0; i < cf_fault_sinks_inuse; i++) {
949 if ((severity <= cf_fault_sinks[i].limit[context]) || (CF_CRITICAL == severity)) {
950 if (0 >= write(cf_fault_sinks[i].fd, mbuf, pos)) {
951 // this is OK for a bit in case of a HUP. It's even better to queue the buffers and apply them
952 // after the hup. TODO.
953 fprintf(stderr, "internal failure in fault message write: %s\n", cf_strerror(errno));
954 }
955 }
956 }
957 }
958
959 /* Critical errors */
960 if (CF_CRITICAL == severity) {
961 fflush(NULL);
962
963 // these signals don't throw stack traces in our system
964 raise(SIGINT);
965 }
966}
967
968
969int
970cf_fault_sink_strlist(cf_dyn_buf *db)
971{
972 for (int i = 0; i < cf_fault_sinks_inuse; i++) {
973 cf_dyn_buf_append_int(db, i);
974 cf_dyn_buf_append_char(db, ':');
975 cf_dyn_buf_append_string(db,cf_fault_sinks[i].path);
976 cf_dyn_buf_append_char(db, ';');
977 }
978 cf_dyn_buf_chomp(db);
979 return(0);
980}
981
982
983extern void
984cf_fault_sink_logroll(void)
985{
986 fprintf(stderr, "cf_fault: rolling log files\n");
987 for (int i = 0; i < cf_fault_sinks_inuse; i++) {
988 cf_fault_sink *s = &cf_fault_sinks[i];
989 if ((0 != strncmp(s->path, "stderr", 6)) && (s->fd > 2)) {
990 static cf_mutex lock = CF_MUTEX_INIT;
991
992 cf_mutex_lock(&lock); // so concurrent SIGHUPs can't double-close
993
994 int old_fd = s->fd;
995
996 // Note - we use O_TRUNC, so we assume the file has been
997 // moved/copied elsewhere, or we're ok losing it.
998 s->fd = open(s->path, SINK_REOPEN_FLAGS, SINK_OPEN_MODE);
999
1000 usleep(1000); // threads may be interrupted while writing to old fd
1001 close(old_fd);
1002
1003 cf_mutex_unlock(&lock);
1004 }
1005 }
1006}
1007
1008
1009cf_fault_sink *cf_fault_sink_get_id(int id)
1010{
1011 if (id > cf_fault_sinks_inuse) return(0);
1012 return ( &cf_fault_sinks[id] );
1013}
1014
1015int
1016cf_fault_sink_context_all_strlist(int sink_id, cf_dyn_buf *db)
1017{
1018 // get the sink
1019 if (sink_id > cf_fault_sinks_inuse) return(-1);
1020 cf_fault_sink *s = &cf_fault_sinks[sink_id];
1021
1022 for (int i = 0; i < CF_FAULT_CONTEXT_UNDEF; i++) {
1023 cf_dyn_buf_append_string(db, cf_fault_context_strings[i]);
1024 cf_dyn_buf_append_char(db, ':');
1025 cf_dyn_buf_append_string(db, cf_fault_severity_strings[s->limit[i]]);
1026 cf_dyn_buf_append_char(db, ';');
1027 }
1028 cf_dyn_buf_chomp(db);
1029 return(0);
1030}
1031
1032int
1033cf_fault_sink_context_strlist(int sink_id, char *context, cf_dyn_buf *db)
1034{
1035 // get the sink
1036 if (sink_id > cf_fault_sinks_inuse) return(-1);
1037 cf_fault_sink *s = &cf_fault_sinks[sink_id];
1038
1039 // get the severity
1040 int i;
1041 for (i = 0; i < CF_FAULT_CONTEXT_UNDEF; i++) {
1042 if (0 == strcmp(cf_fault_context_strings[i],context))
1043 break;
1044 }
1045 if (i == CF_FAULT_CONTEXT_UNDEF) {
1046 cf_dyn_buf_append_string(db, context);
1047 cf_dyn_buf_append_string(db, ":unknown");
1048 return(0);
1049 }
1050
1051 // get the string
1052 cf_dyn_buf_append_string(db, context);
1053 cf_dyn_buf_append_char(db, ':');
1054 cf_dyn_buf_append_string(db, cf_fault_severity_strings[s->limit[i]]);
1055 return(0);
1056}
1057
1058
1059static int
1060cf_fault_cache_reduce_fn(const void *key, void *data, void *udata)
1061{
1062 uint32_t *count = (uint32_t*)data;
1063
1064 if (*count == 0) {
1065 return CF_SHASH_REDUCE_DELETE;
1066 }
1067
1068 const cf_fault_cache_hkey *hkey = (const cf_fault_cache_hkey*)key;
1069
1070 cf_fault_event(hkey->context, hkey->severity, hkey->file_name, hkey->line,
1071 "(repeated:%u) %s", *count, hkey->msg);
1072
1073 *count = 0;
1074
1075 return CF_SHASH_OK;
1076}
1077
1078
1079// For now there's only one cache, dumped by the ticker.
1080void
1081cf_fault_dump_cache()
1082{
1083 cf_shash_reduce(g_ticker_hash, cf_fault_cache_reduce_fn, NULL);
1084}
1085
1086
1087// For now there's only one cache, dumped by the ticker.
1088void
1089cf_fault_cache_event(cf_fault_context context, cf_fault_severity severity,
1090 const char *file_name, int line, char *msg, ...)
1091{
1092 cf_fault_cache_hkey key = {
1093 .line = line,
1094 .context = context,
1095 .file_name = file_name,
1096 .severity = severity,
1097 .msg = { 0 } // must pad hash keys
1098 };
1099
1100 size_t limit = sizeof(key.msg) - 1; // truncate leaving null-terminator
1101
1102 va_list argp;
1103
1104 va_start(argp, msg);
1105 vsnprintf(key.msg, limit, msg, argp);
1106 va_end(argp);
1107
1108 while (true) {
1109 uint32_t *valp = NULL;
1110 cf_mutex *lockp = NULL;
1111
1112 if (cf_shash_get_vlock(g_ticker_hash, &key, (void**)&valp, &lockp) ==
1113 CF_SHASH_OK) {
1114 // Already in hash - increment count and don't log it.
1115 (*valp)++;
1116 cf_mutex_unlock(lockp);
1117 break;
1118 }
1119 // else - not found, add it to hash and log it.
1120
1121 uint32_t initv = 1;
1122
1123 if (cf_shash_put_unique(g_ticker_hash, &key, &initv) ==
1124 CF_SHASH_ERR_FOUND) {
1125 continue; // other thread beat us to it - loop around and get it
1126 }
1127
1128 cf_fault_event(context, severity, file_name, line, "%s", key.msg);
1129 break;
1130 }
1131}
1132
1133void
1134cf_fault_hex_dump(const char *title, const void *data, size_t len)
1135{
1136 const uint8_t *data8 = data;
1137 char line[8 + 3 * 16 + 17];
1138 size_t k;
1139
1140 cf_info(CF_MISC, "hex dump - %s", title);
1141
1142 for (size_t i = 0; i < len; i += k) {
1143 sprintf(line, "%06zx: ", i);
1144
1145 for (k = 0; i + k < len && k < 16; ++k) {
1146 char num[3];
1147 uint8_t d = data8[i + k];
1148 sprintf(num, "%02x", d);
1149 line[8 + 3 * k + 0] = num[0];
1150 line[8 + 3 * k + 1] = num[1];
1151 line[8 + 3 * 16 + k] = d >= 32 && d <= 126 ? d : '.';
1152 }
1153
1154 cf_info(CF_MISC, "%s", line);
1155 }
1156}
1157
1158void
1159cf_fault_print_signal_context(void *_ctx)
1160{
1161 ucontext_t *uc = _ctx;
1162 mcontext_t *mc = &uc->uc_mcontext;
1163 uint64_t *gregs = (uint64_t *)&mc->gregs[0];
1164
1165 char regs[1000];
1166
1167 snprintf(regs, sizeof(regs),
1168 "rax %016lx rbx %016lx rcx %016lx rdx %016lx rsi %016lx rdi %016lx "
1169 "rbp %016lx rsp %016lx r8 %016lx r9 %016lx r10 %016lx r11 %016lx "
1170 "r12 %016lx r13 %016lx r14 %016lx r15 %016lx rip %016lx",
1171 gregs[REG_RAX], gregs[REG_RBX], gregs[REG_RCX], gregs[REG_RDX],
1172 gregs[REG_RSI], gregs[REG_RDI], gregs[REG_RBP], gregs[REG_RSP],
1173 gregs[REG_R8], gregs[REG_R9], gregs[REG_R10], gregs[REG_R11],
1174 gregs[REG_R12], gregs[REG_R13], gregs[REG_R14], gregs[REG_R15],
1175 gregs[REG_RIP]);
1176
1177 cf_fault_event(AS_AS, CF_WARNING, __FILENAME__, __LINE__,
1178 "stacktrace: registers: %s", regs);
1179
1180 void *bt[MAX_BACKTRACE_DEPTH];
1181 char trace[MAX_BACKTRACE_DEPTH * 20];
1182
1183 int sz = backtrace(bt, MAX_BACKTRACE_DEPTH);
1184 int off = 0;
1185
1186 for (int i = 0; i < sz; i++) {
1187 off += snprintf(trace + off, sizeof(trace) - off, " 0x%lx",
1188 cf_fault_strip_aslr(bt[i]));
1189 }
1190
1191 cf_fault_event(AS_AS, CF_WARNING, __FILENAME__, __LINE__,
1192 "stacktrace: found %d frames:%s offset 0x%lx", sz, trace,
1193 _r_debug.r_map->l_addr);
1194
1195 char **syms = backtrace_symbols(bt, sz);
1196
1197 if (syms) {
1198 for (int i = 0; i < sz; i++) {
1199 cf_fault_event(AS_AS, CF_WARNING, __FILENAME__, __LINE__,
1200 "stacktrace: frame %d: %s", i, syms[i]);
1201 }
1202 }
1203 else {
1204 cf_fault_event(AS_AS, CF_WARNING, __FILENAME__, __LINE__,
1205 "stacktrace: found no symbols");
1206 }
1207}
1208
1209uint64_t
1210cf_fault_strip_aslr(void *addr)
1211{
1212 void *start = &__executable_start;
1213 void *end = &__etext;
1214 uint64_t aslr_offset = _r_debug.r_map->l_addr;
1215
1216 return addr >= start && addr < end ?
1217 (uint64_t)addr - aslr_offset : (uint64_t)addr;
1218}
1219
1220