1/*
2 * fault.h
3 *
4 * Copyright (C) 2008-2014 Aerospike, Inc.
5 *
6 * Portions may be licensed to Aerospike, Inc. under one or more contributor
7 * license agreements.
8 *
9 * This program is free software: you can redistribute it and/or modify it under
10 * the terms of the GNU Affero General Public License as published by the Free
11 * Software Foundation, either version 3 of the License, or (at your option) any
12 * later version.
13 *
14 * This program is distributed in the hope that it will be useful, but WITHOUT
15 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
16 * FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
17 * details.
18 *
19 * You should have received a copy of the GNU Affero General Public License
20 * along with this program. If not, see http://www.gnu.org/licenses/
21 */
22
23#pragma once
24
25#include <alloca.h>
26#include <execinfo.h>
27#include <stdbool.h>
28#include <stddef.h>
29#include <stdint.h>
30#include <stdio.h>
31#include <stdlib.h>
32#include <string.h>
33#include <ucontext.h>
34#include "dynbuf.h"
35
36
// COMPILER_ASSERT() - compile-time verification.
//
// The expansion is only a typedef, so usage adds no compiled code and costs
// nothing at runtime. When the evaluated expression is false, the typedef'd
// array has size -1, which is a compile error that draws attention to the
// relevant line.
//
// e.g.
// COMPILER_ASSERT(sizeof(my_int_array) / sizeof(int) == MY_INT_ARRAY_SIZE);
//
// CGLUE() and CVERIFY() are the helpers behind it - going through CVERIFY()
// lets __COUNTER__ be replaced by its value before CGLUE() pastes it onto the
// typedef name.
#define CGLUE(a, b) a##b
#define CVERIFY(cond, id) \
	typedef char CGLUE(compiler_assert_failed_, id)[!(cond) ? -1 : 1]
#define COMPILER_ASSERT(cond) CVERIFY(cond, __COUNTER__)
49
// CF_MUST_CHECK - put in front of a declaration to make the compiler warn
// when a caller does not use the function's return value.
//
// e.g.
// CF_MUST_CHECK int my_function();
//
#define CF_MUST_CHECK __attribute__((__warn_unused_result__))
56
// CF_IGNORE_ERROR() - used by a caller to override CF_MUST_CHECK in the
// callee's declaration. The wrapped expression is evaluated exactly once, and
// its value is consumed by a comparison whose result is thrown away.
//
// e.g.
// CF_IGNORE_ERROR(my_function());
//
#define CF_IGNORE_ERROR(x) ((void)(12345 == (x)))
63
// Use CF_NEVER_FAILS() as caller to assert that returned value is not negative.
// The expression is evaluated once; if it is less than zero, cf_crash() is
// invoked in the CF_MISC context.
//
// The expansion is a single statement WITHOUT a trailing semicolon - the
// caller supplies it, as in the example - so the macro is safe as the unbraced
// body of an if/else.
//
// e.g.
// CF_NEVER_FAILS(my_function());
//
#define CF_NEVER_FAILS(x) \
do { \
	if ((x) < 0) { \
		cf_crash(CF_MISC, "this cannot happen..."); \
	} \
} while (false)
75
// Use CF_ZSTR_DEFINE() to null-terminate strings conveniently.
//
// Declares, in the current scope, a char array 'zstr' of max_sz bytes and a
// size_t '<zstr>len', copies min(sz, max_sz - 1) bytes from str into the array
// and null-terminates it.
//
// - sz is evaluated exactly once and compared as a size_t, so expressions such
//   as (a & b) or (c ? x : y) are safe to pass.
// - max_sz must be at least 1.
// - The expansion is a sequence of declarations and statements that already
//   ends in a semicolon, so it can only be used at block scope and not as the
//   unbraced body of an if/else.
//
// e.g.
// CF_ZSTR_DEFINE(zstr, 40, ns_name, name_sz);
// cf_warning(AS_NAMESPACE, "got namespace %s", zstr);
//
#define CF_ZSTR_DEFINE(zstr, max_sz, str, sz) \
	char zstr[(max_sz)]; \
	size_t zstr##len = (size_t)(sz); \
	if (zstr##len >= sizeof(zstr)) { \
		zstr##len = sizeof(zstr) - 1; \
	} \
	memcpy(zstr, (str), zstr##len); \
	zstr[zstr##len] = 0;
87
// Use CF_ZSTRxx() to null-terminate strings conveniently. Useful especially as
// cf_detail & cf_debug parameters where there's no cost unless the log level
// is enabled. (Cost may be more than CF_ZSTR_DEFINE() due to copying struct on
// function return.)
//
// The result is the char array inside a struct returned by value, so it is
// only meant to be consumed within the expression that contains the macro.
//
// e.g.
// cf_debug(AS_NAMESPACE, "got namespace %s", CF_ZSTR64(ns_name, name_sz));
//

// Fixed-size string holders, returned by value from the helpers below.
typedef struct cf_zstr64_s {
	char s[64];
} cf_zstr64;

typedef struct cf_zstr1k_s {
	char s[1024];
} cf_zstr1k;

// Copy at most dst_sz - 1 bytes of str (sz bytes long) into dst and
// null-terminate dst. A null str yields an empty string.
static inline void
cf_null_terminate_into(char *dst, size_t dst_sz, const char *str, size_t sz)
{
	size_t len = 0;

	// memcpy() must never be handed a null pointer, even for zero bytes.
	if (str != NULL) {
		len = sz < dst_sz ? sz : dst_sz - 1;
		memcpy(dst, str, len);
	}

	dst[len] = 0;
}

// Return the first sz bytes of str - truncated to 63 - as a null-terminated
// string.
static inline cf_zstr64
cf_null_terminate_64(const char *str, size_t sz)
{
	cf_zstr64 zstr;

	cf_null_terminate_into(zstr.s, sizeof(zstr.s), str, sz);

	return zstr;
}

// Return the first sz bytes of str - truncated to 1023 - as a null-terminated
// string.
static inline cf_zstr1k
cf_null_terminate_1k(const char *str, size_t sz)
{
	cf_zstr1k zstr;

	cf_null_terminate_into(zstr.s, sizeof(zstr.s), str, sz);

	return zstr;
}

// Arguments are parenthesized so that pointer expressions such as (p + 1) are
// computed in the pointer's own type before being cast to (const char *).
#define CF_ZSTR64(str, sz) (cf_null_terminate_64((const char *)(str), (sz)).s)
#define CF_ZSTR1K(str, sz) (cf_null_terminate_1k((const char *)(str), (sz)).s)
131
132
133/* SYNOPSIS
134 * Fault scoping
135 *
136 * Faults are identified by a context and severity. The context describes where
137 * the fault occurred, and the severity determines the required action.
138 *
139 * Examples:
140 * cf_info(CF_MISC, "important message: %s", my_msg);
141 * cf_crash(CF_MISC, "doom!");
142 * cf_assert(my_test, CF_MISC, "gloom!");
143 */
144
/* cf_fault_context
 * Identifies where a fault occurred. Values are assigned sequentially from 0,
 * and CF_FAULT_CONTEXT_UNDEF - always last - therefore equals the number of
 * real contexts.
 * NB: if you add or remove entries from this enum, you must also change
 * the corresponding strings structure in fault.c */
typedef enum {
	CF_MISC = 0,

	// CF_ prefixed contexts.
	CF_ALLOC,
	CF_ARENAX,
	CF_HARDWARE,
	CF_MSG,
	CF_RBUFFER,
	CF_SOCKET,
	CF_TLS,
	CF_VMAPX,
	CF_XMEM,

	// AS_ prefixed contexts.
	AS_AGGR,
	AS_APPEAL,
	AS_AS,
	AS_BATCH,
	AS_BIN,
	AS_CFG,
	AS_CLUSTERING,
	AS_DRV_SSD,
	AS_EXCHANGE,
	AS_FABRIC,
	AS_FLAT,
	AS_GEO,
	AS_HB,
	AS_HEALTH,
	AS_HLC,
	AS_INDEX,
	AS_INFO,
	AS_INFO_PORT,
	AS_JOB,
	AS_MIGRATE,
	AS_MON,
	AS_NAMESPACE,
	AS_NSUP,
	AS_PARTICLE,
	AS_PARTITION,
	AS_PAXOS,
	AS_PREDEXP,
	AS_PROTO,
	AS_PROXY,
	AS_PROXY_DIVERT, // special detail context
	AS_QUERY,
	AS_RECORD,
	AS_ROSTER,
	AS_RW,
	AS_RW_CLIENT, // special detail context
	AS_SCAN,
	AS_SECURITY,
	AS_SERVICE,
	AS_SERVICE_LIST,
	AS_SINDEX,
	AS_SKEW,
	AS_SMD,
	AS_STORAGE,
	AS_TRUNCATE,
	AS_TSVC,
	AS_UDF,
	AS_XDR,
	AS_XDR_CLIENT,
	AS_XDR_HTTP,

	// Must stay last.
	CF_FAULT_CONTEXT_UNDEF
} cf_fault_context;

// Presumably the per-context name strings mentioned above - defined outside
// this header.
extern char *cf_fault_context_strings[];
214
/* cf_fault_severity
 * How serious a fault is. Numerically lower values are the more severe ones. */
typedef enum {
	CF_CRITICAL = 0,            // fatal runtime panics
	CF_WARNING = 1,             // runtime errors
	CF_INFO = 2,                // informational or advisory messages
	CF_DEBUG = 3,               // debugging messages
	CF_DETAIL = 4,              // detailed debugging messages
	CF_FAULT_SEVERITY_UNDEF = 5
} cf_fault_severity;
230
231/* cf_fault_sink
232 * An endpoint (sink) for a flow of fault messages */
233typedef struct cf_fault_sink {
234 int fd;
235 char *path;
236 int limit[CF_FAULT_CONTEXT_UNDEF];
237} cf_fault_sink;
238
239#define CF_FAULT_SINKS_MAX 8
240
/**
 * cf_display_type
 * Selects how binary data (like a digest, a bit string or a buffer) is
 * rendered when it is dumped to the log. This list is expected to grow over
 * time, as more binary representations are needed. (2014_03_20 tjl).
 */
typedef enum {
	CF_DISPLAY_HEX_DIGEST = 0, // special case: digest, in packed hex
	CF_DISPLAY_HEX_SPACED,     // binary value in regular spaced hex
	CF_DISPLAY_HEX_PACKED,     // binary value in packed hex
	CF_DISPLAY_HEX_COLUMNS,    // binary value in column-oriented hex
	CF_DISPLAY_BASE64,         // binary value in Base64
	CF_DISPLAY_BITS_SPACED,    // binary value as a spaced bit string
	CF_DISPLAY_BITS_COLUMNS    // binary value as column-oriented bits
} cf_display_type;
256
257
258/* Function declarations */
259
260// note: passing a null sink sets for all currently known sinks
261extern int cf_fault_sink_addcontext(cf_fault_sink *s, char *context, char *severity);
262extern cf_fault_sink *cf_fault_sink_add(char *path);
263
264extern cf_fault_sink *cf_fault_sink_hold(char *path);
265extern bool cf_fault_console_is_held();
266extern int cf_fault_sink_activate_all_held();
267extern int cf_fault_sink_get_fd_list(int *fds);
268
269extern int cf_fault_sink_strlist(cf_dyn_buf *db); // pack all contexts into a string - using ids
270extern int cf_fault_sink_context_all_strlist(int sink_id, cf_dyn_buf *db);
271extern int cf_fault_sink_context_strlist(int sink_id, char *context, cf_dyn_buf *db);
272
273extern cf_fault_sink *cf_fault_sink_get_id(int id);
274
// Log rolling and timestamp options. Every no-argument function is declared
// with (void), so that it is a real prototype.
extern void cf_fault_sink_logroll(void);

extern void cf_fault_use_local_time(bool val);
extern bool cf_fault_is_using_local_time(void);

extern void cf_fault_log_millis(bool log_millis);
extern bool cf_fault_is_logging_millis(void);
282
// TODO: Rework cf_display_type-based logging to have a more useful
// output format, instead of having this separate function.
extern void cf_fault_hex_dump(const char *title,
		const void *data, size_t len);

// _ctx is passed as an untyped pointer - presumably the context argument of a
// signal handler; confirm against the definition.
extern void cf_fault_print_signal_context(void *_ctx);

extern uint64_t cf_fault_strip_aslr(void *addr);
289
290extern cf_fault_severity cf_fault_filter[];
291
292// Define the mechanism that we'll use to write into the Server Log.
293// cf_fault_event() is "regular" logging
294extern void cf_fault_event(const cf_fault_context,
295 const cf_fault_severity severity, const char *file_name,
296 const int line, const char *msg, ...)
297 __attribute__ ((format (printf, 5, 6)));
298
299// cf_fault_event2() is for advanced logging, where we want to print some
300// binary object (often a digest).
301extern void cf_fault_event2(const cf_fault_context,
302 const cf_fault_severity severity, const char *file_name, const int line,
303 const void *mem_ptr, size_t len, cf_display_type dt, const char *msg, ...)
304 __attribute__ ((format (printf, 8, 9)));
305
306extern void cf_fault_event_nostack(const cf_fault_context,
307 const cf_fault_severity severity, const char *fn, const int line,
308 const char *msg, ...)
309 __attribute__ ((format (printf, 5, 6)));
310
311// For now there's only one cache, dumped by the ticker.
312extern void cf_fault_cache_event(cf_fault_context context,
313 cf_fault_severity severity, const char *file_name, int line,
314 char *msg, ...)
315 __attribute__ ((format (printf, 5, 6)));
316
// Fallback definition of __FILENAME__ - this is ONLY to keep Eclipse happy
// without having to tell it __FILENAME__ is defined. The make process will
// define it via the -D mechanism.
#if !defined(__FILENAME__)
#define __FILENAME__ ""
#endif
322
// The "regular" version.
// When the condition is false, reports a CF_CRITICAL event through
// cf_fault_event(), tagged with the caller's file and line. When it is true,
// none of the message arguments are evaluated.
#define cf_assert(cond, context, msg, ...) \
	(!(cond) ? \
		cf_fault_event((context), CF_CRITICAL, __FILENAME__, __LINE__, \
				(msg), ##__VA_ARGS__) : \
		(void)0)
327
// The "no stack" versions - same shape as cf_assert() and cf_crash(), but
// routed through cf_fault_event_nostack().
#define cf_assert_nostack(cond, context, msg, ...) \
	(!(cond) ? \
		cf_fault_event_nostack((context), CF_CRITICAL, __FILENAME__, \
				__LINE__, (msg), ##__VA_ARGS__) : \
		(void)0)

#define cf_crash_nostack(context, msg, ...) \
	cf_fault_event_nostack((context), CF_CRITICAL, __FILENAME__, __LINE__, \
			(msg), ##__VA_ARGS__)
334
// The "regular" versions.
//
// __SEVLOG() calls cf_fault_event() only if the severity does not exceed the
// threshold held in cf_fault_filter[] for the context - otherwise the message
// arguments are not even evaluated. Note that the context expression is
// expanded twice (filter look-up, then the call).
#define __SEVLOG(severity, context, msg, ...) \
	((severity) <= cf_fault_filter[context] ? \
		cf_fault_event((context), severity, __FILENAME__, __LINE__, \
				(msg), ##__VA_ARGS__) : \
		(void)0)

// cf_crash() is not subject to the filter - it always reports a CF_CRITICAL
// event.
#define cf_crash(context, msg, ...) \
	cf_fault_event((context), CF_CRITICAL, __FILENAME__, __LINE__, \
			(msg), ##__VA_ARGS__)

// Usage: cf_<severity>(context, format, ...)
#define cf_warning(...) __SEVLOG(CF_WARNING, ##__VA_ARGS__)
#define cf_info(...) __SEVLOG(CF_INFO, ##__VA_ARGS__)
#define cf_debug(...) __SEVLOG(CF_DEBUG, ##__VA_ARGS__)
#define cf_detail(...) __SEVLOG(CF_DETAIL, ##__VA_ARGS__)
348
// In addition to the existing LOG calls, this mechanism provides the ability
// to print out a BINARY ARRAY, in a general manner, at the end of the passed
// in PRINT STRING.
// It can be used to express a binary array as a hex or Base64 value, but it is
// often used to print a full Digest Value, in either Hex or Base64 format.
//
// Filtering works as for the regular log macros: cf_fault_event2() is called
// only if the severity does not exceed cf_fault_filter[] for the context.
#define __BINARY_SEVLOG(severity, context, ptr, len, dt, msg, ...) \
	((severity) <= cf_fault_filter[context] ? \
		cf_fault_event2((context), severity, __FILENAME__, __LINE__, \
				(ptr), (len), (dt), (msg), ##__VA_ARGS__) : \
		(void)0)

// Not subject to the filter - always reports a CF_CRITICAL event.
#define cf_crash_binary(context, ptr, len, dt, msg, ...) \
	cf_fault_event2((context), CF_CRITICAL, __FILENAME__, __LINE__, \
			(ptr), (len), (dt), (msg), ##__VA_ARGS__)

// Usage: cf_<severity>_binary(context, ptr, len, display_type, format, ...)
#define cf_warning_binary(...) __BINARY_SEVLOG(CF_WARNING, ##__VA_ARGS__)
#define cf_info_binary(...) __BINARY_SEVLOG(CF_INFO, ##__VA_ARGS__)
#define cf_debug_binary(...) __BINARY_SEVLOG(CF_DEBUG, ##__VA_ARGS__)
#define cf_detail_binary(...) __BINARY_SEVLOG(CF_DETAIL, ##__VA_ARGS__)
367
// This set of log calls specifically handles DIGEST values: ptr is passed to
// cf_fault_event2() with a fixed length of 20 bytes and the
// CF_DISPLAY_HEX_DIGEST display type. Filtering works as for the regular log
// macros.
#define __DIGEST_SEVLOG(severity, context, ptr, msg, ...) \
	((severity) <= cf_fault_filter[context] ? \
		cf_fault_event2((context), severity, __FILENAME__, __LINE__, \
				(ptr), 20, CF_DISPLAY_HEX_DIGEST, (msg), ##__VA_ARGS__) : \
		(void)0)

// Not subject to the filter - always reports a CF_CRITICAL event.
#define cf_crash_digest(context, ptr, msg, ...) \
	cf_fault_event2((context), CF_CRITICAL, __FILENAME__, __LINE__, \
			(ptr), 20, CF_DISPLAY_HEX_DIGEST, (msg), ##__VA_ARGS__)

// Usage: cf_<severity>_digest(context, digest_ptr, format, ...)
#define cf_warning_digest(...) __DIGEST_SEVLOG(CF_WARNING, ##__VA_ARGS__)
#define cf_info_digest(...) __DIGEST_SEVLOG(CF_INFO, ##__VA_ARGS__)
#define cf_debug_digest(...) __DIGEST_SEVLOG(CF_DEBUG, ##__VA_ARGS__)
#define cf_detail_digest(...) __DIGEST_SEVLOG(CF_DETAIL, ##__VA_ARGS__)
381
// cf_strerror() - error string for an errno value.
// _GNU_SOURCE gives us a strerror_r() that returns (char *). The 200-byte
// scratch buffer handed to it comes from alloca(), i.e. from the calling
// function's stack frame.
#define cf_strerror(err) \
	strerror_r((err), (char *)alloca(200), 200)
384
385/* cf_context_at_severity
386 * Return whether the given context is set to this severity level or higher. */
387extern bool cf_context_at_severity(const cf_fault_context context, const cf_fault_severity severity);
388
// No-argument functions are declared with (void) so that the declarations are
// real prototypes and the compiler rejects calls that pass arguments.
extern void cf_fault_init(void);

// NOTE(review): presumably writes a packed hex rendering of the len bytes at
// mem_ptr into output - the required size of output is not visible here;
// confirm against the definition.
int generate_packed_hex_string(const void *mem_ptr, uint32_t len, char *output);

// For now there's only one cache, dumped by the ticker.
extern void cf_fault_dump_cache(void);

#define cf_dump_ticker_cache() cf_fault_dump_cache()
397
// Ticker cache logging - same filtering as the regular log macros, but the
// event goes to cf_fault_cache_event() instead of cf_fault_event().
#define __CACHE_SEVLOG(severity, context, msg, ...) \
	((severity) <= cf_fault_filter[context] ? \
		cf_fault_cache_event((context), severity, __FILENAME__, __LINE__, \
				(msg), ##__VA_ARGS__) : \
		(void)0)

// Usage: cf_ticker_<severity>(context, format, ...)
#define cf_ticker_warning(...) __CACHE_SEVLOG(CF_WARNING, ##__VA_ARGS__)
#define cf_ticker_info(...) __CACHE_SEVLOG(CF_INFO, ##__VA_ARGS__)
#define cf_ticker_debug(...) __CACHE_SEVLOG(CF_DEBUG, ##__VA_ARGS__)
#define cf_ticker_detail(...) __CACHE_SEVLOG(CF_DETAIL, ##__VA_ARGS__)
407