1/*
2 * as.c
3 *
4 * Copyright (C) 2008-2017 Aerospike, Inc.
5 *
6 * Portions may be licensed to Aerospike, Inc. under one or more contributor
7 * license agreements.
8 *
9 * This program is free software: you can redistribute it and/or modify it under
10 * the terms of the GNU Affero General Public License as published by the Free
11 * Software Foundation, either version 3 of the License, or (at your option) any
12 * later version.
13 *
14 * This program is distributed in the hope that it will be useful, but WITHOUT
15 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
16 * FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
17 * details.
18 *
19 * You should have received a copy of the GNU Affero General Public License
20 * along with this program. If not, see http://www.gnu.org/licenses/
21 */
22
23#include <errno.h>
24#include <fcntl.h>
25#include <getopt.h>
26#include <pthread.h>
27#include <stdbool.h>
28#include <stddef.h>
29#include <stdint.h>
30#include <stdio.h>
31#include <stdlib.h>
32#include <string.h>
33#include <syscall.h>
34#include <unistd.h>
35#include <sys/stat.h>
36
37#include "citrusleaf/alloc.h"
38
39#include "cf_thread.h"
40#include "daemon.h"
41#include "dns.h"
42#include "fault.h"
43#include "hardware.h"
44#include "tls.h"
45
46#include "base/batch.h"
47#include "base/cfg.h"
48#include "base/datamodel.h"
49#include "base/health.h"
50#include "base/index.h"
51#include "base/json_init.h"
52#include "base/monitor.h"
53#include "base/nsup.h"
54#include "base/scan.h"
55#include "base/secondary_index.h"
56#include "base/security.h"
57#include "base/service.h"
58#include "base/smd.h"
59#include "base/stats.h"
60#include "base/thr_info.h"
61#include "base/thr_info_port.h"
62#include "base/thr_sindex.h"
63#include "base/ticker.h"
64#include "base/xdr_serverside.h"
65#include "fabric/clustering.h"
66#include "fabric/exchange.h"
67#include "fabric/fabric.h"
68#include "fabric/hb.h"
69#include "fabric/migrate.h"
70#include "fabric/skew_monitor.h"
71#include "storage/storage.h"
72#include "transaction/proxy.h"
73#include "transaction/rw_request_hash.h"
74#include "transaction/udf.h"
75
76
77//==========================================================
78// Constants.
79//
80
// Build identification strings, defined in version.c (generated by make).
// main() prints them for --version and in the first line written to the log.
extern const char aerospike_build_type[];
extern const char aerospike_build_id[];
84
// Long-option table handed to getopt_long() by main(). Every entry has a NULL
// 'flag', so getopt_long() returns 'val' directly and main() switches on it.
// The all-zero entry terminates the table.
static const struct option CMD_OPTS[] = {
	{ .name = "help",        .has_arg = no_argument,       .flag = NULL, .val = 'h' },
	{ .name = "version",     .has_arg = no_argument,       .flag = NULL, .val = 'v' },
	{ .name = "config-file", .has_arg = required_argument, .flag = NULL, .val = 'f' },
	{ .name = "foreground",  .has_arg = no_argument,       .flag = NULL, .val = 'd' },
	{ .name = "fgdaemon",    .has_arg = no_argument,       .flag = NULL, .val = 'F' },
	{ .name = "cold-start",  .has_arg = no_argument,       .flag = NULL, .val = 'c' },
	{ .name = "instance",    .has_arg = required_argument, .flag = NULL, .val = 'n' },
	{ .name = NULL,          .has_arg = 0,                 .flag = NULL, .val = 0 }
};
96
// Full help text. main() prints this to stdout, followed by a newline, when
// --help is given, then exits with status 0. Each option name is its own
// string literal followed by a separate "\n" literal; adjacent literals are
// concatenated by the compiler.
static const char HELP[] =
	"\n"
	"Aerospike server installation installs the script /etc/init.d/aerospike which\n"
	"is normally used to start and stop the server. The script is also found as\n"
	"as/etc/init-script in the source tree.\n"
	"\n"
	"asd informative command-line options:\n"
	"\n"
	"--help"
	"\n"
	"Print this message and exit.\n"
	"\n"
	"--version"
	"\n"
	"Print edition and build version information and exit.\n"
	"\n"
	"asd runtime command-line options:\n"
	"\n"
	"--config-file <file>"
	"\n"
	"Specify the location of the Aerospike server config file. If this option is not\n"
	"specified, the default location /etc/aerospike/aerospike.conf is used.\n"
	"\n"
	"--foreground"
	"\n"
	"Specify that Aerospike not be daemonized. This is useful for running Aerospike\n"
	"in gdb. Alternatively, add 'run-as-daemon false' in the service context of the\n"
	"Aerospike config file.\n"
	"\n"
	"--fgdaemon"
	"\n"
	"Specify that Aerospike is to be run as a \"new-style\" (foreground) daemon. This\n"
	"is useful for running Aerospike under systemd or Docker.\n"
	"\n"
	"--cold-start"
	"\n"
	"(Enterprise edition only.) At startup, force the Aerospike server to read all\n"
	"records from storage devices to rebuild the index.\n"
	"\n"
	"--instance <0-15>"
	"\n"
	"(Enterprise edition only.) If running multiple instances of Aerospike on one\n"
	"machine (not recommended), each instance must be uniquely designated via this\n"
	"option.\n"
	;
142
// Short usage summary. main() prints this to stderr, followed by a newline,
// when getopt_long() returns an option code it doesn't handle, then exits with
// status 1. The runtime options are deliberately joined onto a single line.
static const char USAGE[] =
	"\n"
	"asd informative command-line options:\n"
	"[--help]\n"
	"[--version]\n"
	"\n"
	"asd runtime command-line options:\n"
	"[--config-file <file>] "
	"[--foreground] "
	"[--fgdaemon] "
	"[--cold-start] "
	"[--instance <0-15>]\n"
	;
156
// Config file path used when --config-file is not given. main() also compares
// its config file pointer against this array's address to decide whether the
// path was heap-allocated and must be freed.
static const char DEFAULT_CONFIG_FILE[] = "/etc/aerospike/aerospike.conf";

// Suffix appended to the configured work directory to form the system metadata
// directory path checked by validate_smd_directory().
static const char SMD_DIR_NAME[] = "/smd";
160
161
162//==========================================================
163// Globals.
164//
165
// Mutex that main() locks twice in a row to park itself once startup is done.
// Not cf_mutex, which won't tolerate unlock if already unlocked.
pthread_mutex_t g_main_deadlock = PTHREAD_MUTEX_INITIALIZER;

// Set to true by main() after its first lock of g_main_deadlock, immediately
// before it blocks on the second lock.
bool g_startup_complete = false;
// Set to true by main() once it has been released from the second lock and is
// about to begin the shutdown sequence.
bool g_shutdown_started = false;
171
172
173//==========================================================
174// Forward declarations.
175//
176
// signal.c doesn't have header file.
extern void as_signal_setup();

// File-local helpers, defined after main().
static void write_pidfile(char *pidfile);
static void validate_directory(const char *path, const char *log_tag);
static void validate_smd_directory();
183
184
185//==========================================================
186// Aerospike server entry point.
187//
188
189int
190main(int argc, char **argv)
191{
192 g_start_sec = cf_get_seconds();
193
194 // Initialize cf_thread wrapper.
195 cf_thread_init();
196
197 // Initialize memory allocation.
198 cf_alloc_init();
199
200 // Initialize fault management framework.
201 cf_fault_init();
202
203 // Setup signal handlers.
204 as_signal_setup();
205
206 // Initialize TLS library.
207 tls_check_init();
208
209 int opt;
210 int opt_i;
211 const char *config_file = DEFAULT_CONFIG_FILE;
212 bool run_in_foreground = false;
213 bool new_style_daemon = false;
214 bool cold_start_cmd = false;
215 uint32_t instance = 0;
216
217 // Parse command line options.
218 while ((opt = getopt_long(argc, argv, "", CMD_OPTS, &opt_i)) != -1) {
219 switch (opt) {
220 case 'h':
221 // printf() since we want stdout and don't want cf_fault's prefix.
222 printf("%s\n", HELP);
223 return 0;
224 case 'v':
225 // printf() since we want stdout and don't want cf_fault's prefix.
226 printf("%s build %s\n", aerospike_build_type, aerospike_build_id);
227 return 0;
228 case 'f':
229 config_file = cf_strdup(optarg);
230 break;
231 case 'F':
232 // As a "new-style" daemon(*), asd runs in the foreground and
233 // ignores the following configuration items:
234 // - user ('user')
235 // - group ('group')
236 // - PID file ('pidfile')
237 //
238 // If ignoring configuration items, or if the 'console' sink is not
239 // specified, warnings will appear in stderr.
240 //
241 // (*) http://0pointer.de/public/systemd-man/daemon.html#New-Style%20Daemons
242 run_in_foreground = true;
243 new_style_daemon = true;
244 break;
245 case 'd':
246 run_in_foreground = true;
247 break;
248 case 'c':
249 cold_start_cmd = true;
250 break;
251 case 'n':
252 instance = (uint32_t)strtol(optarg, NULL, 0);
253 break;
254 default:
255 // fprintf() since we don't want cf_fault's prefix.
256 fprintf(stderr, "%s\n", USAGE);
257 return 1;
258 }
259 }
260
261 // Set all fields in the global runtime configuration instance. This parses
262 // the configuration file, and creates as_namespace objects. (Return value
263 // is a shortcut pointer to the global runtime configuration instance.)
264 as_config *c = as_config_init(config_file);
265
266 // Detect NUMA topology and, if requested, prepare for CPU and NUMA pinning.
267 cf_topo_config(c->auto_pin, (cf_topo_numa_node_index)instance,
268 &c->service.bind);
269
270 // Perform privilege separation as necessary. If configured user & group
271 // don't have root privileges, all resources created or reopened past this
272 // point must be set up so that they are accessible without root privileges.
273 // If not, the process will self-terminate with (hopefully!) a log message
274 // indicating which resource is not set up properly.
275 cf_process_privsep(c->uid, c->gid);
276
277 //
278 // All resources such as files, devices, and shared memory must be created
279 // or reopened below this line! (The configuration file is the only thing
280 // that must be opened above, in order to parse the user & group.)
281 //==========================================================================
282
283 // A "new-style" daemon expects console logging to be configured. (If not,
284 // log messages won't be seen via the standard path.)
285 if (new_style_daemon) {
286 if (! cf_fault_console_is_held()) {
287 cf_warning(AS_AS, "in new-style daemon mode, console logging is not configured");
288 }
289 }
290
291 // Activate log sinks. Up to this point, 'cf_' log output goes to stderr,
292 // filtered according to NO_SINKS_LIMIT in fault.c. After this point, 'cf_'
293 // log output will appear in all log file sinks specified in configuration,
294 // with specified filtering. If console sink is specified in configuration,
295 // 'cf_' log output will continue going to stderr, but filtering will switch
296 // from NO_SINKS_LIMIT to that specified in console sink configuration.
297 if (0 != cf_fault_sink_activate_all_held()) {
298 // Specifics of failure are logged in cf_fault_sink_activate_all_held().
299 cf_crash_nostack(AS_AS, "can't open log sink(s)");
300 }
301
302 // Daemonize asd if specified. After daemonization, output to stderr will no
303 // longer appear in terminal. Instead, check /tmp/aerospike-console.<pid>
304 // for console output.
305 if (! run_in_foreground && c->run_as_daemon) {
306 // Don't close any open files when daemonizing. At this point only log
307 // sink files are open - instruct cf_process_daemonize() to ignore them.
308 int open_fds[CF_FAULT_SINKS_MAX];
309 int num_open_fds = cf_fault_sink_get_fd_list(open_fds);
310
311 cf_process_daemonize(open_fds, num_open_fds);
312 }
313
314 // Log which build this is - should be the first line in the log file.
315 cf_info(AS_AS, "<><><><><><><><><><> %s build %s <><><><><><><><><><>",
316 aerospike_build_type, aerospike_build_id);
317
318 // Includes echoing the configuration file to log.
319 as_config_post_process(c, config_file);
320
321 xdr_config_post_process();
322
323 // If we allocated a non-default config file name, free it.
324 if (config_file != DEFAULT_CONFIG_FILE) {
325 cf_free((void*)config_file);
326 }
327
328 // Write the pid file, if specified.
329 if (! new_style_daemon) {
330 write_pidfile(c->pidfile);
331 }
332 else {
333 if (c->pidfile) {
334 cf_warning(AS_AS, "will not write PID file in new-style daemon mode");
335 }
336 }
337
338 // Check that required directories are set up properly.
339 validate_directory(c->work_directory, "work");
340 validate_directory(c->mod_lua.user_path, "Lua user");
341 validate_smd_directory();
342
343 // Initialize subsystems. At this point we're allocating local resources,
344 // starting worker threads, etc. (But no communication with other server
345 // nodes or clients yet.)
346
347 as_json_init(); // Jansson JSON API used by System Metadata
348 as_index_tree_gc_init(); // thread to purge dropped index trees
349 as_sindex_thr_init(); // defrag secondary index (ok during population)
350 as_nsup_init(); // load previous evict-void-time(s)
351
352 // Initialize namespaces. Each namespace decides here whether it will do a
353 // warm or cold start. Index arenas, partition structures and index tree
354 // structures are initialized. Secondary index system metadata is restored.
355 as_namespaces_init(cold_start_cmd, instance);
356
357 // Initialize the storage system. For warm and cool restarts, this includes
358 // fully resuming persisted indexes - this may take a few minutes.
359 as_storage_init();
360
361 // Migrate memory to correct NUMA node (includes resumed index arenas).
362 cf_topo_migrate_memory();
363
364 // Drop capabilities that we kept only for initialization.
365 cf_process_drop_startup_caps();
366
367 // Activate the storage system. For cold starts and cool restarts, this
368 // includes full drive scans - this may take several hours. The defrag
369 // subsystem starts operating at the end of this call.
370 as_storage_load();
371
372 // Populate all secondary indexes. This may block for a long time.
373 as_sindex_boot_populateall();
374
375 cf_info(AS_AS, "initializing services...");
376
377 cf_dns_init(); // DNS resolver
378 as_netio_init(); // query responses
379 as_security_init(); // security features
380 as_service_init(); // server may process internal transactions
381 as_hb_init(); // inter-node heartbeat
382 as_skew_monitor_init(); // clock skew monitor
383 as_fabric_init(); // inter-node communications
384 as_exchange_init(); // initialize the cluster exchange subsystem
385 as_clustering_init(); // clustering-v5 start
386 as_info_init(); // info transaction handling
387 as_migrate_init(); // move data between nodes
388 as_proxy_init(); // do work on behalf of others
389 as_rw_init(); // read & write service
390 as_query_init(); // query transaction handling
391 as_udf_init(); // user-defined functions
392 as_scan_init(); // scan a namespace or set
393 as_batch_init(); // batch transaction handling
394 as_xdr_init(); // cross data-center replication
395 as_mon_init(); // monitor
396
397 // Wait for enough available storage. We've been defragging all along, but
398 // here we wait until it's enough. This may block for a long time.
399 as_storage_wait_for_defrag();
400
401 // Start subsystems. At this point we may begin communicating with other
402 // cluster nodes, and ultimately with clients.
403
404 as_smd_start(); // enables receiving cluster state change events
405 as_health_start(); // starts before fabric and hb to capture them
406 as_fabric_start(); // may send & receive fabric messages
407 as_xdr_start(); // XDR should start before it joins other nodes
408 as_hb_start(); // start inter-node heartbeat
409 as_exchange_start(); // start the cluster exchange subsystem
410 as_clustering_start(); // clustering-v5 start
411 as_nsup_start(); // may send evict-void-time(s) to other nodes
412 as_service_start(); // server will now receive client transactions
413 as_info_port_start(); // server will now receive info transactions
414 as_ticker_start(); // only after everything else is started
415
416 // Relevant for enterprise edition only.
417 as_storage_start_tomb_raider();
418
419 // Log a service-ready message.
420 cf_info(AS_AS, "service ready: soon there will be cake!");
421
422 //--------------------------------------------
423 // Startup is done. This thread will now wait
424 // quietly for a shutdown signal.
425 //
426
427 // Stop this thread from finishing. Intentionally deadlocking on a mutex is
428 // a remarkably efficient way to do this.
429 pthread_mutex_lock(&g_main_deadlock);
430 g_startup_complete = true;
431 pthread_mutex_lock(&g_main_deadlock);
432
433 // When the service is running, you are here (deadlocked) - the signals that
434 // stop the service (yes, these signals always occur in this thread) will
435 // unlock the mutex, allowing us to continue.
436
437 g_shutdown_started = true;
438 pthread_mutex_unlock(&g_main_deadlock);
439 pthread_mutex_destroy(&g_main_deadlock);
440
441 //--------------------------------------------
442 // Received a shutdown signal.
443 //
444
445 cf_info(AS_AS, "initiating clean shutdown ...");
446
447 as_storage_shutdown(instance);
448 as_xdr_shutdown();
449
450 cf_info(AS_AS, "finished clean shutdown - exiting");
451
452 // If shutdown was totally clean (all threads joined) we could just return,
453 // but for now we exit to make sure all threads die.
454#ifdef DOPROFILE
455 exit(0); // exit(0) so profile build actually dumps gmon.out
456#else
457 _exit(0);
458#endif
459
460 return 0;
461}
462
463
464//==========================================================
465// Local helpers.
466//
467
468static void
469write_pidfile(char *pidfile)
470{
471 if (! pidfile) {
472 // If there's no pid file specified in the config file, just move on.
473 return;
474 }
475
476 // Note - the directory the pid file is in must already exist.
477
478 remove(pidfile);
479
480 int pid_fd = open(pidfile, O_CREAT | O_RDWR,
481 S_IWUSR | S_IRUSR | S_IRGRP | S_IROTH);
482
483 if (pid_fd < 0) {
484 cf_crash_nostack(AS_AS, "failed to open pid file %s: %s", pidfile,
485 cf_strerror(errno));
486 }
487
488 char pidstr[16];
489 sprintf(pidstr, "%u\n", (uint32_t)getpid());
490
491 // If we can't access this resource, just log a warning and continue -
492 // it is not critical to the process.
493 if (write(pid_fd, pidstr, strlen(pidstr)) == -1) {
494 cf_warning(AS_AS, "failed write to pid file %s: %s", pidfile,
495 cf_strerror(errno));
496 }
497
498 close(pid_fd);
499}
500
501static void
502validate_directory(const char *path, const char *log_tag)
503{
504 struct stat buf;
505
506 if (stat(path, &buf) != 0) {
507 cf_crash_nostack(AS_AS, "%s directory '%s' is not set up properly: %s",
508 log_tag, path, cf_strerror(errno));
509 }
510 else if (! S_ISDIR(buf.st_mode)) {
511 cf_crash_nostack(AS_AS, "%s directory '%s' is not set up properly: Not a directory",
512 log_tag, path);
513 }
514}
515
516static void
517validate_smd_directory()
518{
519 size_t len = strlen(g_config.work_directory);
520 char smd_path[len + sizeof(SMD_DIR_NAME)];
521
522 strcpy(smd_path, g_config.work_directory);
523 strcpy(smd_path + len, SMD_DIR_NAME);
524 validate_directory(smd_path, "system metadata");
525}
526