| 1 | /* |
| 2 | * as.c |
| 3 | * |
| 4 | * Copyright (C) 2008-2017 Aerospike, Inc. |
| 5 | * |
| 6 | * Portions may be licensed to Aerospike, Inc. under one or more contributor |
| 7 | * license agreements. |
| 8 | * |
| 9 | * This program is free software: you can redistribute it and/or modify it under |
| 10 | * the terms of the GNU Affero General Public License as published by the Free |
| 11 | * Software Foundation, either version 3 of the License, or (at your option) any |
| 12 | * later version. |
| 13 | * |
| 14 | * This program is distributed in the hope that it will be useful, but WITHOUT |
| 15 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
| 16 | * FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more |
| 17 | * details. |
| 18 | * |
| 19 | * You should have received a copy of the GNU Affero General Public License |
| 20 | * along with this program. If not, see http://www.gnu.org/licenses/ |
| 21 | */ |
| 22 | |
| 23 | #include <errno.h> |
| 24 | #include <fcntl.h> |
| 25 | #include <getopt.h> |
| 26 | #include <pthread.h> |
| 27 | #include <stdbool.h> |
| 28 | #include <stddef.h> |
| 29 | #include <stdint.h> |
| 30 | #include <stdio.h> |
| 31 | #include <stdlib.h> |
| 32 | #include <string.h> |
| 33 | #include <syscall.h> |
| 34 | #include <unistd.h> |
| 35 | #include <sys/stat.h> |
| 36 | |
| 37 | #include "citrusleaf/alloc.h" |
| 38 | |
| 39 | #include "cf_thread.h" |
| 40 | #include "daemon.h" |
| 41 | #include "dns.h" |
| 42 | #include "fault.h" |
| 43 | #include "hardware.h" |
| 44 | #include "tls.h" |
| 45 | |
| 46 | #include "base/batch.h" |
| 47 | #include "base/cfg.h" |
| 48 | #include "base/datamodel.h" |
| 49 | #include "base/health.h" |
| 50 | #include "base/index.h" |
| 51 | #include "base/json_init.h" |
| 52 | #include "base/monitor.h" |
| 53 | #include "base/nsup.h" |
| 54 | #include "base/scan.h" |
| 55 | #include "base/secondary_index.h" |
| 56 | #include "base/security.h" |
| 57 | #include "base/service.h" |
| 58 | #include "base/smd.h" |
| 59 | #include "base/stats.h" |
| 60 | #include "base/thr_info.h" |
| 61 | #include "base/thr_info_port.h" |
| 62 | #include "base/thr_sindex.h" |
| 63 | #include "base/ticker.h" |
| 64 | #include "base/xdr_serverside.h" |
| 65 | #include "fabric/clustering.h" |
| 66 | #include "fabric/exchange.h" |
| 67 | #include "fabric/fabric.h" |
| 68 | #include "fabric/hb.h" |
| 69 | #include "fabric/migrate.h" |
| 70 | #include "fabric/skew_monitor.h" |
| 71 | #include "storage/storage.h" |
| 72 | #include "transaction/proxy.h" |
| 73 | #include "transaction/rw_request_hash.h" |
| 74 | #include "transaction/udf.h" |
| 75 | |
| 76 | |
| 77 | //========================================================== |
| 78 | // Constants. |
| 79 | // |
| 80 | |
| 81 | // String constants in version.c, generated by make. |
| 82 | extern const char aerospike_build_type[]; |
| 83 | extern const char aerospike_build_id[]; |
| 84 | |
// Long-only command line options understood by asd. Each entry's 'val' is the
// code getopt_long() returns for it, which main() switches on. Note that
// --foreground maps to 'd' and --fgdaemon maps to 'F'.
static const struct option CMD_OPTS[] = {
	{ .name = "help",        .has_arg = no_argument,       .flag = NULL, .val = 'h' },
	{ .name = "version",     .has_arg = no_argument,       .flag = NULL, .val = 'v' },
	{ .name = "config-file", .has_arg = required_argument, .flag = NULL, .val = 'f' },
	{ .name = "foreground",  .has_arg = no_argument,       .flag = NULL, .val = 'd' },
	{ .name = "fgdaemon",    .has_arg = no_argument,       .flag = NULL, .val = 'F' },
	{ .name = "cold-start",  .has_arg = no_argument,       .flag = NULL, .val = 'c' },
	{ .name = "instance",    .has_arg = required_argument, .flag = NULL, .val = 'n' },

	// All-zero terminator entry.
	{ .name = NULL, .has_arg = 0, .flag = NULL, .val = 0 }
};
| 96 | |
// Full help text, printed to stdout for --help. Each option name is followed
// by its description; a blank line separates entries.
static const char HELP[] =
	"\n"
	"Aerospike server installation installs the script /etc/init.d/aerospike which\n"
	"is normally used to start and stop the server. The script is also found as\n"
	"as/etc/init-script in the source tree.\n\n"

	// Options that only print information and exit.
	"asd informative command-line options:\n\n"

	"--help\n"
	"Print this message and exit.\n\n"

	"--version\n"
	"Print edition and build version information and exit.\n\n"

	// Options that affect how the server runs.
	"asd runtime command-line options:\n\n"

	"--config-file <file>\n"
	"Specify the location of the Aerospike server config file. If this option is not\n"
	"specified, the default location /etc/aerospike/aerospike.conf is used.\n\n"

	"--foreground\n"
	"Specify that Aerospike not be daemonized. This is useful for running Aerospike\n"
	"in gdb. Alternatively, add 'run-as-daemon false' in the service context of the\n"
	"Aerospike config file.\n\n"

	"--fgdaemon\n"
	"Specify that Aerospike is to be run as a \"new-style\" (foreground) daemon. This\n"
	"is useful for running Aerospike under systemd or Docker.\n\n"

	"--cold-start\n"
	"(Enterprise edition only.) At startup, force the Aerospike server to read all\n"
	"records from storage devices to rebuild the index.\n\n"

	"--instance <0-15>\n"
	"(Enterprise edition only.) If running multiple instances of Aerospike on one\n"
	"machine (not recommended), each instance must be uniquely designated via this\n"
	"option.\n"
	;
| 142 | |
// Short usage summary, printed to stderr when command line parsing fails.
static const char USAGE[] =
	"\n"
	"asd informative command-line options:\n"
	"[--help]\n"
	"[--version]\n"
	"\n"
	"asd runtime command-line options:\n"
	// The runtime options all go on a single output line.
	"[--config-file <file>] [--foreground] [--fgdaemon] "
	"[--cold-start] [--instance <0-15>]\n"
	;
| 156 | |
// Config file path used unless --config-file is given on the command line.
// main() compares its config file pointer against this array to tell whether
// it holds an allocated copy that must be freed.
static const char DEFAULT_CONFIG_FILE[] = "/etc/aerospike/aerospike.conf" ;

// Suffix appended to the configured work directory to form the path checked by
// validate_smd_directory().
static const char SMD_DIR_NAME[] = "/smd" ;
| 160 | |
| 161 | |
| 162 | //========================================================== |
| 163 | // Globals. |
| 164 | // |
| 165 | |
// Mutex the main thread parks itself on once startup is done - main() locks it
// twice in a row, and resumes (to shut down) once it is unlocked.
// Not cf_mutex, which won't tolerate unlock if already unlocked.
pthread_mutex_t g_main_deadlock = PTHREAD_MUTEX_INITIALIZER;

// Set by main() after all subsystems are started, just before it parks.
bool g_startup_complete = false;
// Set by main() when it resumes from the deadlock to begin shutdown.
bool g_shutdown_started = false;
| 171 | |
| 172 | |
| 173 | //========================================================== |
| 174 | // Forward declarations. |
| 175 | // |
| 176 | |
// signal.c doesn't have a header file.
extern void as_signal_setup();

// Local helpers, defined after main().
static void write_pidfile(char *pidfile);
static void validate_directory(const char *path, const char *log_tag);
static void validate_smd_directory();
| 183 | |
| 184 | |
| 185 | //========================================================== |
| 186 | // Aerospike server entry point. |
| 187 | // |
| 188 | |
| 189 | int |
| 190 | main(int argc, char **argv) |
| 191 | { |
| 192 | g_start_sec = cf_get_seconds(); |
| 193 | |
| 194 | // Initialize cf_thread wrapper. |
| 195 | cf_thread_init(); |
| 196 | |
| 197 | // Initialize memory allocation. |
| 198 | cf_alloc_init(); |
| 199 | |
| 200 | // Initialize fault management framework. |
| 201 | cf_fault_init(); |
| 202 | |
| 203 | // Setup signal handlers. |
| 204 | as_signal_setup(); |
| 205 | |
| 206 | // Initialize TLS library. |
| 207 | tls_check_init(); |
| 208 | |
| 209 | int opt; |
| 210 | int opt_i; |
| 211 | const char *config_file = DEFAULT_CONFIG_FILE; |
| 212 | bool run_in_foreground = false; |
| 213 | bool new_style_daemon = false; |
| 214 | bool cold_start_cmd = false; |
| 215 | uint32_t instance = 0; |
| 216 | |
| 217 | // Parse command line options. |
| 218 | while ((opt = getopt_long(argc, argv, "" , CMD_OPTS, &opt_i)) != -1) { |
| 219 | switch (opt) { |
| 220 | case 'h': |
| 221 | // printf() since we want stdout and don't want cf_fault's prefix. |
| 222 | printf("%s\n" , HELP); |
| 223 | return 0; |
| 224 | case 'v': |
| 225 | // printf() since we want stdout and don't want cf_fault's prefix. |
| 226 | printf("%s build %s\n" , aerospike_build_type, aerospike_build_id); |
| 227 | return 0; |
| 228 | case 'f': |
| 229 | config_file = cf_strdup(optarg); |
| 230 | break; |
| 231 | case 'F': |
| 232 | // As a "new-style" daemon(*), asd runs in the foreground and |
| 233 | // ignores the following configuration items: |
| 234 | // - user ('user') |
| 235 | // - group ('group') |
| 236 | // - PID file ('pidfile') |
| 237 | // |
| 238 | // If ignoring configuration items, or if the 'console' sink is not |
| 239 | // specified, warnings will appear in stderr. |
| 240 | // |
| 241 | // (*) http://0pointer.de/public/systemd-man/daemon.html#New-Style%20Daemons |
| 242 | run_in_foreground = true; |
| 243 | new_style_daemon = true; |
| 244 | break; |
| 245 | case 'd': |
| 246 | run_in_foreground = true; |
| 247 | break; |
| 248 | case 'c': |
| 249 | cold_start_cmd = true; |
| 250 | break; |
| 251 | case 'n': |
| 252 | instance = (uint32_t)strtol(optarg, NULL, 0); |
| 253 | break; |
| 254 | default: |
| 255 | // fprintf() since we don't want cf_fault's prefix. |
| 256 | fprintf(stderr, "%s\n" , USAGE); |
| 257 | return 1; |
| 258 | } |
| 259 | } |
| 260 | |
| 261 | // Set all fields in the global runtime configuration instance. This parses |
| 262 | // the configuration file, and creates as_namespace objects. (Return value |
| 263 | // is a shortcut pointer to the global runtime configuration instance.) |
| 264 | as_config *c = as_config_init(config_file); |
| 265 | |
| 266 | // Detect NUMA topology and, if requested, prepare for CPU and NUMA pinning. |
| 267 | cf_topo_config(c->auto_pin, (cf_topo_numa_node_index)instance, |
| 268 | &c->service.bind); |
| 269 | |
| 270 | // Perform privilege separation as necessary. If configured user & group |
| 271 | // don't have root privileges, all resources created or reopened past this |
| 272 | // point must be set up so that they are accessible without root privileges. |
| 273 | // If not, the process will self-terminate with (hopefully!) a log message |
| 274 | // indicating which resource is not set up properly. |
| 275 | cf_process_privsep(c->uid, c->gid); |
| 276 | |
| 277 | // |
| 278 | // All resources such as files, devices, and shared memory must be created |
| 279 | // or reopened below this line! (The configuration file is the only thing |
| 280 | // that must be opened above, in order to parse the user & group.) |
| 281 | //========================================================================== |
| 282 | |
| 283 | // A "new-style" daemon expects console logging to be configured. (If not, |
| 284 | // log messages won't be seen via the standard path.) |
| 285 | if (new_style_daemon) { |
| 286 | if (! cf_fault_console_is_held()) { |
| 287 | cf_warning(AS_AS, "in new-style daemon mode, console logging is not configured" ); |
| 288 | } |
| 289 | } |
| 290 | |
| 291 | // Activate log sinks. Up to this point, 'cf_' log output goes to stderr, |
| 292 | // filtered according to NO_SINKS_LIMIT in fault.c. After this point, 'cf_' |
| 293 | // log output will appear in all log file sinks specified in configuration, |
| 294 | // with specified filtering. If console sink is specified in configuration, |
| 295 | // 'cf_' log output will continue going to stderr, but filtering will switch |
| 296 | // from NO_SINKS_LIMIT to that specified in console sink configuration. |
| 297 | if (0 != cf_fault_sink_activate_all_held()) { |
| 298 | // Specifics of failure are logged in cf_fault_sink_activate_all_held(). |
| 299 | cf_crash_nostack(AS_AS, "can't open log sink(s)" ); |
| 300 | } |
| 301 | |
| 302 | // Daemonize asd if specified. After daemonization, output to stderr will no |
| 303 | // longer appear in terminal. Instead, check /tmp/aerospike-console.<pid> |
| 304 | // for console output. |
| 305 | if (! run_in_foreground && c->run_as_daemon) { |
| 306 | // Don't close any open files when daemonizing. At this point only log |
| 307 | // sink files are open - instruct cf_process_daemonize() to ignore them. |
| 308 | int open_fds[CF_FAULT_SINKS_MAX]; |
| 309 | int num_open_fds = cf_fault_sink_get_fd_list(open_fds); |
| 310 | |
| 311 | cf_process_daemonize(open_fds, num_open_fds); |
| 312 | } |
| 313 | |
| 314 | // Log which build this is - should be the first line in the log file. |
| 315 | cf_info(AS_AS, "<><><><><><><><><><> %s build %s <><><><><><><><><><>" , |
| 316 | aerospike_build_type, aerospike_build_id); |
| 317 | |
| 318 | // Includes echoing the configuration file to log. |
| 319 | as_config_post_process(c, config_file); |
| 320 | |
| 321 | xdr_config_post_process(); |
| 322 | |
| 323 | // If we allocated a non-default config file name, free it. |
| 324 | if (config_file != DEFAULT_CONFIG_FILE) { |
| 325 | cf_free((void*)config_file); |
| 326 | } |
| 327 | |
| 328 | // Write the pid file, if specified. |
| 329 | if (! new_style_daemon) { |
| 330 | write_pidfile(c->pidfile); |
| 331 | } |
| 332 | else { |
| 333 | if (c->pidfile) { |
| 334 | cf_warning(AS_AS, "will not write PID file in new-style daemon mode" ); |
| 335 | } |
| 336 | } |
| 337 | |
| 338 | // Check that required directories are set up properly. |
| 339 | validate_directory(c->work_directory, "work" ); |
| 340 | validate_directory(c->mod_lua.user_path, "Lua user" ); |
| 341 | validate_smd_directory(); |
| 342 | |
| 343 | // Initialize subsystems. At this point we're allocating local resources, |
| 344 | // starting worker threads, etc. (But no communication with other server |
| 345 | // nodes or clients yet.) |
| 346 | |
| 347 | as_json_init(); // Jansson JSON API used by System Metadata |
| 348 | as_index_tree_gc_init(); // thread to purge dropped index trees |
| 349 | as_sindex_thr_init(); // defrag secondary index (ok during population) |
| 350 | as_nsup_init(); // load previous evict-void-time(s) |
| 351 | |
| 352 | // Initialize namespaces. Each namespace decides here whether it will do a |
| 353 | // warm or cold start. Index arenas, partition structures and index tree |
| 354 | // structures are initialized. Secondary index system metadata is restored. |
| 355 | as_namespaces_init(cold_start_cmd, instance); |
| 356 | |
| 357 | // Initialize the storage system. For warm and cool restarts, this includes |
| 358 | // fully resuming persisted indexes - this may take a few minutes. |
| 359 | as_storage_init(); |
| 360 | |
| 361 | // Migrate memory to correct NUMA node (includes resumed index arenas). |
| 362 | cf_topo_migrate_memory(); |
| 363 | |
| 364 | // Drop capabilities that we kept only for initialization. |
| 365 | cf_process_drop_startup_caps(); |
| 366 | |
| 367 | // Activate the storage system. For cold starts and cool restarts, this |
| 368 | // includes full drive scans - this may take several hours. The defrag |
| 369 | // subsystem starts operating at the end of this call. |
| 370 | as_storage_load(); |
| 371 | |
| 372 | // Populate all secondary indexes. This may block for a long time. |
| 373 | as_sindex_boot_populateall(); |
| 374 | |
| 375 | cf_info(AS_AS, "initializing services..." ); |
| 376 | |
| 377 | cf_dns_init(); // DNS resolver |
| 378 | as_netio_init(); // query responses |
| 379 | as_security_init(); // security features |
| 380 | as_service_init(); // server may process internal transactions |
| 381 | as_hb_init(); // inter-node heartbeat |
| 382 | as_skew_monitor_init(); // clock skew monitor |
| 383 | as_fabric_init(); // inter-node communications |
| 384 | as_exchange_init(); // initialize the cluster exchange subsystem |
| 385 | as_clustering_init(); // clustering-v5 start |
| 386 | as_info_init(); // info transaction handling |
| 387 | as_migrate_init(); // move data between nodes |
| 388 | as_proxy_init(); // do work on behalf of others |
| 389 | as_rw_init(); // read & write service |
| 390 | as_query_init(); // query transaction handling |
| 391 | as_udf_init(); // user-defined functions |
| 392 | as_scan_init(); // scan a namespace or set |
| 393 | as_batch_init(); // batch transaction handling |
| 394 | as_xdr_init(); // cross data-center replication |
| 395 | as_mon_init(); // monitor |
| 396 | |
| 397 | // Wait for enough available storage. We've been defragging all along, but |
| 398 | // here we wait until it's enough. This may block for a long time. |
| 399 | as_storage_wait_for_defrag(); |
| 400 | |
| 401 | // Start subsystems. At this point we may begin communicating with other |
| 402 | // cluster nodes, and ultimately with clients. |
| 403 | |
| 404 | as_smd_start(); // enables receiving cluster state change events |
| 405 | as_health_start(); // starts before fabric and hb to capture them |
| 406 | as_fabric_start(); // may send & receive fabric messages |
| 407 | as_xdr_start(); // XDR should start before it joins other nodes |
| 408 | as_hb_start(); // start inter-node heartbeat |
| 409 | as_exchange_start(); // start the cluster exchange subsystem |
| 410 | as_clustering_start(); // clustering-v5 start |
| 411 | as_nsup_start(); // may send evict-void-time(s) to other nodes |
| 412 | as_service_start(); // server will now receive client transactions |
| 413 | as_info_port_start(); // server will now receive info transactions |
| 414 | as_ticker_start(); // only after everything else is started |
| 415 | |
| 416 | // Relevant for enterprise edition only. |
| 417 | as_storage_start_tomb_raider(); |
| 418 | |
| 419 | // Log a service-ready message. |
| 420 | cf_info(AS_AS, "service ready: soon there will be cake!" ); |
| 421 | |
| 422 | //-------------------------------------------- |
| 423 | // Startup is done. This thread will now wait |
| 424 | // quietly for a shutdown signal. |
| 425 | // |
| 426 | |
| 427 | // Stop this thread from finishing. Intentionally deadlocking on a mutex is |
| 428 | // a remarkably efficient way to do this. |
| 429 | pthread_mutex_lock(&g_main_deadlock); |
| 430 | g_startup_complete = true; |
| 431 | pthread_mutex_lock(&g_main_deadlock); |
| 432 | |
| 433 | // When the service is running, you are here (deadlocked) - the signals that |
| 434 | // stop the service (yes, these signals always occur in this thread) will |
| 435 | // unlock the mutex, allowing us to continue. |
| 436 | |
| 437 | g_shutdown_started = true; |
| 438 | pthread_mutex_unlock(&g_main_deadlock); |
| 439 | pthread_mutex_destroy(&g_main_deadlock); |
| 440 | |
| 441 | //-------------------------------------------- |
| 442 | // Received a shutdown signal. |
| 443 | // |
| 444 | |
| 445 | cf_info(AS_AS, "initiating clean shutdown ..." ); |
| 446 | |
| 447 | as_storage_shutdown(instance); |
| 448 | as_xdr_shutdown(); |
| 449 | |
| 450 | cf_info(AS_AS, "finished clean shutdown - exiting" ); |
| 451 | |
| 452 | // If shutdown was totally clean (all threads joined) we could just return, |
| 453 | // but for now we exit to make sure all threads die. |
| 454 | #ifdef DOPROFILE |
| 455 | exit(0); // exit(0) so profile build actually dumps gmon.out |
| 456 | #else |
| 457 | _exit(0); |
| 458 | #endif |
| 459 | |
| 460 | return 0; |
| 461 | } |
| 462 | |
| 463 | |
| 464 | //========================================================== |
| 465 | // Local helpers. |
| 466 | // |
| 467 | |
| 468 | static void |
| 469 | write_pidfile(char *pidfile) |
| 470 | { |
| 471 | if (! pidfile) { |
| 472 | // If there's no pid file specified in the config file, just move on. |
| 473 | return; |
| 474 | } |
| 475 | |
| 476 | // Note - the directory the pid file is in must already exist. |
| 477 | |
| 478 | remove(pidfile); |
| 479 | |
| 480 | int pid_fd = open(pidfile, O_CREAT | O_RDWR, |
| 481 | S_IWUSR | S_IRUSR | S_IRGRP | S_IROTH); |
| 482 | |
| 483 | if (pid_fd < 0) { |
| 484 | cf_crash_nostack(AS_AS, "failed to open pid file %s: %s" , pidfile, |
| 485 | cf_strerror(errno)); |
| 486 | } |
| 487 | |
| 488 | char pidstr[16]; |
| 489 | sprintf(pidstr, "%u\n" , (uint32_t)getpid()); |
| 490 | |
| 491 | // If we can't access this resource, just log a warning and continue - |
| 492 | // it is not critical to the process. |
| 493 | if (write(pid_fd, pidstr, strlen(pidstr)) == -1) { |
| 494 | cf_warning(AS_AS, "failed write to pid file %s: %s" , pidfile, |
| 495 | cf_strerror(errno)); |
| 496 | } |
| 497 | |
| 498 | close(pid_fd); |
| 499 | } |
| 500 | |
| 501 | static void |
| 502 | validate_directory(const char *path, const char *log_tag) |
| 503 | { |
| 504 | struct stat buf; |
| 505 | |
| 506 | if (stat(path, &buf) != 0) { |
| 507 | cf_crash_nostack(AS_AS, "%s directory '%s' is not set up properly: %s" , |
| 508 | log_tag, path, cf_strerror(errno)); |
| 509 | } |
| 510 | else if (! S_ISDIR(buf.st_mode)) { |
| 511 | cf_crash_nostack(AS_AS, "%s directory '%s' is not set up properly: Not a directory" , |
| 512 | log_tag, path); |
| 513 | } |
| 514 | } |
| 515 | |
| 516 | static void |
| 517 | validate_smd_directory() |
| 518 | { |
| 519 | size_t len = strlen(g_config.work_directory); |
| 520 | char smd_path[len + sizeof(SMD_DIR_NAME)]; |
| 521 | |
| 522 | strcpy(smd_path, g_config.work_directory); |
| 523 | strcpy(smd_path + len, SMD_DIR_NAME); |
| 524 | validate_directory(smd_path, "system metadata" ); |
| 525 | } |
| 526 | |