1 | /* |
2 | * This Source Code Form is subject to the terms of the Mozilla Public |
3 | * License, v. 2.0. If a copy of the MPL was not distributed with this |
4 | * file, You can obtain one at http://mozilla.org/MPL/2.0/. |
5 | * |
6 | * Copyright 1997 - July 2008 CWI, August 2008 - 2019 MonetDB B.V. |
7 | */ |
8 | |
9 | /** |
10 | * Merovingian |
11 | * Fabian Groffen |
12 | * The MonetDB keeper |
13 | * |
14 | * The role of Merovingian within the MonetDB suite is to act as a smart |
15 | * proxy, with capabilities to start mserver5s when necessary. |
16 | * |
17 | * Since some people appear to have trouble pronouncing or remembering |
18 | * its name, one can also refer to Merovingian as Mero or its name for |
19 | * dummies: monetdbd. In any case, people having difficulties here |
20 | * should watch The Matrix once more. |
21 | * |
22 | * Most of Merovingian's decisions are based on information provided by |
23 | * Sabaoth. Sabaoth is a file-system based administration shared |
24 | * between all mserver5s in the same farm on a local machine. It keeps |
25 | * track of how mserver5s can be reached, with which scenarios, and what |
26 | * the crashcounter of each server is. |
27 | * |
 * Merovingian will fork off an mserver5 whenever a client requests a
 * database which is not running yet.  Sabaoth ensures that Merovingian
 * can find already running or previously forked mserver5s.
31 | * |
32 | * While Merovingian currently just starts a database on the fly when a |
33 | * client asks for it, in the future, Merovingian can decide to only |
34 | * start a database if the crashlog information maintained by Sabaoth |
35 | * shows that the mserver5 doesn't behave badly. For example |
36 | * Merovingian can refuse to start a database if it has crashed a |
37 | * number of times over a recent period. Note that to date, no such |
38 | * thing has been implemented as the need for it has not arisen yet. |
39 | * |
40 | * merovingian will monitor and control the dbfarm given by path in the |
41 | * first argument. This allows users to create their own dbfarm, but |
42 | * also expert users to run multiple merovingians on the same system |
43 | * easily, since the (persistent) configuration is read from the dbfarm |
44 | * directory. |
45 | */ |
46 | |
47 | #include "monetdb_config.h" |
48 | #include "msabaoth.h" |
49 | #include "mutils.h" /* MT_lockf */ |
50 | #include "mcrypt.h" /* mcrypt_BackendSum */ |
51 | #include "utils/utils.h" |
52 | #include "utils/properties.h" |
53 | #include "utils/glob.h" |
54 | #include "utils/database.h" |
55 | #include "utils/control.h" |
56 | |
57 | #include <sys/types.h> |
58 | #include <sys/stat.h> /* stat */ |
59 | #include <sys/wait.h> /* wait */ |
60 | #include <sys/socket.h> |
61 | #include <sys/un.h> |
62 | #include <netdb.h> |
63 | #include <netinet/in.h> |
64 | #ifdef HAVE_POLL_H |
65 | #include <poll.h> |
66 | #endif |
67 | #include <fcntl.h> |
68 | #include <unistd.h> /* unlink, isatty */ |
69 | #include <string.h> /* strerror */ |
70 | #include <signal.h> /* handle Ctrl-C, etc. */ |
71 | #include <time.h> |
72 | |
73 | #include "merovingian.h" |
74 | #include "client.h" |
75 | #include "connections.h" |
76 | #include "controlrunner.h" |
77 | #include "discoveryrunner.h" |
78 | #include "handlers.h" |
79 | #include "argvcmds.h" |
80 | #include "multiplex-funnel.h" |
81 | |
/* Portability fallbacks.
 *
 * Where the platform headers do not provide O_CLOEXEC it is defined as
 * 0 so it can still be OR-ed into open()/pipe2() flag arguments without
 * effect.  Where pipe2() is unavailable it is mapped onto plain pipe(),
 * which drops the flags argument; the code in this file therefore sets
 * FD_CLOEXEC separately through fcntl() in those configurations. */
#if !defined(O_CLOEXEC)
#define O_CLOEXEC 0
#endif

#if !defined(HAVE_PIPE2)
#define pipe2(pipefd, flags)	pipe(pipefd)
#endif
89 | |
90 | /* private structs */ |
91 | |
/* Node of a singly linked list of thread ids; `threadlist` is a
 * pointer to such a node. */
typedef struct _threadlist {
	pthread_t tid;				/* id of the thread this node refers to */
	struct _threadlist *next;	/* following node in the list */
} *threadlist;
96 | |
97 | |
98 | /* globals */ |
99 | |
100 | /* full path to the mserver5 binary */ |
101 | char *_mero_mserver = NULL; |
102 | /* list of databases that we have started */ |
103 | dpair _mero_topdp = NULL; |
104 | /* lock to _mero_topdp, initialised as recursive lateron */ |
105 | pthread_mutex_t _mero_topdp_lock = PTHREAD_MUTEX_INITIALIZER; |
106 | /* for the logger, when set to 0, the logger terminates */ |
107 | volatile int _mero_keep_logging = 1; |
108 | /* for accepting connections, when set to 0, listening socket terminates */ |
109 | volatile sig_atomic_t _mero_keep_listening = 1; |
110 | /* stream to where to write the log */ |
111 | FILE *_mero_logfile = NULL; |
112 | /* stream to the stdout for the neighbour discovery service */ |
113 | FILE *_mero_discout = NULL; |
114 | /* stream to the stderr for the neighbour discovery service */ |
115 | FILE *_mero_discerr = NULL; |
116 | /* stream to the stdout for the control runner */ |
117 | FILE *_mero_ctlout = NULL; |
118 | /* stream to the stderr for the control runner */ |
119 | FILE *_mero_ctlerr = NULL; |
120 | /* broadcast socket for announcements */ |
121 | int _mero_broadcastsock = -1; |
122 | /* ipv6 global any bind address constant */ |
123 | const struct in6_addr ipv6_any_addr = IN6ADDR_ANY_INIT; |
124 | /* broadcast address/port */ |
125 | struct sockaddr_in _mero_broadcastaddr; |
126 | /* hostname of this machine */ |
127 | char _mero_hostname[128]; |
128 | /* default options read from config file */ |
129 | confkeyval *_mero_db_props = NULL; |
130 | /* merovingian's own properties */ |
131 | confkeyval *_mero_props = NULL; |
132 | |
133 | |
134 | /* funcs */ |
135 | |
/**
 * Copies pending output from file descriptor fd to stream, one log line
 * per input line.  Every line written is prefixed with the current
 * local time ("%Y-%m-%d %H:%M:%S"), followed by "type dbname[pid]: ".
 *
 * fd      descriptor to read from
 * type    tag written after the timestamp (callers here pass "MSG" or
 *         "ERR")
 * dbname  name written in front of the pid
 * pid     process id written between square brackets
 * stream  destination stream; it is flushed before returning
 * rest    when zero a single read() is performed; when non-zero reading
 *         continues until read() returns <= 0 (end of file or error)
 *
 * At most sizeof(buf) - 1 bytes are consumed per read().  Data that does
 * not end in a newline within one read is written as a line of its own
 * (a newline is appended); whatever follows in a later read starts a
 * new, fully prefixed log line, so every line in the log carries its
 * timestamp and identification.
 *
 * The function is deliberately not declared `inline`: a plain inline
 * definition does not provide an external definition in C99/C11 unless
 * another declaration in the translation unit lacks `inline`.
 */
void
logFD(int fd, char *type, char *dbname, long long int pid, FILE *stream, int rest)
{
	char buf[8096];
	char mytime[20];
	int len = 0;

	do {
		char *line;
		char *eol;
		time_t now;
		struct tm *tmp;

		if ((len = read(fd, buf, sizeof(buf) - 1)) <= 0)
			break;
		buf[len] = '\0';

		now = time(NULL);
		tmp = localtime(&now);
		/* never print an uninitialised timestamp buffer */
		if (tmp == NULL ||
			strftime(mytime, sizeof(mytime), "%Y-%m-%d %H:%M:%S", tmp) == 0)
			snprintf(mytime, sizeof(mytime), "%s", "0000-00-00 00:00:00");

		/* emit all complete lines found in this chunk */
		line = buf;
		while ((eol = strchr(line, '\n')) != NULL) {
			*eol = '\0';
			fprintf(stream, "%s %s %s[%lld]: %s\n",
					mytime, type, dbname, pid, line);
			line = eol + 1;
		}
		/* trailing data without newline: terminate the line ourselves;
		 * the next chunk then starts a new prefixed line */
		if ((int)(line - buf) < len)
			fprintf(stream, "%s %s %s[%lld]: %s\n",
					mytime, type, dbname, pid, line);
	} while (rest);
	fflush(stream);
}
174 | |
175 | static void * |
176 | logListener(void *x) |
177 | { |
178 | dpair d = _mero_topdp; |
179 | dpair w; |
180 | #ifdef HAVE_POLL |
181 | struct pollfd *pfd; |
182 | #else |
183 | struct timeval tv; |
184 | fd_set readfds; |
185 | #endif |
186 | int nfds; |
187 | |
188 | (void)x; |
189 | |
190 | /* the first entry in the list of d is where our output should go to |
191 | * but we only use the streams, so we don't care about it in the |
192 | * normal loop */ |
193 | d = d->next; |
194 | |
195 | do { |
196 | /* wait max 1 second, tradeoff between performance and being |
197 | * able to catch up new logger streams */ |
198 | #ifndef HAVE_POLL |
199 | tv.tv_sec = 1; |
200 | tv.tv_usec = 0; |
201 | FD_ZERO(&readfds); |
202 | #endif |
203 | nfds = 0; |
204 | |
205 | /* make sure noone is killing or adding entries here */ |
206 | pthread_mutex_lock(&_mero_topdp_lock); |
207 | |
208 | #ifdef HAVE_POLL |
209 | for (w = d; w != NULL; w = w->next) { |
210 | nfds += 2; |
211 | } |
212 | pfd = malloc(nfds * sizeof(struct pollfd)); |
213 | nfds = 0; |
214 | for (w = d; w != NULL; w = w->next) { |
215 | pfd[nfds++] = (struct pollfd) {.fd = w->out, .events = POLLIN}; |
216 | if (w->out != w->err) |
217 | pfd[nfds++] = (struct pollfd) {.fd = w->err, .events = POLLIN}; |
218 | w->flag |= 1; |
219 | } |
220 | #else |
221 | for (w = d; w != NULL; w = w->next) { |
222 | FD_SET(w->out, &readfds); |
223 | if (nfds < w->out) |
224 | nfds = w->out; |
225 | FD_SET(w->err, &readfds); |
226 | if (nfds < w->err) |
227 | nfds = w->err; |
228 | w->flag |= 1; |
229 | } |
230 | #endif |
231 | |
232 | pthread_mutex_unlock(&_mero_topdp_lock); |
233 | |
234 | if ( |
235 | #ifdef HAVE_POLL |
236 | poll(pfd, nfds, 1000) |
237 | #else |
238 | select(nfds + 1, &readfds, NULL, NULL, &tv) |
239 | #endif |
240 | <= 0) { |
241 | #ifdef HAVE_POLL |
242 | free(pfd); |
243 | #endif |
244 | if (_mero_keep_logging != 0) { |
245 | continue; |
246 | } else { |
247 | break; |
248 | } |
249 | } |
250 | reinitialize(); |
251 | |
252 | pthread_mutex_lock(&_mero_topdp_lock); |
253 | |
254 | w = d; |
255 | while (w != NULL) { |
256 | /* only look at records we've added in the previous loop */ |
257 | if (w->flag & 1) { |
258 | #ifdef HAVE_POLL |
259 | for (int i = 0; i < nfds; i++) { |
260 | if (pfd[i].fd == w->out && pfd[i].revents & POLLIN) |
261 | logFD(w->out, "MSG" , w->dbname, |
262 | (long long int)w->pid, _mero_logfile, 0); |
263 | else if (pfd[i].fd == w->err && pfd[i].revents & POLLIN) |
264 | logFD(w->err, "ERR" , w->dbname, |
265 | (long long int)w->pid, _mero_logfile, 0); |
266 | } |
267 | #else |
268 | if (FD_ISSET(w->out, &readfds) != 0) |
269 | logFD(w->out, "MSG" , w->dbname, |
270 | (long long int)w->pid, _mero_logfile, 0); |
271 | if (w->err != w->out && FD_ISSET(w->err, &readfds) != 0) |
272 | logFD(w->err, "ERR" , w->dbname, |
273 | (long long int)w->pid, _mero_logfile, 0); |
274 | #endif |
275 | w->flag &= ~1; |
276 | } |
277 | w = w->next; |
278 | } |
279 | |
280 | pthread_mutex_unlock(&_mero_topdp_lock); |
281 | |
282 | #ifdef HAVE_POLL |
283 | free(pfd); |
284 | #endif |
285 | fflush(_mero_logfile); |
286 | } while (_mero_keep_logging); |
287 | return NULL; |
288 | } |
289 | |
/**
 * Creates a new error message from a printf-style format and its
 * arguments.  The result is allocated with malloc and should be freed
 * using freeErr().
 *
 * The message is formatted in two passes: the first determines the
 * required length, the second writes into a buffer of exactly that
 * size, so messages of any length are returned without truncation.
 *
 * Returns the formatted message, or NULL when memory could not be
 * allocated.  Should formatting itself fail, a copy of the raw format
 * string is returned instead so that the caller still receives a
 * non-NULL error.
 */
char *
newErr(const char *fmt, ...)
{
	va_list ap;
	va_list ap_fmt;
	char *ret = NULL;
	int need;

	va_start(ap, fmt);
	/* the argument list is consumed twice, hence the copy */
	va_copy(ap_fmt, ap);
	need = vsnprintf(NULL, 0, fmt, ap);
	va_end(ap);

	if (need >= 0) {
		ret = malloc((size_t) need + 1);
		if (ret != NULL &&
			vsnprintf(ret, (size_t) need + 1, fmt, ap_fmt) < 0) {
			free(ret);
			ret = NULL;
			need = -1;
		}
	}
	va_end(ap_fmt);

	if (need < 0) {
		/* formatting failed: fall back to the unformatted text */
		size_t n = strlen(fmt) + 1;

		ret = malloc(n);
		if (ret != NULL)
			memcpy(ret, fmt, n);
	}
	return(ret);
}
310 | |
311 | |
312 | static void * |
313 | doTerminateProcess(void *p) |
314 | { |
315 | dpair dp = p; |
316 | terminateProcess(dp->pid, strdup(dp->dbname), dp->type, 1); |
317 | return NULL; |
318 | } |
319 | |
320 | int |
321 | main(int argc, char *argv[]) |
322 | { |
323 | err e; |
324 | int argp; |
325 | char dbfarm[1024]; |
326 | char *pidfilename = NULL; |
327 | char *p; |
328 | FILE *pidfile = NULL; |
329 | char control_usock[1024]; |
330 | char mapi_usock[1024]; |
331 | dpair d = NULL; |
332 | struct _dpair dpcons; |
333 | struct _dpair dpmero; |
334 | struct _dpair dpdisc; |
335 | struct _dpair dpcont; |
336 | int pfd[2]; |
337 | pthread_t tid = 0; |
338 | struct sigaction sa; |
339 | int ret; |
340 | int lockfd = -1; |
341 | int sock = -1; |
342 | int discsock = -1; |
343 | int unsock = -1; |
344 | int socku = -1; |
345 | char* host = NULL; |
346 | unsigned short port = 0; |
347 | char discovery = 0; |
348 | struct stat sb; |
349 | FILE *oerr = NULL; |
350 | int thret; |
351 | char merodontfork = 0; |
352 | bool use_ipv6 = false; |
353 | confkeyval ckv[] = { |
354 | {"logfile" , strdup("merovingian.log" ), 0, STR}, |
355 | {"pidfile" , strdup("merovingian.pid" ), 0, STR}, |
356 | |
357 | {"sockdir" , strdup("/tmp" ), 0, STR}, |
358 | {"listenaddr" , strdup("localhost" ), 0, STR}, |
359 | {"port" , strdup(MERO_PORT), atoi(MERO_PORT), INT}, |
360 | {"ipv6" , strdup("false" ), 0, BOOLEAN}, |
361 | |
362 | {"exittimeout" , strdup("60" ), 60, INT}, |
363 | {"forward" , strdup("proxy" ), 0, OTHER}, |
364 | |
365 | {"discovery" , strdup("true" ), 1, BOOLEAN}, |
366 | {"discoveryttl" , strdup("600" ), 600, INT}, |
367 | |
368 | {"control" , strdup("false" ), 0, BOOLEAN}, |
369 | {"passphrase" , NULL, 0, STR}, |
370 | |
371 | { NULL, NULL, 0, INVALID} |
372 | }; |
373 | confkeyval *kv; |
374 | int retfd = -1; |
375 | int dup_err; |
376 | |
377 | /* seed the randomiser for when we create a database, send responses |
378 | * to HELO, etc */ |
379 | srand(time(NULL)); |
380 | /* figure out our hostname */ |
381 | gethostname(_mero_hostname, 128); |
382 | /* where is the mserver5 binary we fork on demand? |
383 | * first try to locate it based on our binary location, fall-back to |
384 | * hardcoded bin-dir */ |
385 | _mero_mserver = get_bin_path(); |
386 | if (_mero_mserver != NULL) { |
387 | /* Find where the string monetdbd actually starts */ |
388 | char *s = strstr(_mero_mserver, "monetdbd" ); |
389 | if (s != NULL) { |
390 | /* Replace the 8 following characters with the characters mserver5. |
391 | * This should work even if the executables have prefixes or |
392 | * suffixes */ |
393 | int i; |
394 | for (i = 0; i < 8; i++) |
395 | s[i] = "mserver5" [i]; |
396 | if (stat(_mero_mserver, &sb) == -1) |
397 | _mero_mserver = NULL; |
398 | } |
399 | } |
400 | /* setup default database properties, constants: unlike historical |
401 | * versions, we do not want changing defaults any more */ |
402 | _mero_db_props = getDefaultProps(); |
403 | kv = findConfKey(_mero_db_props, "shared" ); |
404 | kv->val = strdup("yes" ); |
405 | kv = findConfKey(_mero_db_props, "readonly" ); |
406 | kv->val = strdup("no" ); |
407 | kv = findConfKey(_mero_db_props, "embedr" ); |
408 | kv->val = strdup("no" ); |
409 | kv = findConfKey(_mero_db_props, "embedpy" ); |
410 | kv->val = strdup("no" ); |
411 | kv = findConfKey(_mero_db_props, "embedpy3" ); |
412 | kv->val = strdup("no" ); |
413 | kv = findConfKey(_mero_db_props, "embedc" ); |
414 | kv->val = strdup("no" ); |
415 | kv = findConfKey(_mero_db_props, "ipv6" ); |
416 | kv->val = strdup("no" ); |
417 | kv = findConfKey(_mero_db_props, "nclients" ); |
418 | kv->val = strdup("64" ); |
419 | kv = findConfKey(_mero_db_props, "type" ); |
420 | kv->val = strdup("database" ); |
421 | kv = findConfKey(_mero_db_props, "optpipe" ); |
422 | kv->val = strdup("default_pipe" ); |
423 | { /* nrthreads */ |
424 | int ncpus = -1; |
425 | char cnt[8]; |
426 | |
427 | #if defined(HAVE_SYSCONF) && defined(_SC_NPROCESSORS_ONLN) |
428 | /* this works on Linux, Solaris and AIX */ |
429 | ncpus = sysconf(_SC_NPROCESSORS_ONLN); |
430 | #elif defined(HW_NCPU) /* BSD */ |
431 | size_t len = sizeof(int); |
432 | int mib[3]; |
433 | |
434 | /* Everyone should have permission to make this call, |
435 | * if we get a failure something is really wrong. */ |
436 | mib[0] = CTL_HW; |
437 | mib[1] = HW_NCPU; |
438 | mib[2] = -1; |
439 | sysctl(mib, 3, &ncpus, &len, NULL, 0); |
440 | #elif defined(WIN32) |
441 | SYSTEM_INFO sysinfo; |
442 | |
443 | GetSystemInfo(&sysinfo); |
444 | ncpus = sysinfo.dwNumberOfProcessors; |
445 | #endif |
446 | if (ncpus > 0) { |
447 | snprintf(cnt, sizeof(cnt), "%d" , ncpus); |
448 | kv = findConfKey(_mero_db_props, "nthreads" ); |
449 | kv->val = strdup(cnt); |
450 | } |
451 | } |
452 | |
453 | *dbfarm = '\0'; |
454 | if (argc > 1) { |
455 | if (strcmp(argv[1], "--help" ) == 0 || |
456 | strcmp(argv[1], "-h" ) == 0 || |
457 | strcmp(argv[1], "help" ) == 0) |
458 | { |
459 | exit(command_help(argc - 1, &argv[1])); |
460 | } else if (strcmp(argv[1], "--version" ) == 0 || |
461 | strcmp(argv[1], "-v" ) == 0 || |
462 | strcmp(argv[1], "version" ) == 0) |
463 | { |
464 | exit(command_version()); |
465 | } else if (strcmp(argv[1], "create" ) == 0) { |
466 | exit(command_create(argc - 1, &argv[1])); |
467 | } else if (strcmp(argv[1], "get" ) == 0) { |
468 | exit(command_get(ckv, argc - 1, &argv[1])); |
469 | } else if (strcmp(argv[1], "set" ) == 0) { |
470 | exit(command_set(ckv, argc - 1, &argv[1])); |
471 | } else if (strcmp(argv[1], "start" ) == 0) { |
472 | if (argc > 3 && strcmp(argv[2], "-n" ) == 0) |
473 | merodontfork = 1; |
474 | if (argc == 3 + merodontfork) { |
475 | int len; |
476 | len = snprintf(dbfarm, sizeof(dbfarm), "%s" , |
477 | argv[2 + merodontfork]); |
478 | |
479 | if (len > 0 && (size_t)len >= sizeof(dbfarm)) { |
480 | Mfprintf(stderr, "fatal: dbfarm exceeds allocated " \ |
481 | "path length, please file a bug at " \ |
482 | "http://bugs.monetdb.org/\n" ); |
483 | exit(1); |
484 | } |
485 | } else { |
486 | command_help(argc, argv); |
487 | exit(1); |
488 | } |
489 | } else if (strcmp(argv[1], "stop" ) == 0) { |
490 | exit(command_stop(ckv, argc - 1, &argv[1])); |
491 | } else { |
492 | fprintf(stderr, "monetdbd: unknown command: %s\n" , argv[1]); |
493 | command_help(0, NULL); |
494 | exit(1); |
495 | } |
496 | } else { |
497 | command_help(0, NULL); |
498 | exit(1); |
499 | } |
500 | |
501 | assert(*dbfarm != '\0'); |
502 | |
503 | /* fork into background before doing anything more */ |
504 | if (!merodontfork) { |
505 | char buf[4]; |
506 | |
507 | /* Fork into the background immediately. By doing this, our child |
508 | * can simply do everything it needs to do itself. Via a pipe it |
509 | * will tell us if it is happy or not. */ |
510 | if (pipe2(pfd, O_CLOEXEC) == -1) { |
511 | Mfprintf(stderr, "unable to create pipe: %s\n" , strerror(errno)); |
512 | return(1); |
513 | } |
514 | switch (fork()) { |
515 | case -1: |
516 | /* oops, forking went wrong! */ |
517 | Mfprintf(stderr, "unable to fork into background: %s\n" , |
518 | strerror(errno)); |
519 | return(1); |
520 | case 0: |
521 | /* detach client from controlling tty, we only write to the |
522 | * pipe to daddy */ |
523 | if (setsid() < 0) |
524 | Mfprintf(stderr, "hmmm, can't detach from controlling tty, " |
525 | "continuing anyway\n" ); |
526 | if((retfd = open("/dev/null" , O_RDONLY | O_CLOEXEC)) < 0) { |
527 | Mfprintf(stderr, "unable to dup stdin\n" ); |
528 | return(1); |
529 | } |
530 | dup_err = dup2(retfd, 0); |
531 | close(retfd); |
532 | close(pfd[0]); /* close unused read end */ |
533 | retfd = pfd[1]; /* store the write end */ |
534 | if(dup_err == -1) { |
535 | Mfprintf(stderr, "unable to dup stdin\n" ); |
536 | return(1); |
537 | } |
538 | #if !defined(HAVE_PIPE2) || O_CLOEXEC == 0 |
539 | (void) fcntl(retfd, F_SETFD, FD_CLOEXEC); |
540 | #endif |
541 | break; |
542 | default: |
543 | /* the parent, we want it to die, after we know the child |
544 | * is having a good time */ |
545 | close(pfd[1]); /* close unused write end */ |
546 | freeConfFile(ckv); /* make debug tools happy */ |
547 | if (read(pfd[0], &buf, 1) != 1) { |
548 | Mfprintf(stderr, "unable to retrieve startup status\n" ); |
549 | return(1); |
550 | } |
551 | close(pfd[0]); |
552 | return(buf[0]); /* whatever the child returned, we return */ |
553 | } |
554 | } |
555 | |
556 | /* use after the logger thread has started */ |
557 | #define MERO_EXIT(status) \ |
558 | do { \ |
559 | if (!merodontfork) { \ |
560 | char s = status; \ |
561 | if (write(retfd, &s, 1) != 1 || close(retfd) != 0) { \ |
562 | Mfprintf(stderr, "could not write to parent\n"); \ |
563 | } \ |
564 | if (status != 0) { \ |
565 | Mfprintf(stderr, "fatal startup condition encountered, " \ |
566 | "aborting startup\n"); \ |
567 | goto shutdown; \ |
568 | } \ |
569 | } else { \ |
570 | if (status != 0) { \ |
571 | Mfprintf(stderr, "fatal startup condition encountered, " \ |
572 | "aborting startup\n"); \ |
573 | goto shutdown; \ |
574 | } \ |
575 | } \ |
576 | } while (0) |
577 | |
578 | /* use before logger thread has started */ |
579 | #define MERO_EXIT_CLEAN(status) \ |
580 | do { \ |
581 | if (!merodontfork) { \ |
582 | char s = status; \ |
583 | if (write(retfd, &s, 1) != 1 || close(retfd) != 0) { \ |
584 | Mfprintf(stderr, "could not write to parent\n"); \ |
585 | } \ |
586 | } \ |
587 | exit(status); \ |
588 | } while (0) |
589 | |
590 | /* chdir to dbfarm so we are at least in a known to exist location */ |
591 | if (chdir(dbfarm) < 0) { |
592 | if (errno == ENOENT) |
593 | Mfprintf(stderr, "dbfarm directory '%s' does not exist, " |
594 | "use monetdbd create first\n" , dbfarm); |
595 | else |
596 | Mfprintf(stderr, "could not move to dbfarm '%s': %s\n" , |
597 | dbfarm, strerror(errno)); |
598 | MERO_EXIT_CLEAN(1); |
599 | } |
600 | /* absolutise dbfarm if it isn't yet (we're in it now) */ |
601 | if (dbfarm[0] != '/') { |
602 | if (getcwd(dbfarm, sizeof(dbfarm)) == NULL) { |
603 | if (errno == ERANGE) { |
604 | Mfprintf(stderr, "current path exceeds allocated path length" \ |
605 | "please file a bug at http://bugs.monetdb.org\n" ); |
606 | } else { |
607 | Mfprintf(stderr, "could not get dbfarm working directory: %s\n" , |
608 | strerror(errno)); |
609 | } |
610 | MERO_EXIT_CLEAN(1); |
611 | } |
612 | } |
613 | |
614 | if (_mero_mserver == NULL) { |
615 | _mero_mserver = BINDIR "/mserver5" ; |
616 | if (stat(_mero_mserver, &sb) == -1) { |
617 | /* exit early if this is not going to work well */ |
618 | Mfprintf(stderr, "cannot stat %s executable: %s\n" , |
619 | _mero_mserver, strerror(errno)); |
620 | MERO_EXIT_CLEAN(1); |
621 | } |
622 | } |
623 | |
624 | /* read the merovingian properties from the dbfarm */ |
625 | if (readProps(ckv, "." ) != 0) { |
626 | Mfprintf(stderr, "cannot find or read properties file, was " |
627 | "this dbfarm created by `monetdbd create`?\n" ); |
628 | MERO_EXIT_CLEAN(1); |
629 | } |
630 | _mero_props = ckv; |
631 | |
632 | use_ipv6 = getConfNum(_mero_props, "ipv6" ) == 1; |
633 | pidfilename = getConfVal(_mero_props, "pidfile" ); |
634 | |
635 | p = getConfVal(_mero_props, "forward" ); |
636 | if (strcmp(p, "redirect" ) != 0 && strcmp(p, "proxy" ) != 0) { |
637 | Mfprintf(stderr, "invalid forwarding mode: %s, defaulting to proxy\n" , |
638 | p); |
639 | kv = findConfKey(_mero_props, "forward" ); |
640 | setConfVal(kv, "proxy" ); |
641 | writeProps(_mero_props, "." ); |
642 | } |
643 | |
644 | kv = findConfKey(_mero_props, "listenaddr" ); |
645 | if (kv->val == NULL || strlen(kv->val) < 1) { |
646 | Mfprintf(stderr, "invalid host name: %s, defaulting to localhost\n" , |
647 | kv->val); |
648 | setConfVal(kv, "localhost" ); |
649 | writeProps(_mero_props, "." ); |
650 | } |
651 | host = kv->val; |
652 | |
653 | kv = findConfKey(_mero_props, "port" ); |
654 | if (kv->ival <= 0 || kv->ival > 65535) { |
655 | Mfprintf(stderr, "invalid port number: %s, defaulting to %s\n" , |
656 | kv->val, MERO_PORT); |
657 | setConfVal(kv, MERO_PORT); |
658 | writeProps(_mero_props, "." ); |
659 | } |
660 | port = (unsigned short)kv->ival; |
661 | |
662 | discovery = getConfNum(_mero_props, "discovery" ); |
663 | |
664 | /* check and trim the hash-algo from the passphrase for easy use |
665 | * lateron */ |
666 | kv = findConfKey(_mero_props, "passphrase" ); |
667 | if (kv->val != NULL) { |
668 | char *h = kv->val + 1; |
669 | if ((p = strchr(h, '}')) == NULL) { |
670 | Mfprintf(stderr, "warning: incompatible passphrase (not hashed as " |
671 | MONETDB5_PASSWDHASH "), disabling passphrase\n" ); |
672 | } else { |
673 | *p = '\0'; |
674 | if (strcmp(h, MONETDB5_PASSWDHASH) != 0) { |
675 | Mfprintf(stderr, "warning: passphrase hash '%s' incompatible, " |
676 | "expected '%s', disabling passphrase\n" , |
677 | h, MONETDB5_PASSWDHASH); |
678 | } else { |
679 | /* p points into kv->val which gets freed before p |
680 | * gets copied inside setConfVal, hence we need to |
681 | * make a temporary copy */ |
682 | p = strdup(p + 1); |
683 | setConfVal(kv, p); |
684 | free(p); |
685 | } |
686 | } |
687 | } |
688 | |
689 | /* lock such that we are alone on this world */ |
690 | if ((lockfd = MT_lockf(".merovingian_lock" , F_TLOCK, 4, 1)) == -1) { |
691 | /* locking failed */ |
692 | Mfprintf(stderr, "another monetdbd is already running\n" ); |
693 | MERO_EXIT_CLEAN(1); |
694 | } else if (lockfd == -2) { |
695 | /* directory or something doesn't exist */ |
696 | Mfprintf(stderr, "unable to create %s/.merovingian_lock file: %s\n" , |
697 | dbfarm, strerror(errno)); |
698 | MERO_EXIT_CLEAN(1); |
699 | } |
700 | |
701 | /* set up UNIX socket paths for control and mapi */ |
702 | p = getConfVal(_mero_props, "sockdir" ); |
703 | snprintf(control_usock, sizeof(control_usock), "%s/" CONTROL_SOCK "%d" , |
704 | p, port); |
705 | snprintf(mapi_usock, sizeof(mapi_usock), "%s/" MERO_SOCK "%d" , |
706 | p, port); |
707 | |
708 | if ((remove(control_usock) != 0 && errno != ENOENT) || |
709 | (remove(mapi_usock) != 0 && errno != ENOENT)) { |
710 | /* cannot remove socket files */ |
711 | Mfprintf(stderr, "cannot remove socket files\n" ); |
712 | MERO_EXIT_CLEAN(1); |
713 | } |
714 | |
715 | _mero_topdp = &dpcons; |
716 | _mero_topdp->pid = 0; |
717 | _mero_topdp->type = MERO; |
718 | _mero_topdp->dbname = NULL; |
719 | _mero_topdp->flag = 0; |
720 | |
721 | /* where should our msg output go to? */ |
722 | p = getConfVal(_mero_props, "logfile" ); |
723 | /* write to the given file */ |
724 | _mero_topdp->out = open(p, O_WRONLY | O_APPEND | O_CREAT | O_CLOEXEC, |
725 | S_IRUSR | S_IWUSR); |
726 | if (_mero_topdp->out == -1) { |
727 | Mfprintf(stderr, "unable to open '%s': %s\n" , |
728 | p, strerror(errno)); |
729 | MERO_EXIT_CLEAN(1); |
730 | } |
731 | #if O_CLOEXEC == 0 |
732 | (void) fcntl(_mero_topdp->out, F_SETFD, FD_CLOEXEC); |
733 | #endif |
734 | _mero_topdp->err = _mero_topdp->out; |
735 | |
736 | if(!(_mero_logfile = fdopen(_mero_topdp->out, "a" ))) { |
737 | Mfprintf(stderr, "unable to open file descriptor: %s\n" , |
738 | strerror(errno)); |
739 | MERO_EXIT(1); |
740 | } |
741 | |
742 | d = _mero_topdp->next = &dpmero; |
743 | |
744 | /* redirect stdout */ |
745 | if (pipe2(pfd, O_CLOEXEC) == -1) { |
746 | Mfprintf(stderr, "unable to create pipe: %s\n" , |
747 | strerror(errno)); |
748 | MERO_EXIT(1); |
749 | } |
750 | #if !defined(HAVE_PIPE2) || O_CLOEXEC == 0 |
751 | (void) fcntl(pfd[0], F_SETFD, FD_CLOEXEC); |
752 | #endif |
753 | d->out = pfd[0]; |
754 | dup_err = dup2(pfd[1], 1); |
755 | close(pfd[1]); |
756 | if(dup_err == -1) { |
757 | Mfprintf(stderr, "unable to dup stderr\n" ); |
758 | MERO_EXIT(1); |
759 | } |
760 | |
761 | /* redirect stderr */ |
762 | if (pipe2(pfd, O_CLOEXEC) == -1) { |
763 | Mfprintf(stderr, "unable to create pipe: %s\n" , |
764 | strerror(errno)); |
765 | MERO_EXIT(1); |
766 | } |
767 | #if !defined(HAVE_PIPE2) || O_CLOEXEC == 0 |
768 | (void) fcntl(pfd[0], F_SETFD, FD_CLOEXEC); |
769 | #endif |
770 | /* before it is too late, save original stderr */ |
771 | #ifdef F_DUPFD_CLOEXEC |
772 | if ((ret = fcntl(2, F_DUPFD_CLOEXEC, 3)) < 0) { |
773 | Mfprintf(stderr, "unable to dup stderr\n" ); |
774 | MERO_EXIT(1); |
775 | } |
776 | #else |
777 | if ((ret = dup(2)) < 0) { |
778 | Mfprintf(stderr, "unable to dup stderr\n" ); |
779 | MERO_EXIT(1); |
780 | } |
781 | (void) fcntl(ret, F_SETFD, FD_CLOEXEC); |
782 | #endif |
783 | oerr = fdopen(ret, "w" ); |
784 | if (oerr == NULL) { |
785 | Mfprintf(stderr, "unable to dup stderr\n" ); |
786 | MERO_EXIT(1); |
787 | } |
788 | d->err = pfd[0]; |
789 | dup_err = dup2(pfd[1], 2); |
790 | close(pfd[1]); |
791 | if(dup_err == -1) { |
792 | Mfprintf(stderr, "unable to dup stderr\n" ); |
793 | MERO_EXIT(1); |
794 | } |
795 | |
796 | d->pid = getpid(); |
797 | d->type = MERO; |
798 | d->dbname = "merovingian" ; |
799 | d->flag = 0; |
800 | |
801 | /* separate entry for the neighbour discovery service */ |
802 | d = d->next = &dpdisc; |
803 | if (pipe2(pfd, O_CLOEXEC) == -1) { |
804 | Mfprintf(stderr, "unable to create pipe: %s\n" , |
805 | strerror(errno)); |
806 | MERO_EXIT(1); |
807 | } |
808 | #if !defined(HAVE_PIPE2) || O_CLOEXEC == 0 |
809 | (void) fcntl(pfd[0], F_SETFD, FD_CLOEXEC); |
810 | (void) fcntl(pfd[1], F_SETFD, FD_CLOEXEC); |
811 | #endif |
812 | d->out = pfd[0]; |
813 | if(!(_mero_discout = fdopen(pfd[1], "a" ))) { |
814 | Mfprintf(stderr, "unable to open file descriptor: %s\n" , |
815 | strerror(errno)); |
816 | MERO_EXIT(1); |
817 | } |
818 | if (pipe2(pfd, O_CLOEXEC) == -1) { |
819 | Mfprintf(stderr, "unable to create pipe: %s\n" , |
820 | strerror(errno)); |
821 | MERO_EXIT(1); |
822 | } |
823 | #if !defined(HAVE_PIPE2) || O_CLOEXEC == 0 |
824 | (void) fcntl(pfd[0], F_SETFD, FD_CLOEXEC); |
825 | (void) fcntl(pfd[1], F_SETFD, FD_CLOEXEC); |
826 | #endif |
827 | d->err = pfd[0]; |
828 | if(!(_mero_discerr = fdopen(pfd[1], "a" ))) { |
829 | Mfprintf(stderr, "unable to open file descriptor: %s\n" , |
830 | strerror(errno)); |
831 | MERO_EXIT(1); |
832 | } |
833 | d->pid = getpid(); |
834 | d->type = MERO; |
835 | d->dbname = "discovery" ; |
836 | d->next = NULL; |
837 | d->flag = 0; |
838 | |
839 | /* separate entry for the control runner */ |
840 | d = d->next = &dpcont; |
841 | if (pipe2(pfd, O_CLOEXEC) == -1) { |
842 | Mfprintf(stderr, "unable to create pipe: %s\n" , |
843 | strerror(errno)); |
844 | MERO_EXIT(1); |
845 | } |
846 | #if !defined(HAVE_PIPE2) || O_CLOEXEC == 0 |
847 | (void) fcntl(pfd[0], F_SETFD, FD_CLOEXEC); |
848 | (void) fcntl(pfd[1], F_SETFD, FD_CLOEXEC); |
849 | #endif |
850 | d->out = pfd[0]; |
851 | if(!(_mero_ctlout = fdopen(pfd[1], "a" ))) { |
852 | Mfprintf(stderr, "unable to open file descriptor: %s\n" , |
853 | strerror(errno)); |
854 | MERO_EXIT(1); |
855 | } |
856 | if (pipe2(pfd, O_CLOEXEC) == -1) { |
857 | Mfprintf(stderr, "unable to create pipe: %s\n" , |
858 | strerror(errno)); |
859 | MERO_EXIT(1); |
860 | } |
861 | #if !defined(HAVE_PIPE2) || O_CLOEXEC == 0 |
862 | (void) fcntl(pfd[0], F_SETFD, FD_CLOEXEC); |
863 | (void) fcntl(pfd[1], F_SETFD, FD_CLOEXEC); |
864 | #endif |
865 | d->err = pfd[0]; |
866 | if(!(_mero_ctlerr = fdopen(pfd[1], "a" ))) { |
867 | Mfprintf(stderr, "unable to open file descriptor: %s\n" , |
868 | strerror(errno)); |
869 | MERO_EXIT(1); |
870 | } |
871 | d->pid = getpid(); |
872 | d->type = MERO; |
873 | d->dbname = "control" ; |
874 | d->next = NULL; |
875 | d->flag = 0; |
876 | |
877 | if ((thret = pthread_create(&tid, NULL, logListener, NULL)) != 0) { |
878 | Mfprintf(oerr, "%s: FATAL: unable to create logthread: %s\n" , |
879 | argv[0], strerror(thret)); |
880 | MERO_EXIT(1); |
881 | } |
882 | |
883 | (void) sigemptyset(&sa.sa_mask); |
884 | sa.sa_flags = 0; |
885 | sa.sa_handler = handler; |
886 | if (sigaction(SIGINT, &sa, NULL) == -1 || |
887 | sigaction(SIGQUIT, &sa, NULL) == -1 || |
888 | sigaction(SIGTERM, &sa, NULL) == -1) { |
889 | Mfprintf(oerr, "%s: FATAL: unable to create signal handlers: %s\n" , |
890 | argv[0], strerror(errno)); |
891 | MERO_EXIT(1); |
892 | } |
893 | |
894 | (void) sigemptyset(&sa.sa_mask); |
895 | sa.sa_flags = 0; |
896 | sa.sa_handler = huphandler; |
897 | if (sigaction(SIGHUP, &sa, NULL) == -1) { |
898 | Mfprintf(oerr, "%s: FATAL: unable to create signal handlers: %s\n" , |
899 | argv[0], strerror(errno)); |
900 | MERO_EXIT(1); |
901 | } |
902 | |
903 | (void) sigemptyset(&sa.sa_mask); |
904 | sa.sa_flags = 0; |
905 | sa.sa_handler = segvhandler; |
906 | if (sigaction(SIGSEGV, &sa, NULL) == -1) { |
907 | Mfprintf(oerr, "%s: FATAL: unable to create signal handlers: %s\n" , |
908 | argv[0], strerror(errno)); |
909 | MERO_EXIT(1); |
910 | } |
911 | |
912 | (void) sigemptyset(&sa.sa_mask); |
913 | sa.sa_flags = 0; |
914 | sa.sa_handler = SIG_IGN; |
915 | if (sigaction(SIGPIPE, &sa, NULL) == -1) { |
916 | Mfprintf(oerr, "%s: FATAL: unable to create signal handlers: %s\n" , |
917 | argv[0], strerror(errno)); |
918 | MERO_EXIT(1); |
919 | } |
920 | |
921 | /* make sure we will be able to write our pid */ |
922 | if ((pidfile = fopen(pidfilename, "w" )) == NULL) { |
923 | Mfprintf(stderr, "unable to open '%s%s%s' for writing: %s\n" , |
924 | pidfilename[0] != '/' ? dbfarm : "" , |
925 | pidfilename[0] != '/' ? "/" : "" , |
926 | pidfilename, strerror(errno)); |
927 | MERO_EXIT(1); |
928 | } |
929 | |
930 | msab_dbfarminit(dbfarm); |
931 | |
932 | |
933 | /* write out the pid */ |
934 | Mfprintf(pidfile, "%d\n" , (int)d->pid); |
935 | fclose(pidfile); |
936 | |
937 | { |
938 | Mfprintf(stdout, "Merovingian %s" , VERSION); |
939 | #ifdef MONETDB_RELEASE |
940 | Mfprintf(stdout, " (%s)" , MONETDB_RELEASE); |
941 | #else |
942 | const char *rev = mercurial_revision(); |
943 | if (strcmp(rev, "Unknown" ) != 0) |
944 | Mfprintf(stdout, " (hg id: %s)" , rev); |
945 | #endif |
946 | Mfprintf(stdout, " starting\n" ); |
947 | } |
948 | Mfprintf(stdout, "monitoring dbfarm %s\n" , dbfarm); |
949 | |
950 | /* open up connections */ |
951 | if ((e = openConnectionTCP(&sock, use_ipv6, host, port, stdout)) == NO_ERR && |
952 | (e = openConnectionUNIX(&socku, mapi_usock, 0, stdout)) == NO_ERR && |
953 | (discovery == 0 || (e = openConnectionUDP(&discsock, false, host, port)) == NO_ERR) && |
954 | (e = openConnectionUNIX(&unsock, control_usock, S_IRWXO, _mero_ctlout)) == NO_ERR) { |
955 | pthread_t ctid = 0; |
956 | pthread_t dtid = 0; |
957 | |
958 | if (discovery == 1) { |
959 | _mero_broadcastsock = socket(AF_INET, SOCK_DGRAM |
960 | #ifdef SOCK_CLOEXEC |
961 | | SOCK_CLOEXEC |
962 | #endif |
963 | , 0); |
964 | ret = 1; |
965 | if (_mero_broadcastsock == -1 || |
966 | setsockopt(_mero_broadcastsock, |
967 | SOL_SOCKET, SO_BROADCAST, &ret, sizeof(ret)) == -1) |
968 | { |
969 | Mfprintf(stderr, "cannot create broadcast package, " |
970 | "discovery services disabled\n" ); |
971 | closesocket(discsock); |
972 | discsock = -1; |
973 | } |
974 | #ifndef SOCK_CLOEXEC |
975 | (void) fcntl(_mero_broadcastsock, F_SETFD, FD_CLOEXEC); |
976 | #endif |
977 | |
978 | _mero_broadcastaddr.sin_family = AF_INET; |
979 | _mero_broadcastaddr.sin_addr.s_addr = htonl(INADDR_BROADCAST); |
980 | /* the target port is our configured port, not elegant, but how |
981 | * else can we do it? can't broadcast to all ports or something */ |
982 | _mero_broadcastaddr.sin_port = htons(port); |
983 | } |
984 | |
985 | /* Paranoia umask, but good, because why would people have to sniff |
986 | * our private parts? */ |
987 | umask(S_IRWXG | S_IRWXO); |
988 | |
989 | /* handle control commands */ |
990 | if ((thret = pthread_create(&ctid, NULL, |
991 | controlRunner, |
992 | (void *) &unsock)) != 0) |
993 | { |
994 | Mfprintf(stderr, "unable to create control command thread: %s\n" , |
995 | strerror(thret)); |
996 | ctid = 0; |
997 | closesocket(unsock); |
998 | if (discsock >= 0) |
999 | closesocket(discsock); |
1000 | MERO_EXIT(1); |
1001 | } |
1002 | |
1003 | /* start neighbour discovery and notification thread */ |
1004 | if (discsock >= 0 && (thret = pthread_create(&dtid, NULL, |
1005 | discoveryRunner, (void *)&discsock)) != 0) |
1006 | { |
1007 | Mfprintf(stderr, "unable to start neighbour discovery thread: %s\n" , |
1008 | strerror(thret)); |
1009 | dtid = 0; |
1010 | closesocket(discsock); |
1011 | } |
1012 | |
1013 | /* From this point merovingian considers itself to be in position to |
1014 | * start running, so flag the parent we will have fun. */ |
1015 | MERO_EXIT(0); |
1016 | |
1017 | /* handle external connections main loop */ |
1018 | e = acceptConnections(sock, socku); |
1019 | |
1020 | /* wait for the control runner and discovery thread to have |
1021 | * finished announcing they're going down */ |
1022 | if (ctid != 0) |
1023 | pthread_join(ctid, NULL); |
1024 | if (dtid != 0) |
1025 | pthread_join(dtid, NULL); |
1026 | } |
1027 | |
1028 | /* control channel is already closed at this point */ |
1029 | if (unsock != -1 && remove(control_usock) != 0) |
1030 | Mfprintf(stderr, "unable to remove control socket '%s': %s\n" , |
1031 | control_usock, strerror(errno)); |
1032 | if (socku != -1 && remove(mapi_usock) != 0) |
1033 | Mfprintf(stderr, "unable to remove mapi socket '%s': %s\n" , |
1034 | mapi_usock, strerror(errno)); |
1035 | |
1036 | if (e != NO_ERR) { |
1037 | /* console */ |
1038 | Mfprintf(oerr, "%s: %s\n" , argv[0], e); |
1039 | /* logfile */ |
1040 | Mfprintf(stderr, "%s\n" , e); |
1041 | MERO_EXIT(1); |
1042 | } |
1043 | |
1044 | shutdown: |
1045 | /* stop started mservers */ |
1046 | |
1047 | kv = findConfKey(ckv, "exittimeout" ); |
1048 | if (d->next != NULL && atoi(kv->val) > 0) { |
1049 | dpair t; |
1050 | threadlist tl = NULL, tlw = tl; |
1051 | |
1052 | pthread_mutex_lock(&_mero_topdp_lock); |
1053 | t = d->next; |
1054 | while (t != NULL) { |
1055 | if (tl == NULL) { |
1056 | tl = tlw = malloc(sizeof(struct _threadlist)); |
1057 | } else { |
1058 | tlw = tlw->next = malloc(sizeof(struct _threadlist)); |
1059 | } |
1060 | |
1061 | tlw->next = NULL; |
1062 | if ((thret = pthread_create(&(tlw->tid), NULL, |
1063 | doTerminateProcess, t)) != 0) |
1064 | { |
1065 | Mfprintf(stderr, "%s: unable to create thread to terminate " |
1066 | "database '%s': %s\n" , |
1067 | argv[0], t->dbname, strerror(thret)); |
1068 | tlw->tid = 0; |
1069 | } |
1070 | |
1071 | t = t->next; |
1072 | } |
1073 | pthread_mutex_unlock(&_mero_topdp_lock); |
1074 | |
1075 | /* wait for all processes to be terminated */ |
1076 | tlw = tl; |
1077 | while (tlw != NULL) { |
1078 | if (tlw->tid != 0 && (argp = pthread_join(tlw->tid, NULL)) != 0) { |
1079 | Mfprintf(stderr, "failed to wait for termination thread: " |
1080 | "%s\n" , strerror(argp)); |
1081 | } |
1082 | tl = tlw->next; |
1083 | free(tlw); |
1084 | tlw = tl; |
1085 | } |
1086 | } |
1087 | |
1088 | /* need to do this here, since the logging thread is shut down as |
1089 | * next thing */ |
1090 | Mfprintf(stdout, "Merovingian %s stopped\n" , VERSION); |
1091 | |
1092 | _mero_keep_logging = 0; |
1093 | if (tid != 0 && (argp = pthread_join(tid, NULL)) != 0) { |
1094 | Mfprintf(oerr, "failed to wait for logging thread: %s\n" , |
1095 | strerror(argp)); |
1096 | } |
1097 | |
1098 | if (_mero_topdp != NULL) { |
1099 | close(_mero_topdp->out); |
1100 | if (_mero_topdp->out != _mero_topdp->err) |
1101 | close(_mero_topdp->err); |
1102 | } |
1103 | |
1104 | /* remove files that suggest our existence */ |
1105 | if (pidfilename != NULL) { |
1106 | remove(pidfilename); |
1107 | } |
1108 | |
1109 | /* mostly for valgrind... */ |
1110 | freeConfFile(ckv); |
1111 | if (_mero_db_props != NULL) { |
1112 | freeConfFile(_mero_db_props); |
1113 | free(_mero_db_props); |
1114 | } |
1115 | |
1116 | if (lockfd >= 0) { |
1117 | MT_lockf(".merovingian_lock" , F_ULOCK, 4, 1); |
1118 | close(lockfd); |
1119 | } |
1120 | |
1121 | /* the child's return code at this point doesn't matter, as no one |
1122 | * will see it */ |
1123 | return(0); |
1124 | } |
1125 | |
1126 | /* vim:set ts=4 sw=4 noexpandtab: */ |
1127 | |