1/*
2 * socket.c
3 *
4 * Copyright (C) 2008-2018 Aerospike, Inc.
5 *
6 * Portions may be licensed to Aerospike, Inc. under one or more contributor
7 * license agreements.
8 *
9 * This program is free software: you can redistribute it and/or modify it under
10 * the terms of the GNU Affero General Public License as published by the Free
11 * Software Foundation, either version 3 of the License, or (at your option) any
12 * later version.
13 *
14 * This program is distributed in the hope that it will be useful, but WITHOUT
15 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
16 * FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
17 * details.
18 *
19 * You should have received a copy of the GNU Affero General Public License
20 * along with this program. If not, see http://www.gnu.org/licenses/
21 */
22
23#define CF_SOCKET_PRIVATE
24#include "socket.h"
25
26#include <errno.h>
27#include <fcntl.h>
28#include <ifaddrs.h>
29#include <inttypes.h>
30#include <poll.h>
31#include <regex.h>
32#include <stdbool.h>
33#include <stddef.h>
34#include <stdint.h>
35#include <stdio.h>
36#include <unistd.h>
37
38#include <asm/types.h>
39#include <linux/netlink.h>
40#include <linux/rtnetlink.h>
41#include <net/if.h>
42#include <netinet/in.h>
43#include <netinet/tcp.h>
44#include <sys/epoll.h>
45#include <sys/ioctl.h>
46#include <sys/socket.h>
47#include <sys/types.h>
48
49#include "dns.h"
50#include "fault.h"
51#include "tls.h"
52
53#include "citrusleaf/alloc.h"
54#include "citrusleaf/cf_digest.h"
55
// Context handed to the asynchronous DNS resolver by
// cf_ip_addr_from_string_multi_a(). It pairs the caller's completion callback
// with the caller's opaque pointer, so that dns_resolve_cb() can forward the
// result. Allocated with cf_malloc() and released by dns_resolve_cb().
typedef struct dns_resolve_udata_s {
	cf_ip_addr_from_string_cb cb; // caller's completion callback
	void *udata; // caller's opaque pointer, passed back to cb unchanged
} dns_resolve_udata;
60
61void
62cf_ip_addr_to_string_safe(const cf_ip_addr *addr, char *string, size_t size)
63{
64 if (cf_ip_addr_to_string(addr, string, size) < 0) {
65 cf_crash(CF_SOCKET, "String buffer overflow");
66 }
67}
68
69int32_t
70cf_ip_addr_to_string_multi(const cf_ip_addr *addrs, uint32_t n_addrs, char *string, size_t size)
71{
72 size_t off = 0;
73
74 for (uint32_t i = 0; i < n_addrs; ++i) {
75 if (i > 0) {
76 if (off >= size) {
77 cf_warning(CF_SOCKET, "Output buffer overflow");
78 return -1;
79 }
80
81 string[off] = ',';
82 ++off;
83 }
84
85 int32_t len = cf_ip_addr_to_string(&addrs[i], string + off, size - off);
86
87 if (len < 0) {
88 return -1;
89 }
90
91 off += len;
92 }
93
94 if (off >= size) {
95 cf_warning(CF_SOCKET, "Output buffer overflow");
96 return -1;
97 }
98
99 string[off] = 0;
100 return off;
101}
102
103void
104cf_ip_addr_to_string_multi_safe(const cf_ip_addr *addrs, uint32_t n_addrs, char *string,
105 size_t size)
106{
107 if (cf_ip_addr_to_string_multi(addrs, n_addrs, string, size) < 0) {
108 cf_crash(CF_SOCKET, "String buffer overflow");
109 }
110}
111
112static void
113dns_resolve_cb(const int status, const char *name, addrinfo *addrs, void *udata)
114{
115 dns_resolve_udata *wrapped = (dns_resolve_udata*)udata;
116
117 if (status != 0) {
118 cf_ticker_warning(CF_SOCKET, "Error while converting address '%s': %s",
119 name, cf_dns_strerror(status));
120 wrapped->cb(false, name, NULL, 0, wrapped->udata);
121 cf_free(udata);
122 return;
123 }
124
125 uint32_t n_addrs = CF_SOCK_CFG_MAX;
126 cf_ip_addr ip_addrs[CF_SOCK_CFG_MAX];
127
128 int32_t res = cf_ip_addr_from_addrinfo(name, addrs, ip_addrs, &n_addrs);
129 cf_dns_free(addrs);
130
131 if (res < 0) {
132 wrapped->cb(false, name, NULL, 0, wrapped->udata);
133 cf_free(udata);
134 return;
135 }
136
137 wrapped->cb(true, name, ip_addrs, n_addrs, wrapped->udata);
138 cf_free(udata);
139}
140
141static int32_t
142ip_addr_from_string_multi_local(const char *string, cf_ip_addr *addrs,
143 uint32_t *n_addrs)
144{
145 if (strcmp(string, "any") == 0) {
146 if (*n_addrs < 1) {
147 cf_warning(CF_SOCKET, "Too many IP addresses");
148 return -1;
149 }
150
151 cf_ip_addr_set_any(&addrs[0]);
152 *n_addrs = 1;
153 return 0;
154 }
155
156 if (strcmp(string, "local") == 0) {
157 if (*n_addrs < 1) {
158 cf_warning(CF_SOCKET, "Too many IP addresses");
159 return -1;
160 }
161
162 cf_ip_addr_set_local(&addrs[0]);
163 *n_addrs = 1;
164 return 0;
165 }
166
167 if (cf_inter_is_inter_name(string)) {
168 cf_ip_addr if_addrs[CF_SOCK_CFG_MAX];
169 uint32_t n_if_addrs = CF_SOCK_CFG_MAX;
170
171 if (cf_inter_get_addr_name(if_addrs, &n_if_addrs, string) < 0) {
172 cf_warning(CF_SOCKET,
173 "Error while getting interface addresses for '%s'", string);
174 return -1;
175 }
176
177 if (n_if_addrs == 0) {
178 cf_warning(CF_SOCKET, "Interface %s does not have any IP addresses",
179 string);
180 return -1;
181 }
182
183 if (n_if_addrs > *n_addrs) {
184 cf_warning(CF_SOCKET, "Too many IP addresses");
185 return -1;
186 }
187
188 for (uint32_t i = 0; i < n_if_addrs; ++i) {
189 cf_ip_addr_copy(&if_addrs[i], &addrs[i]);
190 }
191
192 *n_addrs = n_if_addrs;
193 return 0;
194 }
195
196 // Return of 1 indicates the input hostname is not a local interface and
197 // needs to be resolved using dns.
198 return 1;
199}
200
201int32_t
202cf_ip_addr_from_string_multi(const char *string, cf_ip_addr *addrs,
203 uint32_t *n_addrs)
204{
205 int32_t res = ip_addr_from_string_multi_local(string, addrs, n_addrs);
206
207 if (res <= 0) {
208 return res;
209 }
210
211 addrinfo *info = NULL;
212 res = cf_dns_resolve(string, &g_cf_ip_addr_dns_hints, &info);
213
214 if (res != 0) {
215 cf_warning(CF_SOCKET, "Error while converting address '%s': %s", string,
216 cf_dns_strerror(res));
217 return -1;
218 }
219
220 res = cf_ip_addr_from_addrinfo(string, info, addrs, n_addrs);
221
222 cf_dns_free(info);
223 return res;
224}
225
226void
227cf_ip_addr_from_string_multi_a(const char *string, cf_ip_addr_from_string_cb cb,
228 void *udata)
229{
230 uint32_t n_addrs = CF_SOCK_CFG_MAX;
231 cf_ip_addr ip_addrs[n_addrs];
232
233 int32_t res = ip_addr_from_string_multi_local(string, ip_addrs, &n_addrs);
234
235 if (res < 0) {
236 cb(false, string, NULL, 0, udata);
237 return;
238 }
239
240 if (res == 0) {
241 cb(true, string, ip_addrs, n_addrs, udata);
242 return;
243 }
244
245 dns_resolve_udata *wrapped = cf_malloc(sizeof(dns_resolve_udata));
246 wrapped->cb = cb;
247 wrapped->udata = udata;
248 cf_dns_resolve_a(string, &g_cf_ip_addr_dns_hints, dns_resolve_cb, wrapped);
249}
250
251int32_t
252cf_ip_addr_from_string(const char *string, cf_ip_addr *addr)
253{
254 cf_ip_addr addrs[CF_SOCK_CFG_MAX];
255 uint32_t n_addrs = CF_SOCK_CFG_MAX;
256
257 if (cf_ip_addr_from_string_multi(string, addrs, &n_addrs) < 0) {
258 return -1;
259 }
260
261 cf_ip_addr_copy(&addrs[0], addr);
262 return 0;
263}
264
265void
266cf_ip_addr_sort(cf_ip_addr *addrs, uint32_t n_addrs)
267{
268 int32_t n = n_addrs;
269 bool swapped;
270
271 do {
272 swapped = false;
273
274 for (int32_t i = 0; i < n - 1; ++i) {
275 if (cf_ip_addr_compare(&addrs[i], &addrs[i + 1]) < 0) {
276 cf_ip_addr tmp;
277 cf_ip_addr_copy(&addrs[i], &tmp);
278 cf_ip_addr_copy(&addrs[i + 1], &addrs[i]);
279 cf_ip_addr_copy(&tmp, &addrs[i + 1]);
280 swapped = true;
281 }
282 }
283
284 --n;
285 }
286 while (swapped);
287}
288
// Validates the DNS label at the start of label. The label extends up to, but
// does not include, the next '.' or the end of the string. It may only consist
// of ASCII letters, ASCII digits, and '-'.
//
// Returns the length of the label, or -1, if the label is empty or contains
// any other character.
static int32_t
validate_dns_label(const char *label)
{
	const char *p = label;

	while (*p != 0 && *p != '.') {
		char c = *p;
		bool is_digit = c >= '0' && c <= '9';
		bool is_lower = c >= 'a' && c <= 'z';
		bool is_upper = c >= 'A' && c <= 'Z';

		if (! (is_digit || is_lower || is_upper || c == '-')) {
			return -1;
		}

		++p;
	}

	// Didn't move at all - empty label.
	if (p == label) {
		return -1;
	}

	return (int32_t)(p - label);
}
311
// Tells whether string looks like a DNS name, as opposed to an interface name
// or an IP address. To qualify, string must not be an interface name, must
// not start with a digit, and must consist of at least two valid labels (see
// validate_dns_label()) separated by '.'. A single trailing '.' is tolerated.
bool
cf_ip_addr_is_dns_name(const char *string)
{
	if (cf_inter_is_inter_name(string)) {
		return false;
	}

	char first = string[0];

	if (first >= '0' && first <= '9') {
		return false;
	}

	int32_t label_count = 0;
	const char *cursor = string;

	while (*cursor != 0) {
		int32_t label_len = validate_dns_label(cursor);

		if (label_len < 0) {
			return false;
		}

		cursor += label_len;
		++label_count;

		// Step over the separator, if any.
		if (*cursor == '.') {
			++cursor;
		}
	}

	return label_count > 1;
}
343
344int32_t
345cf_ip_port_from_string(const char *string, cf_ip_port *port)
346{
347 char *end;
348 uint64_t tmp = strtoul(string, &end, 10);
349
350 if (*end != 0 || tmp > 65535) {
351 cf_warning(CF_SOCKET, "Invalid port '%s'", string);
352 return -1;
353 }
354
355 *port = (cf_ip_port)tmp;
356 return 0;
357}
358
359int32_t
360cf_ip_port_to_string(cf_ip_port port, char *string, size_t size)
361{
362 int32_t count = snprintf(string, size, "%hu", port);
363
364 if ((size_t)count >= size) {
365 cf_warning(CF_SOCKET, "Output buffer overflow");
366 return -1;
367 }
368
369 return count;
370}
371
372void
373cf_ip_port_to_string_safe(cf_ip_port port, char *string, size_t size)
374{
375 if (cf_ip_port_to_string(port, string, size) < 0) {
376 cf_crash(CF_SOCKET, "String buffer overflow");
377 }
378}
379
380int32_t
381cf_ip_port_from_binary(const uint8_t *binary, size_t size, cf_ip_port *port)
382{
383 if (size < 2) {
384 cf_warning(CF_SOCKET, "Input buffer underflow");
385 return -1;
386 }
387
388 *port = (binary[0] << 8) | binary[1];
389 return 2;
390}
391
392int32_t
393cf_ip_port_to_binary(cf_ip_port port, uint8_t *binary, size_t size)
394{
395 if (size < 2) {
396 cf_warning(CF_SOCKET, "Output buffer overflow");
397 return -1;
398 }
399
400 binary[0] = port >> 8;
401 binary[1] = port & 255;
402 return 2;
403}
404
405void
406cf_ip_port_from_node_id(cf_node id, cf_ip_port *port)
407{
408 uint8_t *buff = (uint8_t *)&id;
409 memcpy(port, buff + 6, 2);
410}
411
412void
413cf_sock_addr_to_string_safe(const cf_sock_addr *addr, char *string, size_t size)
414{
415 if (cf_sock_addr_to_string(addr, string, size) < 0) {
416 cf_crash(CF_SOCKET, "String buffer overflow");
417 }
418}
419
420int32_t
421cf_sock_addr_from_binary(const uint8_t *binary, size_t size, cf_sock_addr *addr)
422{
423 int32_t total = 0;
424 int32_t count = cf_ip_addr_from_binary(binary, size, &addr->addr);
425
426 if (count < 0) {
427 return -1;
428 }
429
430 total += count;
431 count = cf_ip_port_from_binary(binary + total, size - total, &addr->port);
432
433 if (count < 0) {
434 return -1;
435 }
436
437 total += count;
438 return total;
439}
440
441int32_t
442cf_sock_addr_to_binary(const cf_sock_addr *addr, uint8_t *binary, size_t size)
443{
444 int32_t total = 0;
445 int32_t count = cf_ip_addr_to_binary(&addr->addr, binary, size);
446
447 if (count < 0) {
448 return -1;
449 }
450
451 total += count;
452 count = cf_ip_port_to_binary(addr->port, binary + total, size - total);
453
454 if (count < 0) {
455 return -1;
456 }
457
458 total += count;
459 return total;
460}
461
462int32_t
463cf_sock_addr_from_host_port(const char *host, cf_ip_port port, cf_sock_addr *addr)
464{
465 if (cf_ip_addr_from_string(host, &addr->addr) < 0) {
466 cf_warning(CF_SOCKET, "Invalid host address '%s'", host);
467 return -1;
468 }
469
470 addr->port = port;
471 return 0;
472}
473
474void
475cf_sock_addr_from_addr_port(const cf_ip_addr *ip_addr, cf_ip_port port, cf_sock_addr *addr)
476{
477 cf_ip_addr_copy(ip_addr, &addr->addr);
478 addr->port = port;
479}
480
481int32_t
482cf_sock_addr_compare(const cf_sock_addr *lhs, const cf_sock_addr *rhs)
483{
484 int32_t res = cf_ip_addr_compare(&lhs->addr, &rhs->addr);
485
486 if (res != 0) {
487 return res;
488 }
489
490 if (lhs->port == rhs->port) {
491 return 0;
492 }
493
494 return (int32_t)lhs->port - (int32_t)rhs->port;
495}
496
497void
498cf_sock_addr_copy(const cf_sock_addr *from, cf_sock_addr *to)
499{
500 cf_ip_addr_copy(&from->addr, &to->addr);
501 to->port = from->port;
502}
503
504void
505cf_sock_addr_set_any(cf_sock_addr *addr)
506{
507 cf_ip_addr_set_any(&addr->addr);
508 addr->port = 0;
509}
510
511bool
512cf_sock_addr_is_any(const cf_sock_addr *addr)
513{
514 return cf_ip_addr_is_any(&addr->addr) && addr->port == 0;
515}
516
517void
518cf_sock_cfg_init(cf_sock_cfg *cfg, cf_sock_owner owner)
519{
520 cfg->owner = owner;
521 cfg->port = 0;
522 cf_ip_addr_set_any(&cfg->addr);
523}
524
525void
526cf_sock_cfg_copy(const cf_sock_cfg *from, cf_sock_cfg *to)
527{
528 to->owner = from->owner;
529 to->port = from->port;
530 cf_ip_addr_copy(&from->addr, &to->addr);
531}
532
533void
534cf_serv_cfg_init(cf_serv_cfg *cfg)
535{
536 cfg->n_cfgs = 0;
537}
538
539int32_t
540cf_serv_cfg_add_sock_cfg(cf_serv_cfg *serv_cfg, const cf_sock_cfg *sock_cfg)
541{
542 if (serv_cfg->n_cfgs >= CF_SOCK_CFG_MAX) {
543 cf_warning(CF_SOCKET, "Too many socket configurations in server configuration");
544 return -1;
545 }
546
547 uint32_t n = serv_cfg->n_cfgs;
548
549 for (uint32_t i = 0; i < n; ++i) {
550 cf_sock_cfg *walker = &serv_cfg->cfgs[i];
551
552 if (walker->owner == sock_cfg->owner && walker->port == sock_cfg->port &&
553 cf_ip_addr_compare(&walker->addr, &sock_cfg->addr) == 0) {
554 return 0;
555 }
556 }
557
558 cf_sock_cfg_copy(sock_cfg, &serv_cfg->cfgs[n]);
559 serv_cfg->n_cfgs = ++n;
560 return 0;
561}
562
563void
564cf_sockets_init(cf_sockets *socks)
565{
566 socks->n_socks = 0;
567}
568
569bool
570cf_sockets_has_socket(const cf_sockets *socks, const cf_socket *sock)
571{
572 return socks != NULL && sock >= &socks->socks[0] && sock < &socks->socks[socks->n_socks];
573}
574
575void
576cf_sockets_close(cf_sockets *socks)
577{
578 for (uint32_t i = 0; i < socks->n_socks; ++i) {
579 cf_socket_close(&socks->socks[i]);
580 cf_socket_term(&socks->socks[i]);
581 }
582}
583
584static int32_t
585safe_fcntl(int32_t fd, int32_t cmd, int32_t arg)
586{
587 int32_t res = fcntl(fd, cmd, arg);
588
589 if (res < 0) {
590 cf_crash(CF_SOCKET, "fcntl(%d) failed on FD %d: %d (%s)",
591 cmd, fd, errno, cf_strerror(errno));
592 }
593
594 return res;
595}
596
597static int32_t
598safe_ioctl(int32_t fd, int32_t req, int32_t *arg)
599{
600 int32_t res = ioctl(fd, req, arg);
601
602 if (res < 0) {
603 cf_crash(CF_SOCKET, "ioctl(%d) failed on FD %d: %d (%s)",
604 req, fd, errno, cf_strerror(errno));
605 }
606
607 return res;
608}
609
610static void
611safe_setsockopt(int32_t fd, int32_t level, int32_t name, const void *val, socklen_t len)
612{
613 if (setsockopt(fd, level, name, val, len) < 0) {
614 cf_crash(CF_SOCKET, "setsockopt(%d¸ %d) failed on FD %d: %d (%s)",
615 level, name, fd, errno, cf_strerror(errno));
616 }
617}
618
619static void
620safe_getsockopt(int32_t fd, int32_t level, int32_t name, void *val, socklen_t *len)
621{
622 if (getsockopt(fd, level, name, val, len) < 0) {
623 cf_crash(CF_SOCKET, "getsockopt(%d, %d) failed on FD %d: %d (%s)",
624 level, name, fd, errno, cf_strerror(errno));
625 }
626}
627
628static int32_t
629safe_wait(int32_t efd, struct epoll_event *events, int32_t max, int32_t timeout)
630{
631 while (true) {
632 cf_debug(CF_SOCKET, "Waiting on epoll FD %d", efd);
633 int32_t count = epoll_wait(efd, events, max, timeout);
634
635 if (count < 0) {
636 if (errno == EINTR) {
637 cf_debug(CF_SOCKET, "Interrupted");
638 continue;
639 }
640
641 cf_crash(CF_SOCKET, "epoll_wait() failed on epoll FD %d: %d (%s)",
642 efd, errno, cf_strerror(errno));
643 }
644
645 return count;
646 }
647}
648
649static void
650safe_close(int32_t fd)
651{
652 if (close(fd) < 0) {
653 cf_crash(CF_SOCKET, "Error while closing FD %d: %d (%s)",
654 fd, errno, cf_strerror(errno));
655 }
656}
657
// Switches the given file descriptor to non-blocking mode by adding
// O_NONBLOCK to its file status flags. All other flags are preserved.
void
cf_fd_disable_blocking(int32_t fd)
{
	int32_t old_flags = safe_fcntl(fd, F_GETFL, 0);
	int32_t new_flags = old_flags | O_NONBLOCK;

	safe_fcntl(fd, F_SETFL, new_flags);
}
664
665void
666cf_socket_disable_blocking(cf_socket *sock)
667{
668 cf_fd_disable_blocking(sock->fd);
669}
670
671void
672cf_socket_enable_blocking(cf_socket *sock)
673{
674 int32_t flags = safe_fcntl(sock->fd, F_GETFL, 0);
675 safe_fcntl(sock->fd, F_SETFL, flags & ~O_NONBLOCK);
676}
677
678void
679cf_socket_disable_nagle(cf_socket *sock)
680{
681 static const int32_t flag = 1;
682 safe_setsockopt(sock->fd, SOL_TCP, TCP_NODELAY, &flag, sizeof(flag));
683}
684
685void
686cf_socket_enable_nagle(cf_socket *sock)
687{
688 static const int32_t flag = 0;
689 safe_setsockopt(sock->fd, SOL_TCP, TCP_NODELAY, &flag, sizeof(flag));
690}
691
692void
693cf_socket_set_cork(cf_socket *sock, int cork)
694{
695 setsockopt(sock->fd, IPPROTO_TCP, TCP_CORK, &cork, sizeof(int));
696}
697
698void
699cf_socket_keep_alive(cf_socket *sock, int32_t idle, int32_t interval, int32_t count)
700{
701 static const int32_t flag = 1;
702 safe_setsockopt(sock->fd, SOL_SOCKET, SO_KEEPALIVE, &flag, sizeof(flag));
703
704 if (idle > 0) {
705 safe_setsockopt(sock->fd, SOL_TCP, TCP_KEEPIDLE, &idle, sizeof(idle));
706 }
707
708 if (interval > 0) {
709 safe_setsockopt(sock->fd, SOL_TCP, TCP_KEEPINTVL, &interval, sizeof(interval));
710 }
711
712 if (count > 0) {
713 safe_setsockopt(sock->fd, SOL_TCP, TCP_KEEPCNT, &count, sizeof(count));
714 }
715}
716
717void
718cf_socket_set_send_buffer(cf_socket *sock, int32_t size)
719{
720 safe_setsockopt(sock->fd, SOL_SOCKET, SO_SNDBUF, &size, sizeof(size));
721}
722
723void
724cf_socket_set_receive_buffer(cf_socket *sock, int32_t size)
725{
726 safe_setsockopt(sock->fd, SOL_SOCKET, SO_RCVBUF, &size, sizeof(size));
727}
728
729void
730cf_socket_set_window(cf_socket *sock, int32_t size)
731{
732 safe_setsockopt(sock->fd, SOL_TCP, TCP_WINDOW_CLAMP, &size, sizeof(size));
733}
734
735void
736cf_socket_init(cf_socket *sock)
737{
738 sock->fd = -1;
739 sock->state = CF_SOCKET_STATE_NON_TLS;
740 sock->cfg = NULL;
741 tls_socket_init(sock);
742}
743
744bool
745cf_socket_exists(cf_socket *sock)
746{
747 return sock->fd >= 0;
748}
749
// Creates one listening TCP socket for each socket configuration in cfg and
// stores the sockets in socks. Each socket is made non-blocking, gets
// SO_REUSEADDR, is bound to the configured address and port, and starts
// listening with a backlog of 512.
//
// Returns 0 on success, after setting socks->n_socks to the number of sockets
// created. Returns -1 on failure, after closing all sockets created by this
// call; socks->n_socks is not modified in that case.
//
// NOTE(review): nothing in here checks cfg->n_cfgs against the capacity of
// socks->socks - presumably both are dimensioned by CF_SOCK_CFG_MAX; confirm.
int32_t
cf_socket_init_server(cf_serv_cfg *cfg, cf_sockets *socks)
{
	int32_t res = -1;

	if (cfg->n_cfgs < 1) {
		cf_warning(CF_SOCKET, "Missing service socket configuration");
		goto cleanup0;
	}

	cf_socket_fix_bind(cfg);

	cf_debug(CF_SOCKET, "Initializing %u server socket(s)", cfg->n_cfgs);
	// Declared outside of the loop, because the cleanup code below needs to
	// know how far we got (n) and which socket was being set up (sock).
	uint32_t n;
	cf_socket *sock;

	for (n = 0; n < cfg->n_cfgs; ++n) {
		sock = &socks->socks[n];

		if (cfg->cfgs[n].port == 0) {
			cf_warning(CF_SOCKET, "Missing service port");
			goto cleanup1;
		}

		cf_sock_addr addr;
		cf_sock_addr_from_addr_port(&cfg->cfgs[n].addr, cfg->cfgs[n].port, &addr);

		struct sockaddr_storage sas;
		cf_sock_addr_to_native(&addr, (struct sockaddr *)&sas);

		cf_debug(CF_SOCKET, "Initializing server for %s", cf_sock_addr_print(&addr));
		int32_t fd = socket(sas.ss_family, SOCK_STREAM, 0);

		if (fd < 0) {
			cf_warning(CF_SOCKET, "Error while creating socket for %s: %d (%s)",
					cf_sock_addr_print(&addr), errno, cf_strerror(errno));
			goto cleanup1;
		}

		// From here on, sock owns the file descriptor.
		cf_socket_init(sock);
		sock->fd = fd;
		fd = -1;

		cf_socket_fix_server(sock);
		cf_socket_disable_blocking(sock);

		// No Nagle here. It will be disabled for the accepted connections.

		static const int32_t flag = 1;
		safe_setsockopt(sock->fd, SOL_SOCKET, SO_REUSEADDR, &flag, sizeof(flag));

		// Retry every 5 seconds for as long as the address is in use. There is
		// no upper bound on the number of attempts. Any other error is final.
		while (bind(sock->fd, (struct sockaddr *)&sas,
				cf_socket_addr_len((struct sockaddr *)&sas)) < 0) {
			if (errno != EADDRINUSE) {
				cf_warning(CF_SOCKET, "Error while binding to %s: %d (%s)",
						cf_sock_addr_print(&addr), errno, cf_strerror(errno));
				goto cleanup2;
			}

			cf_warning(CF_SOCKET, "Socket %s in use, waiting", cf_sock_addr_print(&addr));
			usleep(5 * 1000 * 1000);
		}

		if (listen(sock->fd, 512) < 0) {
			cf_warning(CF_SOCKET, "Error while listening on %s: %d (%s)",
					cf_sock_addr_print(&addr), errno, cf_strerror(errno));
			goto cleanup2;
		}

		// The socket refers to the configuration inside cfg, not to a copy.
		sock->cfg = &cfg->cfgs[n];
	}

	socks->n_socks = n;
	res = 0;
	goto cleanup0;

	// The socket for configuration n was created, but couldn't be fully set
	// up. Close it, then fall through to close the earlier ones.
cleanup2:
	cf_socket_close(sock);
	cf_socket_term(sock);

	// Close the sockets for configurations 0 through n - 1, which were fully
	// set up.
cleanup1:
	for (uint32_t i = 0; i < n; ++i) {
		cf_socket_close(&socks->socks[i]);
		cf_socket_term(&socks->socks[i]);
	}

cleanup0:
	return res;
}
839
840void
841cf_socket_show_server(cf_fault_context cont, const char *tag, const cf_sockets *socks)
842{
843 for (uint32_t i = 0; i < socks->n_socks; ++i) {
844 cf_sock_cfg *cfg = socks->socks[i].cfg;
845 cf_sock_addr addr;
846 cf_sock_addr_from_addr_port(&cfg->addr, cfg->port, &addr);
847 cf_info(cont, "Started %s endpoint %s", tag, cf_sock_addr_print(&addr));
848 }
849}
850
// Connects sock to the address sa. The code expects connect() to either
// succeed right away or to fail with EINPROGRESS, i.e., it expects a
// non-blocking socket.
//
// With timeout == 0, an in-progress connection counts as success and the
// function returns without waiting for the connection to be established.
// Otherwise, it waits for up to timeout (passed to epoll_wait(), thus
// milliseconds) for the socket to become writable and then checks SO_ERROR.
//
// Returns 0 on success, -1 on error or timeout. Failures of the epoll calls
// themselves are fatal.
static int32_t
connect_socket(const cf_socket *sock, struct sockaddr *sa, int32_t timeout)
{
	cf_debug(CF_SOCKET, "Connecting FD %d", sock->fd);
	int32_t res = -1;
	int32_t rv = connect(sock->fd, sa, cf_socket_addr_len(sa));

	if (rv == 0) {
		cf_debug(CF_SOCKET, "FD %d connected [1]", sock->fd);
		res = 0;
		goto cleanup0;
	}

	if (errno != EINPROGRESS) {
		cf_ticker_warning(CF_SOCKET, "Error while connecting: %d (%s)", errno, cf_strerror(errno));
		goto cleanup0;
	}

	if (timeout == 0) {
		cf_debug(CF_SOCKET, "FD %d still connecting, but no timeout", sock->fd);
		res = 0;
		goto cleanup0;
	}

	// Temporary epoll instance, only used to wait for this one socket.
	int32_t efd = epoll_create(1);

	if (efd < 0) {
		cf_crash(CF_SOCKET, "epoll_create() failed: %d (%s)", errno, cf_strerror(errno));
	}

	struct epoll_event event = { .data.fd = sock->fd, .events = EPOLLOUT };

	if (epoll_ctl(efd, EPOLL_CTL_ADD, sock->fd, &event) < 0) {
		cf_crash(CF_SOCKET, "epoll_ctl() failed for FD %d: %d (%s)",
				sock->fd, errno, cf_strerror(errno));
	}

	int32_t count = safe_wait(efd, &event, 1, timeout);

	if (count == 0) {
		cf_ticker_warning(CF_SOCKET, "Timeout while connecting");
		goto cleanup1;
	}

	// An event alone doesn't mean that we're connected. The outcome of the
	// connection attempt is in SO_ERROR.
	int32_t err;
	socklen_t err_len = sizeof(err);
	safe_getsockopt(sock->fd, SOL_SOCKET, SO_ERROR, &err, &err_len);

	if (err != 0) {
		cf_ticker_warning(CF_SOCKET, "Error while connecting: %d (%s)", err, cf_strerror(err));
		goto cleanup1;
	}

	cf_debug(CF_SOCKET, "FD %d connected [2]", sock->fd);
	res = 0;

	// Reached on success as well as on failure: tear down the temporary epoll
	// instance.
cleanup1:
	if (epoll_ctl(efd, EPOLL_CTL_DEL, sock->fd, NULL) < 0) {
		cf_crash(CF_SOCKET, "epoll_ctl() failed for FD %d: %d (%s)",
				sock->fd, errno, cf_strerror(errno));
	}

	safe_close(efd);

cleanup0:
	return res;
}
918
919int32_t
920cf_socket_init_client(cf_sock_cfg *cfg, int32_t timeout, cf_socket *sock)
921{
922 int32_t res = -1;
923
924 if (cf_ip_addr_is_any(&cfg->addr)) {
925 cf_warning(CF_SOCKET, "Missing IP address");
926 goto cleanup0;
927 }
928
929 if (cfg->port == 0) {
930 cf_warning(CF_SOCKET, "Missing port");
931 goto cleanup0;
932 }
933
934 cf_sock_addr addr;
935 cf_sock_addr_from_addr_port(&cfg->addr, cfg->port, &addr);
936
937 struct sockaddr_storage sas;
938 cf_sock_addr_to_native(&addr, (struct sockaddr *)&sas);
939
940 cf_debug(CF_SOCKET, "Initializing client for %s", cf_sock_addr_print(&addr));
941 int32_t fd = socket(sas.ss_family, SOCK_STREAM, 0);
942
943 if (fd < 0) {
944 cf_warning(CF_SOCKET, "Error while creating socket for %s: %d (%s)",
945 cf_sock_addr_print(&addr), errno, cf_strerror(errno));
946 goto cleanup0;
947 }
948
949 cf_socket_init(sock);
950 sock->fd = fd;
951 fd = -1;
952
953 cf_socket_fix_client(sock);
954 cf_socket_disable_blocking(sock);
955 cf_socket_disable_nagle(sock);
956
957 if (connect_socket(sock, (struct sockaddr *)&sas, timeout) < 0) {
958 cf_ticker_warning(CF_SOCKET, "Error while connecting socket to %s",
959 cf_sock_addr_print(&addr));
960 goto cleanup1;
961 }
962
963 sock->cfg = cfg;
964 res = 0;
965 goto cleanup0;
966
967cleanup1:
968 cf_socket_close(sock);
969 cf_socket_term(sock);
970
971cleanup0:
972 return res;
973}
974
975int32_t
976cf_socket_accept(cf_socket *lsock, cf_socket *sock, cf_sock_addr *addr)
977{
978 int32_t res = -1;
979
980 struct sockaddr_storage sas;
981 struct sockaddr *sa = NULL;
982 socklen_t sa_len = 0;
983
984 if (addr != NULL) {
985 sa = (struct sockaddr *)&sas;
986 sa_len = sizeof(sas);
987 }
988
989 int32_t fd = accept(lsock->fd, sa, &sa_len);
990
991 if (fd < 0) {
992 cf_debug(CF_SOCKET, "Error while accepting from FD %d: %d (%s)",
993 lsock->fd, errno, cf_strerror(errno));
994 goto cleanup0;
995 }
996
997 if (addr != NULL) {
998 cf_sock_addr_from_native(sa, addr);
999 }
1000
1001 cf_socket_init(sock);
1002 sock->fd = fd;
1003 fd = -1;
1004
1005 cf_socket_disable_blocking(sock);
1006 cf_socket_disable_nagle(sock);
1007
1008 sock->cfg = lsock->cfg;
1009 res = 0;
1010
1011cleanup0:
1012 return res;
1013}
1014
// Common signature of getpeername() and getsockname(), which allows x_name()
// to work with either.
typedef int32_t (*name_func)(int32_t fd, struct sockaddr *sa, socklen_t *sa_len);
1016
1017static int32_t
1018x_name(name_func func, const char *which, int32_t fd, cf_sock_addr *addr)
1019{
1020 struct sockaddr_storage sas;
1021 socklen_t sas_len = sizeof(sas);
1022
1023 if (func(fd, (struct sockaddr *)&sas, &sas_len) < 0) {
1024 cf_warning(CF_SOCKET, "Error while getting %s name: %d (%s)",
1025 which, errno, cf_strerror(errno));
1026 return -1;
1027 }
1028
1029 cf_sock_addr_from_native((struct sockaddr *)&sas, addr);
1030 return 0;
1031}
1032
1033int32_t
1034cf_socket_remote_name(const cf_socket *sock, cf_sock_addr *addr)
1035{
1036 return x_name(getpeername, "remote", sock->fd, addr);
1037}
1038
1039int32_t
1040cf_socket_local_name(const cf_socket *sock, cf_sock_addr *addr)
1041{
1042 return x_name(getsockname, "local", sock->fd, addr);
1043}
1044
1045int32_t
1046cf_socket_available(cf_socket *sock)
1047{
1048 int32_t size;
1049 safe_ioctl(sock->fd, FIONREAD, &size);
1050
1051 size += tls_socket_pending(sock);
1052
1053 return size;
1054}
1055
1056int32_t
1057cf_socket_send_to(cf_socket *sock, const void *buff, size_t size, int32_t flags, const cf_sock_addr *addr)
1058{
1059 cf_assert(sock->ssl == NULL, CF_SOCKET, "cannot use cf_socket_send_to() with TLS");
1060
1061 struct sockaddr_storage sas;
1062 struct sockaddr *sa = NULL;
1063 socklen_t sa_len = 0;
1064
1065 if (addr != NULL) {
1066 cf_sock_addr_to_native(addr, (struct sockaddr *)&sas);
1067 sa = (struct sockaddr *)&sas;
1068 sa_len = cf_socket_addr_len((struct sockaddr *)&sas);
1069 }
1070
1071 int32_t res = sendto(sock->fd, buff, size, flags | MSG_NOSIGNAL, sa, sa_len);
1072
1073 if (res < 0) {
1074 cf_debug(CF_SOCKET, "Error while sending on FD %d: %d (%s)",
1075 sock->fd, errno, cf_strerror(errno));
1076 }
1077
1078 return res;
1079}
1080
1081int32_t
1082cf_socket_send(cf_socket *sock, const void *buff, size_t size, int32_t flags)
1083{
1084 if (sock->ssl) {
1085 ssize_t res = tls_socket_send(sock, buff, size, flags, 0);
1086
1087 if (res < 0 && errno == ETIMEDOUT) {
1088 // Make the caller call again with the same buff and the same size,
1089 // which is what OpenSSL wants.
1090 errno = EAGAIN;
1091 }
1092
1093 return res;
1094 }
1095
1096 return cf_socket_send_to(sock, buff, size, flags, NULL);
1097}
1098
// Sends the buffers described by m->msg_iov on sock.
//
// Non-TLS sockets use sendmsg(), with MSG_NOSIGNAL added to flags. For TLS
// sockets, the buffers are sent one by one via tls_socket_send() with a
// timeout of 0; only msg_iov and msg_iovlen of m are used in that case.
//
// Returns the number of bytes sent, which may be less than the total size of
// the buffers, or a negative value on error, with errno set. For TLS sockets,
// ETIMEDOUT is reported as EAGAIN.
int32_t
cf_socket_send_msg(cf_socket *sock, struct msghdr *m, int32_t flags)
{
	if (sock->ssl) {
		int32_t bytes = 0;
		struct iovec *v = m->msg_iov;

		for (socklen_t i = 0; i < m->msg_iovlen; i++) {
			int rv = 0;

			// Empty buffers are skipped without calling into TLS.
			if (v->iov_len > 0) {
				rv = tls_socket_send(sock, v->iov_base, v->iov_len, flags, 0);
			}

			if (rv < 0) {
				// errno is set by tls_socket_send.
				if (errno == ETIMEDOUT) {
					errno = EAGAIN;
				}

				// If earlier buffers already went out, report those bytes
				// instead of the EAGAIN.
				if (errno == EAGAIN && bytes != 0) {
					break; // partial send
				}

				return rv;
			}
			else if (rv < v->iov_len) {
				// This buffer only went out partially. Stop here, so that
				// the caller can resume at the right position.
				bytes += rv;
				break;
			}

			bytes += rv;
			v++;
		}

		return bytes;
	}

	int32_t res = sendmsg(sock->fd, m, flags | MSG_NOSIGNAL);

	if (res < 0) {
		cf_debug(CF_SOCKET, "Error while sending on FD %d: %d (%s)",
				sock->fd, errno, cf_strerror(errno));
	}

	return res;
}
1146
1147int32_t
1148cf_socket_recv_from(cf_socket *sock, void *buff, size_t size, int32_t flags, cf_sock_addr *addr)
1149{
1150 cf_assert(sock->ssl == NULL, CF_SOCKET, "cannot use cf_socket_recv_from() with TLS");
1151
1152 struct sockaddr_storage sas;
1153 struct sockaddr *sa = NULL;
1154 socklen_t sa_len = 0;
1155
1156 if (addr != NULL) {
1157 sa = (struct sockaddr *)&sas;
1158 sa_len = sizeof(sas);
1159 }
1160
1161 int32_t res = recvfrom(sock->fd, buff, size, flags, sa, &sa_len);
1162
1163 if (res < 0) {
1164 cf_debug(CF_SOCKET, "Error while receiving on FD %d: %d (%s)",
1165 sock->fd, errno, cf_strerror(errno));
1166 }
1167 else if (addr != NULL) {
1168 cf_sock_addr_from_native(sa, addr);
1169 }
1170
1171 return res;
1172}
1173
1174int32_t
1175cf_socket_recv(cf_socket *sock, void *buff, size_t size, int32_t flags)
1176{
1177 if (sock->ssl) {
1178 ssize_t res = tls_socket_recv(sock, buff, size, flags, 0);
1179
1180 // We only get ETIMEDOUT, if we read never read any bytes. In case of
1181 // a partial read, we get the number of bytes instead.
1182 if (res < 0 && errno == ETIMEDOUT) {
1183 errno = EAGAIN;
1184 }
1185
1186 return res;
1187 }
1188
1189 return cf_socket_recv_from(sock, buff, size, flags, NULL);
1190}
1191
// Receives into the buffers described by m->msg_iov from sock.
//
// Non-TLS sockets use recvmsg(). For TLS sockets, the buffers are filled one
// by one via tls_socket_recv() with a timeout of 0; only msg_iov and
// msg_iovlen of m are used in that case.
//
// Returns the number of bytes received, which may be less than the total size
// of the buffers, or a negative value on error, with errno set. For TLS
// sockets, ETIMEDOUT is reported as EAGAIN.
int32_t
cf_socket_recv_msg(cf_socket *sock, struct msghdr *m, int32_t flags)
{
	if (sock->ssl) {
		int32_t bytes = 0;
		struct iovec *v = m->msg_iov;

		for (socklen_t i = 0; i < m->msg_iovlen; i++) {
			// NOTE(review): unlike cf_socket_send_msg(), this doesn't skip
			// empty buffers - confirm that tls_socket_recv() is fine with a
			// size of 0.
			int rv = tls_socket_recv(sock, v->iov_base, v->iov_len, flags, 0);

			if (rv < 0) {
				// errno is set by tls_socket_recv.
				if (errno == ETIMEDOUT) {
					errno = EAGAIN;
				}

				// If earlier buffers were already filled, report those bytes
				// instead of the EAGAIN.
				if (errno == EAGAIN && bytes != 0) {
					break; // partial recv
				}

				return rv;
			}
			else if (rv < v->iov_len) {
				// This buffer was only filled partially. Stop here, so that
				// the caller can resume at the right position.
				bytes += rv;
				break;
			}

			bytes += rv;
			v++;
		}

		return bytes;
	}

	int32_t res = recvmsg(sock->fd, m, flags);

	if (res < 0) {
		cf_debug(CF_SOCKET, "Error while receiving on FD %d: %d (%s)",
				sock->fd, errno, cf_strerror(errno));
	}

	return res;
}
1235
1236static uint16_t
1237socket_wait(const cf_socket *sock, uint16_t events, int32_t timeout)
1238{
1239 cf_detail(CF_SOCKET, "Waiting for events 0x%x on FD %d with timeout %d",
1240 events, sock->fd, timeout);
1241
1242 struct pollfd pfd = { .fd = sock->fd, .events = events | POLLRDHUP };
1243
1244 while (true) {
1245 int32_t count = poll(&pfd, 1, timeout);
1246
1247 if (count < 0) {
1248 if (errno == EINTR) {
1249 cf_debug(CF_SOCKET, "Interrupted while polling FD %d", pfd.fd);
1250 continue;
1251 }
1252
1253 cf_crash(CF_SOCKET, "Error while polling FD %d: %d (%s)",
1254 pfd.fd, errno, cf_strerror(errno));
1255 }
1256
1257 if (count > 1) {
1258 cf_crash(CF_SOCKET, "Unexpected number of events on FD %d: %d", sock->fd, count);
1259 }
1260
1261 if (count == 0) {
1262 cf_detail(CF_SOCKET, "Timeout while waiting on FD %d", sock->fd);
1263 return 0;
1264 }
1265
1266 cf_detail(CF_SOCKET, "Got events 0x%x on FD %d", pfd.revents, sock->fd);
1267 return pfd.revents;
1268 }
1269}
1270
// Tries to send all size bytes from buffp on sock, calling cf_socket_send()
// repeatedly as needed.
//
// When the socket would block (EAGAIN) and timeout isn't 0, this waits for
// the socket to become writable for up to timeout per wait; the timeout
// starts over after each successful wait. With a timeout of 0, this doesn't
// wait at all and returns on the first EAGAIN.
//
// Returns the number of bytes sent. This is less than size, if a wait timed
// out, if a wait reported unexpected events, or - with a timeout of 0 - if
// the socket would block. Returns -1 on any other send error, with errno set
// by cf_socket_send(), or with errno set to ENOTCONN, if a send reported 0
// bytes.
static int32_t
do_try_send_all(cf_socket *sock, const void *buffp, size_t size, int32_t flags,
		int32_t timeout)
{
	uint8_t *buff = (uint8_t *) buffp;
	cf_detail(CF_SOCKET, "Blocking send on FD %d, size = %zu", sock->fd, size);
	size_t off = 0;

	while (off < size) {
		ssize_t count = cf_socket_send(sock, buff + off, size - off, flags);

		if (count < 0) {
			if (errno == EAGAIN) {
				cf_debug(CF_SOCKET, "FD %d is blocking", sock->fd);

				if (timeout != 0) {
					uint16_t events = socket_wait(sock, POLLOUT, timeout);

					// Exact match: POLLOUT combined with any other event
					// counts as unexpected below.
					if (events == POLLOUT) {
						cf_debug(CF_SOCKET, "FD %d can write", sock->fd);
						continue;
					}

					if (events != 0) {
						cf_detail(CF_SOCKET, "Unexpected events 0x%x on FD %d", events, sock->fd);
						return off;
					}
				}

				// Reached on a wait timeout (events == 0) as well as with a
				// timeout of 0, i.e., without having waited at all.
				cf_debug(CF_SOCKET, "Timeout during blocking send on FD %d", sock->fd);
				return off;
			}

			return -1;
		}

		if (count == 0) {
			// TODO - remove warning if this turns out to be normal.
			cf_warning(CF_SOCKET, "Sent 0 bytes on FD %d", sock->fd);
			errno = ENOTCONN;
			return -1;
		}

		off += count;
	}

	cf_detail(CF_SOCKET, "Blocking send on FD %d complete", sock->fd);
	return off;
}
1320
1321int32_t
1322cf_socket_send_all(cf_socket *sock, const void *buff, size_t size, int32_t flags,
1323 int32_t timeout)
1324{
1325 int32_t res;
1326
1327 if (sock->ssl) {
1328 res = tls_socket_send(sock, buff, size, flags, timeout);
1329 }
1330 else {
1331 res = do_try_send_all(sock, buff, size, flags, timeout);
1332 }
1333
1334 if (res < 0) {
1335 return -1;
1336 }
1337
1338 // Only ever happens for non-TLS.
1339 if (res < size) {
1340 errno = ETIMEDOUT;
1341 return -1;
1342 }
1343
1344 return 0;
1345}
1346
1347int32_t
1348cf_socket_try_send_all(cf_socket *sock, const void *buff, size_t size, int32_t flags)
1349{
1350 if (sock->ssl) {
1351 int32_t res = tls_socket_send(sock, buff, size, flags, 5);
1352
1353 if (res < 0 && errno == ETIMEDOUT) {
1354 // Make the caller call again with the same buff and the same size,
1355 // which is what OpenSSL wants.
1356 res = 0;
1357 }
1358
1359 return res;
1360 }
1361
1362 return do_try_send_all(sock, buff, size, flags, 0);
1363}
1364
1365static int32_t
1366do_recv_all(cf_socket *sock, void *buffp, size_t size, int32_t flags,
1367 int32_t timeout)
1368{
1369 uint8_t *buff = (uint8_t *) buffp;
1370 cf_detail(CF_SOCKET, "Blocking receive on FD %d, size = %zu", sock->fd, size);
1371 size_t off = 0;
1372
1373 while (off < size) {
1374 ssize_t count = cf_socket_recv(sock, buff + off, size - off, flags);
1375
1376 if (count < 0) {
1377 if (errno == EAGAIN) {
1378 cf_debug(CF_SOCKET, "FD %d is blocking", sock->fd);
1379
1380 if (timeout != 0) {
1381 uint16_t events = socket_wait(sock, POLLIN, timeout);
1382
1383 if (events == POLLIN) {
1384 cf_debug(CF_SOCKET, "FD %d can read", sock->fd);
1385 continue;
1386 }
1387
1388 if (events != 0) {
1389 cf_warning(CF_SOCKET, "Unexpected events 0x%x on FD %d", events, sock->fd);
1390 return off;
1391 }
1392 }
1393
1394 cf_debug(CF_SOCKET, "Timeout during blocking receive on FD %d", sock->fd);
1395 errno = ETIMEDOUT;
1396 return -1;
1397 }
1398
1399 return -1;
1400 }
1401
1402 if (count == 0) {
1403 errno = ENOTCONN;
1404 return -1;
1405 }
1406
1407 off += count;
1408 }
1409
1410 cf_detail(CF_SOCKET, "Blocking receive on FD %d complete", sock->fd);
1411 return 0;
1412}
1413
1414int32_t
1415cf_socket_recv_all(cf_socket *sock, void *buff, size_t size, int32_t flags, int32_t timeout)
1416{
1417 if (sock->ssl) {
1418 int32_t res = tls_socket_recv(sock, buff, size, flags, timeout);
1419
1420 if (res < 0) {
1421 return -1;
1422 }
1423
1424 if (res < size) {
1425 errno = ETIMEDOUT;
1426 return -1;
1427 }
1428
1429 return 0;
1430 }
1431
1432 return do_recv_all(sock, buff, size, flags, timeout);
1433}
1434
1435static void
1436x_shutdown(cf_socket *sock, int32_t how)
1437{
1438 if (sock->ssl) {
1439 tls_socket_shutdown(sock);
1440 }
1441
1442 if (shutdown(sock->fd, how) < 0) {
1443 if (errno != ENOTCONN) {
1444 cf_crash(CF_SOCKET, "shutdown() failed on FD %d: %d (%s)",
1445 sock->fd, errno, cf_strerror(errno));
1446 }
1447 else {
1448 cf_debug(CF_SOCKET, "shutdown() on disconnected FD %d: %d (%s)",
1449 sock->fd, errno, cf_strerror(errno));
1450 }
1451 }
1452}
1453
// Shut down only the write direction of the socket (SHUT_WR), via
// x_shutdown().
void
cf_socket_write_shutdown(cf_socket *sock)
{
	cf_debug(CF_SOCKET, "Shutting down write channel of FD %d", sock->fd);
	x_shutdown(sock, SHUT_WR);
}
1460
// Shut down both directions of the socket (SHUT_RDWR), via x_shutdown().
void
cf_socket_shutdown(cf_socket *sock)
{
	cf_debug(CF_SOCKET, "Shutting down FD %d", sock->fd);
	x_shutdown(sock, SHUT_RDWR);
}
1467
// Close the socket: calls tls_socket_close(), closes the file descriptor with
// safe_close() and then sets sock->fd to -1.
void
cf_socket_close(cf_socket *sock)
{
	cf_debug(CF_SOCKET, "Closing FD %d", sock->fd);
	tls_socket_close(sock);
	safe_close(sock->fd);
	// Reset the FD only after it has been closed.
	sock->fd = -1;
}
1476
1477void
1478cf_socket_drain_close(cf_socket *sock)
1479{
1480 cf_debug(CF_SOCKET, "Draining and closing FD %d", sock->fd);
1481 int32_t efd = epoll_create(1);
1482
1483 if (efd < 0) {
1484 cf_crash(CF_SOCKET, "epoll_create() failed: %d (%s)", errno, cf_strerror(errno));
1485 }
1486
1487 struct epoll_event event = { .data.fd = sock->fd, .events = EPOLLRDHUP };
1488
1489 if (epoll_ctl(efd, EPOLL_CTL_ADD, sock->fd, &event) < 0) {
1490 cf_crash(CF_SOCKET, "epoll_ctl() failed for FD %d: %d (%s)",
1491 sock->fd, errno, cf_strerror(errno));
1492 }
1493
1494 cf_socket_shutdown(sock);
1495 int32_t count = safe_wait(efd, &event, 1, 5000);
1496
1497 if (count == 0) {
1498 cf_warning(CF_SOCKET, "Timeout while waiting for FD %d to drain", sock->fd);
1499 goto cleanup1;
1500 }
1501
1502 cf_debug(CF_SOCKET, "FD %d drained", sock->fd);
1503
1504cleanup1:
1505 if (epoll_ctl(efd, EPOLL_CTL_DEL, sock->fd, NULL) < 0) {
1506 cf_crash(CF_SOCKET, "epoll_ctl() failed for FD %d: %d (%s)",
1507 sock->fd, errno, cf_strerror(errno));
1508 }
1509
1510 safe_close(efd);
1511 cf_socket_close(sock);
1512 cf_socket_term(sock);
1513}
1514
// Terminate the socket object: calls tls_socket_term() and sets sock->fd to -1.
// The file descriptor itself is not closed here.
void
cf_socket_term(cf_socket *sock)
{
	tls_socket_term(sock);
	sock->fd = -1;
}
1521
1522void
1523cf_msock_cfg_init(cf_msock_cfg *cfg, cf_sock_owner owner)
1524{
1525 cfg->owner = owner;
1526 cfg->port = 0;
1527 cf_ip_addr_set_any(&cfg->addr);
1528 cf_ip_addr_set_any(&cfg->if_addr);
1529 cfg->ttl = 0;
1530}
1531
1532void
1533cf_msock_cfg_copy(const cf_msock_cfg *from, cf_msock_cfg *to)
1534{
1535 to->owner = from->owner;
1536 to->port = from->port;
1537 cf_ip_addr_copy(&from->addr, &to->addr);
1538 cf_ip_addr_copy(&from->if_addr, &to->if_addr);
1539 to->ttl = from->ttl;
1540}
1541
// Initialize a multicast server configuration to hold no socket configurations.
void
cf_mserv_cfg_init(cf_mserv_cfg *cfg)
{
	cfg->n_cfgs = 0;
}
1547
1548int32_t
1549cf_mserv_cfg_add_msock_cfg(cf_mserv_cfg *serv_cfg, const cf_msock_cfg *sock_cfg)
1550{
1551 if (serv_cfg->n_cfgs >= CF_SOCK_CFG_MAX) {
1552 cf_warning(CF_SOCKET, "Too many socket configurations in server configuration");
1553 return -1;
1554 }
1555
1556 uint32_t n = serv_cfg->n_cfgs;
1557
1558 for (uint32_t i = 0; i < n; ++i) {
1559 cf_msock_cfg *walker = &serv_cfg->cfgs[i];
1560
1561 if (walker->owner == sock_cfg->owner && walker->port == sock_cfg->port &&
1562 cf_ip_addr_compare(&walker->addr, &sock_cfg->addr) == 0 &&
1563 cf_ip_addr_compare(&walker->if_addr, &sock_cfg->if_addr) == 0 &&
1564 walker->ttl == sock_cfg->ttl) {
1565 return 0;
1566 }
1567 }
1568
1569 cf_msock_cfg_copy(sock_cfg, &serv_cfg->cfgs[n]);
1570 serv_cfg->n_cfgs = ++n;
1571 return 0;
1572}
1573
1574int32_t
1575cf_socket_mcast_init(cf_mserv_cfg *cfg, cf_sockets *socks)
1576{
1577 int32_t res = -1;
1578
1579 if (cfg->n_cfgs < 1) {
1580 cf_warning(CF_SOCKET, "Missing multicast socket configuration");
1581 goto cleanup0;
1582 }
1583
1584 cf_debug(CF_SOCKET, "Initializing %u multicast socket(s)", cfg->n_cfgs);
1585 uint32_t n;
1586 cf_socket *sock;
1587
1588 for (n = 0; n < cfg->n_cfgs; ++n) {
1589 sock = &socks->socks[n];
1590
1591 if (cfg->cfgs[n].port == 0) {
1592 cf_warning(CF_SOCKET, "Missing multicast port");
1593 goto cleanup1;
1594 }
1595
1596 cf_sock_addr addr;
1597 cf_sock_addr_from_addr_port(&cfg->cfgs[n].addr, cfg->cfgs[n].port, &addr);
1598
1599 struct sockaddr_storage sas;
1600 cf_sock_addr_to_native(&addr, (struct sockaddr *)&sas);
1601
1602 cf_debug(CF_SOCKET, "Initializing multicast socket for %s", cf_sock_addr_print(&addr));
1603 int32_t fd = socket(sas.ss_family, SOCK_DGRAM, 0);
1604
1605 if (fd < 0) {
1606 cf_warning(CF_SOCKET, "Error while creating socket for %s: %d (%s)",
1607 cf_sock_addr_print(&addr), errno, cf_strerror(errno));
1608 goto cleanup1;
1609 }
1610
1611 cf_socket_init(sock);
1612 sock->fd = fd;
1613 fd = -1;
1614
1615 cf_socket_fix_client(sock);
1616 cf_socket_fix_server(sock);
1617
1618 static const int32_t yes = 1;
1619 safe_setsockopt(sock->fd, SOL_SOCKET, SO_REUSEADDR, &yes, sizeof(yes));
1620
1621 if (!cf_ip_addr_is_any(&cfg->cfgs[n].if_addr)) {
1622 cf_info(CF_SOCKET, "Setting multicast interface address: %s",
1623 cf_ip_addr_print(&cfg->cfgs[n].if_addr));
1624
1625 if (cf_socket_mcast_set_inter(sock, &cfg->cfgs[n].if_addr) < 0) {
1626 cf_warning(CF_SOCKET, "Error while binding to interface %s",
1627 cf_ip_addr_print(&cfg->cfgs[n].if_addr));
1628 goto cleanup2;
1629 }
1630 }
1631
1632 uint8_t ttl = cfg->cfgs[n].ttl;
1633
1634 if (ttl > 0) {
1635 cf_info(CF_SOCKET, "Setting multicast TTL: %d", ttl);
1636
1637 if (cf_socket_mcast_set_ttl(sock, ttl) < 0) {
1638 cf_warning(CF_SOCKET, "Error while setting multicast TTL");
1639 goto cleanup2;
1640 }
1641 }
1642
1643 while (bind(sock->fd, (struct sockaddr *)&sas,
1644 cf_socket_addr_len((struct sockaddr *)&sas)) < 0) {
1645 if (errno != EADDRINUSE) {
1646 cf_warning(CF_SOCKET, "Error while binding to %s: %d (%s)",
1647 cf_sock_addr_print(&addr), errno, cf_strerror(errno));
1648 goto cleanup2;
1649 }
1650
1651 cf_warning(CF_SOCKET, "Socket %s in use, waiting", cf_sock_addr_print(&addr));
1652 usleep(5 * 1000 * 1000);
1653 }
1654
1655 cf_info(CF_SOCKET, "Joining multicast group: %s", cf_ip_addr_print(&addr.addr));
1656
1657 if (cf_socket_mcast_join_group(sock, &cfg->cfgs[n].if_addr, &addr.addr) < 0) {
1658 cf_warning(CF_SOCKET, "Error while joining multicast group %s",
1659 cf_ip_addr_print(&addr.addr));
1660 goto cleanup2;
1661 }
1662
1663 sock->cfg = &cfg->cfgs[n];
1664 }
1665
1666 socks->n_socks = n;
1667 res = 0;
1668 goto cleanup0;
1669
1670cleanup2:
1671 cf_socket_close(sock);
1672 cf_socket_term(sock);
1673
1674cleanup1:
1675 for (uint32_t i = 0; i < n; ++i) {
1676 cf_socket_close(&socks->socks[i]);
1677 cf_socket_term(&socks->socks[i]);
1678 }
1679
1680cleanup0:
1681 return res;
1682}
1683
1684void
1685cf_socket_mcast_show(cf_fault_context cont, const char *tag, const cf_sockets *socks)
1686{
1687 for (uint32_t i = 0; i < socks->n_socks; ++i) {
1688 cf_msock_cfg *cfg = socks->socks[i].cfg;
1689 cf_sock_addr addr;
1690 cf_sock_addr_from_addr_port(&cfg->if_addr, cfg->port, &addr);
1691 cf_info(cont, "Started %s endpoint %s", tag, cf_sock_addr_print(&addr));
1692 }
1693}
1694
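// Uncomment to enable the additional detail logging guarded by VERY_CHATTY in
// the poll functions below.
// #define VERY_CHATTY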
1696
1697void
1698cf_poll_create(cf_poll *poll)
1699{
1700 int32_t fd = epoll_create(1);
1701
1702 if (fd < 0) {
1703 cf_crash(CF_SOCKET, "Error while creating epoll instance: %d (%s)",
1704 errno, cf_strerror(errno));
1705 }
1706
1707 *poll = (cf_poll){ .fd = fd };
1708 cf_debug(CF_SOCKET, "Created new epoll instance with FD %d", fd);
1709}
1710
1711void
1712cf_poll_add_fd(cf_poll poll, int32_t fd, uint32_t events, void *data)
1713{
1714 cf_debug(CF_SOCKET,
1715 "Adding FD %d to epoll instance with FD %d, events = 0x%x",
1716 fd, poll.fd, events);
1717 struct epoll_event ev = { .events = events, .data.ptr = data };
1718
1719 if (epoll_ctl(poll.fd, EPOLL_CTL_ADD, fd, &ev) < 0) {
1720 cf_crash(CF_SOCKET,
1721 "Error while adding FD %d to epoll instance %d: %d (%s)",
1722 fd, poll.fd, errno, cf_strerror(errno));
1723 }
1724}
1725
// Register a socket's file descriptor with the epoll instance; thin wrapper
// around cf_poll_add_fd().
void
cf_poll_add_socket(cf_poll poll, const cf_socket *sock, uint32_t events, void *data)
{
	cf_poll_add_fd(poll, sock->fd, events, data);
}
1731
1732int32_t
1733cf_poll_modify_socket_forgiving(cf_poll poll, const cf_socket *sock, uint32_t events, void *data,
1734 uint32_t n_err_ok, const int32_t *err_ok)
1735{
1736#if defined VERY_CHATTY
1737 cf_detail(CF_SOCKET, "Modifying FD %d in epoll instance with FD %d, events = 0x%x",
1738 sock->fd, poll.fd, events);
1739#endif
1740
1741 struct epoll_event ev = { .events = events, .data.ptr = data };
1742
1743 if (epoll_ctl(poll.fd, EPOLL_CTL_MOD, sock->fd, &ev) < 0) {
1744 for (uint32_t i = 0; i < n_err_ok; ++i) {
1745 if (errno == err_ok[i]) {
1746 return errno;
1747 }
1748 }
1749
1750 cf_crash(CF_SOCKET, "Error while modifying FD %d in epoll instance %d: %d (%s)",
1751 sock->fd, poll.fd, errno, cf_strerror(errno));
1752 }
1753
1754 return 0;
1755}
1756
1757int32_t
1758cf_poll_delete_socket_forgiving(cf_poll poll, const cf_socket *sock, uint32_t n_err_ok,
1759 int32_t *err_ok)
1760{
1761 cf_detail(CF_SOCKET, "Deleting FD %d from epoll instance with FD %d", sock->fd, poll.fd);
1762
1763 if (epoll_ctl(poll.fd, EPOLL_CTL_DEL, sock->fd, NULL) < 0) {
1764 for (uint32_t i = 0; i < n_err_ok; ++i) {
1765 if (errno == err_ok[i]) {
1766 return errno;
1767 }
1768 }
1769
1770 cf_crash(CF_SOCKET, "Error while deleting FD %d from epoll instance %d: %d (%s)",
1771 sock->fd, poll.fd, errno, cf_strerror(errno));
1772 }
1773
1774 return 0;
1775}
1776
1777void
1778cf_poll_add_sockets(cf_poll poll, cf_sockets *socks, uint32_t events)
1779{
1780 for (uint32_t i = 0; i < socks->n_socks; ++i) {
1781 cf_poll_add_socket(poll, &socks->socks[i], events, &socks->socks[i]);
1782 }
1783}
1784
1785void
1786cf_poll_delete_sockets(cf_poll poll, cf_sockets *socks)
1787{
1788 for (uint32_t i = 0; i < socks->n_socks; ++i) {
1789 cf_poll_delete_socket(poll, &socks->socks[i]);
1790 }
1791}
1792
1793int32_t
1794cf_poll_wait(cf_poll poll, cf_poll_event *events, int32_t limit, int32_t timeout)
1795{
1796#if defined VERY_CHATTY
1797 cf_detail(CF_SOCKET, "Waiting on epoll instance with FD %d", poll.fd);
1798#endif
1799
1800 while (true) {
1801 int32_t res = epoll_wait(poll.fd, (struct epoll_event *)events, limit, timeout);
1802
1803 if (res >= 0) {
1804#if defined VERY_CHATTY
1805 if (cf_fault_filter[CF_SOCKET] >= CF_DETAIL) {
1806 cf_detail(CF_SOCKET, "Epoll instance with FD %d reports %d event(s)", poll.fd, res);
1807
1808 for (int32_t i = 0; i < res; ++i) {
1809 cf_detail(CF_SOCKET, "Event #%d: 0x%x, %p",
1810 i, events[i].events, events[i].data);
1811 }
1812 }
1813#endif
1814
1815 return res;
1816 }
1817
1818 if (errno != EINTR) {
1819 cf_crash(CF_SOCKET, "Error while waiting for events on epoll instance %d: %d (%s)",
1820 poll.fd, errno, cf_strerror(errno));
1821 }
1822 }
1823}
1824
1825void
1826cf_poll_destroy(cf_poll poll)
1827{
1828 cf_debug(CF_SOCKET, "Destroying epoll instance with FD %d", poll.fd);
1829
1830 if (close(poll.fd) < 0) {
1831 cf_crash(CF_SOCKET, "Error while closing epoll instance: %d (%s)",
1832 errno, cf_strerror(errno));
1833 }
1834}
1835
// Size in bytes of the buffer that receives netlink responses.
#define RESP_SIZE (2 * 1024 * 1024)
// Maximum number of interface entries collected per enumeration.
#define MAX_INTERS 500
// Maximum number of addresses collected per interface entry.
#define MAX_ADDRS 20
1839
// One network interface (or labeled alias of an interface) as collected from
// netlink.
typedef struct inter_entry_s {
	uint32_t index; // interface index; an alias carries the index of the entry it was created from
	char name[50]; // interface name, or the address label for an alias
	bool def_route; // set when a default route goes through this interface index
	bool up; // true when both IFF_UP and IFF_RUNNING are set
	uint32_t mtu;
	uint32_t mac_addr_len; // number of valid bytes in mac_addr
	uint8_t mac_addr[50];
	uint32_t n_addrs; // number of valid entries in addrs
	cf_ip_addr addrs[MAX_ADDRS];

	// While links are being collected this holds the master's interface index
	// (0 = no master). enumerate_inter() then replaces it with a pointer to the
	// master's entry (NULL = no master).
	union {
		struct inter_entry_s *entry;
		uint32_t index;
	} master;

	// For an alias created from an address label: the entry that was found by
	// interface index. NULL otherwise.
	struct inter_entry_s *phys;
} inter_entry;
1858
// Result of an interface enumeration: the first n_inters elements of inters
// are valid.
typedef struct inter_info_s {
	uint32_t n_inters;
	inter_entry inters[MAX_INTERS];
} inter_info;
1863
// Selection criteria applied by inter_get_addr().
typedef struct inter_filter_s {
	bool allow_v6; // passed on to the interface enumeration
	bool def_route; // when true, only interfaces flagged as having a default route
	bool up; // when true, only interfaces flagged as up
	const char *if_name; // when non-NULL, only the interface with exactly this name
} inter_filter;
1870
// State shared between the netlink_dump() callbacks. The has_* and curr_*
// fields describe the netlink message currently being processed and are
// cleared by reset_fn().
typedef struct cb_context_s {
	bool has_label; // curr_label was set from IFA_LABEL
	bool has_address; // curr_address was set from IFA_ADDRESS or RTA_DST
	bool has_local; // curr_address was set from IFA_LOCAL
	bool has_index; // curr_index was set from RTA_OIF
	bool has_priority; // curr_priority was set from RTA_PRIORITY
	char curr_label[50];
	cf_ip_addr curr_address;
	uint32_t curr_index;
	uint32_t curr_priority;
	bool allow_v6; // passed to cf_socket_parse_netlink()
	inter_info *inter; // interface table being filled
} cb_context;
1884
// Callbacks used by netlink_dump() for every message of the requested type:
// reset_cb runs before the message's attributes are visited, data_cb runs once
// per attribute whose type matches one of the attribute filters, and post_cb
// runs after all attributes of the message were visited.
typedef void (*reset_cb)(cb_context *cont);
typedef void (*data_cb)(cb_context *cont, void *info, int32_t type, void *data, size_t len);
typedef void (*post_cb)(cb_context *cont);
1888
1889static int32_t
1890netlink_dump(int32_t type, int32_t filter1, int32_t filter2a, int32_t filter2b, int32_t filter2c,
1891 int32_t filter2d, size_t size, reset_cb reset_fn, data_cb data_fn, post_cb post_fn,
1892 cb_context *cont)
1893{
1894 int32_t res = -1;
1895 int32_t nls = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
1896
1897 if (nls < 0) {
1898 cf_warning(CF_SOCKET, "Error while creating netlink socket: %d (%s)",
1899 errno, cf_strerror(errno));
1900 goto cleanup0;
1901 }
1902
1903 struct sockaddr_nl loc;
1904 memset(&loc, 0, sizeof(loc));
1905 loc.nl_family = AF_NETLINK;
1906
1907 if (bind(nls, (struct sockaddr *)&loc, sizeof(loc)) < 0) {
1908 cf_warning(CF_SOCKET, "Error while binding netlink socket: %d (%s)",
1909 errno, cf_strerror(errno));
1910 goto cleanup1;
1911 }
1912
1913 static cf_atomic32 seq = 0;
1914 struct {
1915 struct nlmsghdr h;
1916 struct rtgenmsg m;
1917 } req;
1918
1919 memset(&req, 0, sizeof(req));
1920 req.h.nlmsg_len = NLMSG_LENGTH(sizeof(req.m));
1921 req.h.nlmsg_type = type;
1922 req.h.nlmsg_flags = NLM_F_REQUEST | NLM_F_ROOT;
1923 req.h.nlmsg_seq = cf_atomic32_add(&seq, 1);
1924 req.m.rtgen_family = PF_UNSPEC;
1925
1926 struct sockaddr_nl rem;
1927 memset(&rem, 0, sizeof(rem));
1928 rem.nl_family = AF_NETLINK;
1929
1930 struct iovec iov;
1931 memset(&iov, 0, sizeof(iov));
1932 iov.iov_base = &req;
1933 iov.iov_len = req.h.nlmsg_len;
1934
1935 struct msghdr msg;
1936 memset(&msg, 0, sizeof(msg));
1937 msg.msg_iov = &iov;
1938 msg.msg_iovlen = 1;
1939 msg.msg_name = &rem;
1940 msg.msg_namelen = sizeof(rem);
1941
1942 if (sendmsg(nls, &msg, 0) < 0) {
1943 cf_warning(CF_SOCKET, "Error while sending netlink request: %d (%s)",
1944 errno, cf_strerror(errno));
1945 goto cleanup1;
1946 }
1947
1948 uint8_t *resp = cf_malloc(RESP_SIZE);
1949
1950 memset(resp, 0, RESP_SIZE);
1951 bool done = false;
1952
1953 while (!done) {
1954 memset(&rem, 0, sizeof(rem));
1955 memset(&iov, 0, sizeof(iov));
1956 iov.iov_base = resp;
1957 iov.iov_len = RESP_SIZE;
1958
1959 memset(&msg, 0, sizeof(msg));
1960 msg.msg_iov = &iov;
1961 msg.msg_iovlen = 1;
1962 msg.msg_name = &rem;
1963 msg.msg_namelen = sizeof(rem);
1964
1965 ssize_t len = recvmsg(nls, &msg, 0);
1966
1967 if (len < 0) {
1968 cf_warning(CF_SOCKET, "Error while receiving netlink response: %d (%s)",
1969 errno, cf_strerror(errno));
1970 goto cleanup2;
1971 }
1972
1973 if ((msg.msg_flags & MSG_TRUNC) != 0) {
1974 cf_warning(CF_SOCKET, "Received truncated netlink message");
1975 goto cleanup2;
1976 }
1977
1978 struct nlmsghdr *h = (struct nlmsghdr *)resp;
1979
1980 while (NLMSG_OK(h, len)) {
1981 if (h->nlmsg_type == NLMSG_NOOP) {
1982 h = NLMSG_NEXT(h, len);
1983 continue;
1984 }
1985
1986 if (h->nlmsg_type == NLMSG_ERROR) {
1987 int32_t *err = NLMSG_DATA(h);
1988 cf_warning(CF_SOCKET, "Received netlink error message: %d (%s)",
1989 -*err, cf_strerror(-*err));
1990 goto cleanup2;
1991 }
1992
1993 if (h->nlmsg_type == NLMSG_DONE) {
1994 done = true;
1995 break;
1996 }
1997
1998 if (h->nlmsg_type == NLMSG_OVERRUN) {
1999 cf_warning(CF_SOCKET, "Received netlink overrun message");
2000 goto cleanup2;
2001 }
2002
2003 if (h->nlmsg_type == filter1) {
2004 if (reset_fn != NULL) {
2005 reset_fn(cont);
2006 }
2007
2008 void *info = NLMSG_DATA(h);
2009 uint32_t a_len = h->nlmsg_len - NLMSG_LENGTH(size);
2010 struct rtattr *a = (struct rtattr *)((uint8_t *)info + NLMSG_ALIGN(size));
2011
2012 while (RTA_OK(a, a_len)) {
2013 if (a->rta_type == filter2a || a->rta_type == filter2b ||
2014 a->rta_type == filter2c || a->rta_type == filter2d) {
2015 data_fn(cont, info, a->rta_type, RTA_DATA(a), RTA_PAYLOAD(a));
2016 }
2017
2018 a = RTA_NEXT(a, a_len);
2019 }
2020
2021 if (post_fn != NULL) {
2022 post_fn(cont);
2023 }
2024 }
2025
2026 if ((h->nlmsg_flags & NLM_F_MULTI) == 0) {
2027 done = true;
2028 break;
2029 }
2030
2031 h = NLMSG_NEXT(h, len);
2032 }
2033 }
2034
2035 res = 0;
2036
2037cleanup2:
2038 cf_free(resp);
2039
2040cleanup1:
2041 close(nls);
2042
2043cleanup0:
2044 return res;
2045}
2046
2047static void
2048reset_fn(cb_context *cont)
2049{
2050 cont->has_label = false;
2051 cont->has_address = false;
2052 cont->has_local = false;
2053 cont->has_index = false;
2054 cont->has_priority = false;
2055 memset(&cont->curr_label, 0, sizeof(cont->curr_label));
2056 cf_ip_addr_set_any(&cont->curr_address);
2057 cont->curr_index = 0;
2058 cont->curr_priority = 0;
2059}
2060
2061static void
2062link_fn(cb_context *cont, void *info_, int32_t type, void *data, size_t len)
2063{
2064 struct ifinfomsg *info = info_;
2065 inter_info *inter = cont->inter;
2066 inter_entry *entry = NULL;
2067
2068 for (uint32_t i = 0; i < inter->n_inters; ++i) {
2069 if (inter->inters[i].index == info->ifi_index) {
2070 entry = &inter->inters[i];
2071 break;
2072 }
2073 }
2074
2075 if (entry == NULL) {
2076 uint32_t i = inter->n_inters;
2077
2078 if (i >= MAX_INTERS) {
2079 cf_crash(CF_SOCKET, "Too many interfaces");
2080 }
2081
2082 entry = &inter->inters[i];
2083 ++inter->n_inters;
2084
2085 entry->index = info->ifi_index;
2086 entry->up = (info->ifi_flags & (IFF_UP | IFF_RUNNING)) == (IFF_UP | IFF_RUNNING);
2087 }
2088
2089 if (type == IFLA_IFNAME) {
2090 if (len > sizeof(entry->name)) {
2091 cf_crash(CF_SOCKET, "Interface name too long: %s", (char *)data);
2092 }
2093
2094 // Length includes terminating NUL.
2095 memcpy(entry->name, data, len);
2096 cf_detail(CF_SOCKET, "Collected interface name %s", entry->name);
2097 }
2098 else if (type == IFLA_ADDRESS) {
2099 if (len > sizeof(entry->mac_addr)) {
2100 cf_crash(CF_SOCKET, "MAC address too long");
2101 }
2102
2103 entry->mac_addr_len = (uint32_t)len;
2104 memcpy(entry->mac_addr, data, len);
2105 }
2106 else if (type == IFLA_MTU) {
2107 if (len != 4) {
2108 cf_crash(CF_SOCKET, "MTU value has invalid length: %zu", len);
2109 }
2110
2111 memcpy(&entry->mtu, data, len);
2112 cf_detail(CF_SOCKET, "Collected interface MTU %s -> %u", entry->name, entry->mtu);
2113 }
2114 else if (type == IFLA_MASTER) {
2115 if (len != 4) {
2116 cf_crash(CF_SOCKET, "Master index has invalid length: %zu", len);
2117 }
2118
2119 memcpy(&entry->master.index, data, len);
2120 cf_detail(CF_SOCKET, "Collected interface master index %s -> %u",
2121 entry->name, entry->master.index);
2122 }
2123}
2124
2125static void
2126addr_fn(cb_context *cont, void *info_, int32_t type, void *data, size_t len)
2127{
2128 struct ifaddrmsg *info = info_;
2129
2130 if (cont->curr_index == 0) {
2131 cont->curr_index = info->ifa_index;
2132 }
2133
2134 if (type == IFA_LABEL) {
2135 if (len > sizeof(cont->curr_label)) {
2136 cf_crash(CF_SOCKET, "Interface label too long: %s", (char *)data);
2137 }
2138
2139 // Length includes terminating NUL.
2140 memcpy(cont->curr_label, data, len);
2141 cont->has_label = true;
2142 cf_detail(CF_SOCKET, "Collected interface label %s", cont->curr_label);
2143 }
2144 else if (type == IFA_ADDRESS) {
2145 // IFA_LOCAL takes precedence over IFA_ADDRESS.
2146 if (cont->has_local) {
2147 cf_detail(CF_SOCKET, "Prioritizing local address");
2148 return;
2149 }
2150
2151 if (cf_socket_parse_netlink(cont->allow_v6, info->ifa_family, info->ifa_flags,
2152 data, len, &cont->curr_address) < 0) {
2153 return;
2154 }
2155
2156 cont->has_address = true;
2157 cf_detail(CF_SOCKET, "Considering interface address %s",
2158 cf_ip_addr_print(&cont->curr_address));
2159 }
2160 else if (type == IFA_LOCAL) {
2161 if (cf_socket_parse_netlink(cont->allow_v6, info->ifa_family, info->ifa_flags,
2162 data, len, &cont->curr_address) < 0) {
2163 return;
2164 }
2165
2166 cont->has_local = true;
2167 cf_detail(CF_SOCKET, "Considering local interface address %s",
2168 cf_ip_addr_print(&cont->curr_address));
2169 }
2170}
2171
2172static void
2173addr_fix_fn(cb_context *cont)
2174{
2175 if (!cont->has_address && !cont->has_local) {
2176 return;
2177 }
2178
2179 inter_info *inter = cont->inter;
2180 inter_entry *by_index = NULL;
2181 inter_entry *by_label = NULL;
2182
2183 for (uint32_t i = 0; i < inter->n_inters; ++i) {
2184 if (inter->inters[i].index == cont->curr_index) {
2185 by_index = &inter->inters[i];
2186 break;
2187 }
2188 }
2189
2190 if (by_index == NULL) {
2191 cf_crash(CF_SOCKET, "Invalid interface index: %u", cont->curr_index);
2192 }
2193
2194 if (cont->has_label) {
2195 for (uint32_t i = 0; i < inter->n_inters; ++i) {
2196 if (strcmp(inter->inters[i].name, cont->curr_label) == 0) {
2197 by_label = &inter->inters[i];
2198 break;
2199 }
2200 }
2201
2202 if (by_label == NULL) {
2203 cf_detail(CF_SOCKET, "New interface for label %s", cont->curr_label);
2204 uint32_t i = inter->n_inters;
2205
2206 if (i >= MAX_INTERS) {
2207 cf_crash(CF_SOCKET, "Too many interfaces");
2208 }
2209
2210 by_label = &inter->inters[i];
2211 ++inter->n_inters;
2212
2213 by_label->index = by_index->index;
2214 by_label->up = by_index->up;
2215 memcpy(&by_label->mac_addr, &by_index->mac_addr, sizeof(by_label->mac_addr));
2216 by_label->mac_addr_len = by_index->mac_addr_len;
2217 by_label->mtu = by_index->mtu;
2218 by_label->phys = by_index;
2219
2220 memcpy(&by_label->name, cont->curr_label, sizeof(by_label->name));
2221 }
2222 }
2223
2224 inter_entry *entry = by_label != NULL ? by_label : by_index;
2225 uint32_t i = entry->n_addrs;
2226
2227 if (i >= MAX_ADDRS) {
2228 cf_crash(CF_SOCKET, "Too many addresses for interface %s", entry->name);
2229 }
2230
2231 cf_ip_addr *addr = &entry->addrs[i];
2232 cf_ip_addr_copy(&cont->curr_address, addr);
2233
2234 ++entry->n_addrs;
2235 cf_detail(CF_SOCKET, "Collected interface address %s -> %s",
2236 entry->name, cf_ip_addr_print(addr));
2237}
2238
2239static void
2240route_fn(cb_context *cont, void *info_, int32_t type, void *data, size_t len)
2241{
2242 struct rtmsg *info = info_;
2243
2244 // Ignore entries with RTM_F_CLONED, because they are route cache entries.
2245 if ((info->rtm_flags & RTM_F_CLONED) != 0) {
2246 return;
2247 }
2248
2249 if (type == RTA_DST) {
2250 if (cf_socket_parse_netlink(cont->allow_v6, info->rtm_family, 0,
2251 data, len, &cont->curr_address) < 0) {
2252 // If the address is not allowed, set to a non-zero address, because
2253 // zero means default route.
2254 cf_ip_addr_set_local(&cont->curr_address);
2255 }
2256
2257 cont->has_address = true;
2258 }
2259 else if (type == RTA_OIF) {
2260 if (len != 4) {
2261 cf_detail(CF_SOCKET, "Invalid interface index");
2262 return;
2263 }
2264
2265 cont->curr_index = *(uint32_t *)data;
2266 cont->has_index = true;
2267 }
2268 else if (type == RTA_PRIORITY) {
2269 if (len != 4) {
2270 cf_detail(CF_SOCKET, "Invalid route priority");
2271 return;
2272 }
2273
2274 cont->curr_priority = *(uint32_t *)data;
2275 cont->has_priority = true;
2276 }
2277}
2278
2279static void
2280route_fix_fn(cb_context *cont)
2281{
2282 // It's not a default route, if it has an address and the address isn't zero.
2283 if (cont->has_address && !cf_ip_addr_is_any(&cont->curr_address)) {
2284 return;
2285 }
2286
2287 // It's one of the catch-all entries.
2288 if (cont->has_priority && cont->curr_priority == UINT32_MAX) {
2289 return;
2290 }
2291
2292 // It doesn't have an interface index.
2293 if (!cont->has_index) {
2294 return;
2295 }
2296
2297 inter_info *inter = cont->inter;
2298 bool found = false;
2299
2300 for (uint32_t i = 0; i < inter->n_inters; ++i) {
2301 inter_entry *entry = &inter->inters[i];
2302
2303 if (inter->inters[i].index == cont->curr_index) {
2304 found = true;
2305 entry->def_route = true;
2306 cf_detail(CF_SOCKET, "Collected default route %s -> %s",
2307 entry->name, cf_ip_addr_print(&cont->curr_address));
2308 // Don't stop after the first match. Aliases share the same index.
2309 }
2310 }
2311
2312 if (!found) {
2313 cf_crash(CF_SOCKET, "Invalid interface index: %u", cont->curr_index);
2314 }
2315}
2316
2317static void
2318enumerate_inter(inter_info *inter, bool allow_v6)
2319{
2320 cb_context cont;
2321 memset(&cont, 0, sizeof(cont));
2322 cont.inter = inter;
2323 cont.allow_v6 = allow_v6;
2324
2325 reset_fn(&cont);
2326
2327 if (netlink_dump(RTM_GETLINK, RTM_NEWLINK, IFLA_IFNAME, IFLA_ADDRESS, IFLA_MTU, IFLA_MASTER,
2328 sizeof(struct ifinfomsg), NULL, link_fn, NULL, &cont) < 0) {
2329 cf_crash(CF_SOCKET, "Error while enumerating network links");
2330 }
2331
2332 if (netlink_dump(RTM_GETADDR, RTM_NEWADDR, IFA_LABEL, IFA_ADDRESS, IFA_LOCAL, -1,
2333 sizeof(struct ifaddrmsg), reset_fn, addr_fn, addr_fix_fn, &cont) < 0) {
2334 cf_crash(CF_SOCKET, "Error while enumerating network addresses");
2335 }
2336
2337 if (netlink_dump(RTM_GETROUTE, RTM_NEWROUTE, RTA_DST, RTA_OIF, RTA_PRIORITY, -1,
2338 sizeof(struct rtmsg), reset_fn, route_fn, route_fix_fn, &cont) < 0) {
2339 cf_crash(CF_SOCKET, "Error while enumerating network routes");
2340 }
2341
2342 for (int32_t i = 0; i < inter->n_inters; ++i) {
2343 inter_entry *entry = &inter->inters[i];
2344 cf_ip_addr_sort(entry->addrs, entry->n_addrs);
2345
2346 if (entry->master.index == 0) {
2347 entry->master.entry = NULL;
2348 continue;
2349 }
2350
2351 inter_entry *master = NULL;
2352
2353 for (int32_t k = 0; k < inter->n_inters; ++k) {
2354 inter_entry *cand = &inter->inters[k];
2355
2356 if (cand->index == entry->master.index) {
2357 master = cand;
2358 break;
2359 }
2360 }
2361
2362 if (master == NULL) {
2363 cf_crash(CF_SOCKET, "Invalid master index: %u", entry->master.index);
2364 }
2365
2366 entry->master.entry = master;
2367 }
2368
2369 if (cf_fault_filter[CF_SOCKET] >= CF_DETAIL) {
2370 cf_detail(CF_SOCKET, "%u interface(s)", inter->n_inters);
2371
2372 for (uint32_t i = 0; i < inter->n_inters; ++i) {
2373 inter_entry *entry = &inter->inters[i];
2374 cf_detail(CF_SOCKET, "Name = %s", entry->name);
2375 cf_detail(CF_SOCKET, "MAC address = %02x:%02x:%02x:%02x:%02x:%02x",
2376 entry->mac_addr[0], entry->mac_addr[1], entry->mac_addr[2],
2377 entry->mac_addr[3], entry->mac_addr[4], entry->mac_addr[5]);
2378 cf_detail(CF_SOCKET, "Default route = %d", (int32_t)entry->def_route);
2379 cf_detail(CF_SOCKET, "Up = %d", (int32_t)entry->up);
2380 cf_detail(CF_SOCKET, "MTU = %u", entry->mtu);
2381
2382 for (int32_t k = 0; k < entry->n_addrs; ++k) {
2383 cf_ip_addr *addr = &entry->addrs[k];
2384 cf_detail(CF_SOCKET, "Address = %s", cf_ip_addr_print(addr));
2385 }
2386
2387 cf_detail(CF_SOCKET, "Master = %s",
2388 entry->master.entry != NULL ? entry->master.entry->name : "(none)");
2389
2390 cf_detail(CF_SOCKET, "Physical = %s",
2391 entry->phys != NULL ? entry->phys->name : "(none)");
2392 }
2393 }
2394}
2395
2396static int32_t
2397inter_get_addr(cf_ip_addr *addrs, uint32_t *n_addrs, inter_filter *filter)
2398{
2399 inter_info inter;
2400 memset(&inter, 0, sizeof(inter));
2401 enumerate_inter(&inter, filter->allow_v6);
2402
2403 uint32_t count = 0;
2404
2405 for (uint32_t i = 0; i < inter.n_inters; ++i) {
2406 inter_entry *entry = &inter.inters[i];
2407
2408 if (filter->def_route && !entry->def_route) {
2409 continue;
2410 }
2411
2412 if (filter->up && !entry->up) {
2413 continue;
2414 }
2415
2416 if (filter->if_name != NULL && strcmp(filter->if_name, entry->name) != 0) {
2417 continue;
2418 }
2419
2420 for (uint32_t k = 0; k < entry->n_addrs; ++k) {
2421 cf_ip_addr *addr = &entry->addrs[k];
2422
2423 if (count >= *n_addrs) {
2424 cf_warning(CF_SOCKET, "Buffer overflow while enumerating interface addresses");
2425 return -1;
2426 }
2427
2428 cf_ip_addr_copy(addr, &addrs[count]);
2429 ++count;
2430 }
2431 }
2432
2433 *n_addrs = count;
2434 return 0;
2435}
2436
2437int32_t
2438cf_inter_get_addr_all(cf_ip_addr *addrs, uint32_t *n_addrs)
2439{
2440 static inter_filter filter = {
2441 .allow_v6 = true, .def_route = false, .up = true, .if_name = NULL
2442 };
2443
2444 return inter_get_addr(addrs, n_addrs, &filter);
2445}
2446
2447int32_t
2448cf_inter_get_addr_all_legacy(cf_ip_addr *addrs, uint32_t *n_addrs)
2449{
2450 static inter_filter filter = {
2451 .allow_v6 = false, .def_route = false, .up = true, .if_name = NULL
2452 };
2453
2454 return inter_get_addr(addrs, n_addrs, &filter);
2455}
2456
2457int32_t
2458cf_inter_get_addr_def(cf_ip_addr *addrs, uint32_t *n_addrs)
2459{
2460 static inter_filter filter = {
2461 .allow_v6 = true, .def_route = true, .up = true, .if_name = NULL
2462 };
2463
2464 return inter_get_addr(addrs, n_addrs, &filter);
2465}
2466
2467int32_t
2468cf_inter_get_addr_def_legacy(cf_ip_addr *addrs, uint32_t *n_addrs)
2469{
2470 static inter_filter filter = {
2471 .allow_v6 = false, .def_route = true, .up = true, .if_name = NULL
2472 };
2473
2474 return inter_get_addr(addrs, n_addrs, &filter);
2475}
2476
2477int32_t
2478cf_inter_get_addr_name(cf_ip_addr *addrs, uint32_t *n_addrs, const char *if_name)
2479{
2480 inter_filter filter = {
2481 .allow_v6 = true, .def_route = false, .up = false, .if_name = if_name
2482 };
2483
2484 return inter_get_addr(addrs, n_addrs, &filter);
2485}
2486
2487bool
2488cf_inter_is_inter_name(const char *if_name)
2489{
2490 inter_info inter;
2491 memset(&inter, 0, sizeof(inter));
2492 enumerate_inter(&inter, true);
2493
2494 for (uint32_t i = 0; i < inter.n_inters; ++i) {
2495 if (strcmp(inter.inters[i].name, if_name) == 0) {
2496 return true;
2497 }
2498 }
2499
2500 return false;
2501}
2502
2503int32_t
2504cf_inter_addr_to_index_and_name(const cf_ip_addr *addr, int32_t *index, char **name)
2505{
2506 inter_info inter;
2507 memset(&inter, 0, sizeof(inter));
2508 enumerate_inter(&inter, true);
2509
2510 for (uint32_t i = 0; i < inter.n_inters; ++i) {
2511 inter_entry *entry = &inter.inters[i];
2512
2513 for (uint32_t k = 0; k < entry->n_addrs; ++k) {
2514 if (cf_ip_addr_compare(&entry->addrs[k], addr) == 0) {
2515 if (name != NULL) {
2516 *name = cf_strdup(entry->name);
2517 }
2518
2519 if (index != NULL) {
2520 *index = (int32_t)entry->index;
2521 }
2522
2523 return 0;
2524 }
2525 }
2526 }
2527
2528 return -1;
2529}
2530
2531int32_t
2532cf_inter_get_physical(const char *if_name, char *phys_name,
2533 uint32_t phys_name_sz)
2534{
2535 inter_info inter;
2536 memset(&inter, 0, sizeof(inter));
2537 enumerate_inter(&inter, true);
2538
2539 inter_entry *entry = NULL;
2540
2541 for (uint32_t i = 0; i < inter.n_inters; ++i) {
2542 if (strcmp(inter.inters[i].name, if_name) == 0) {
2543 entry = &inter.inters[i];
2544 break;
2545 }
2546 }
2547
2548 if (entry == NULL) {
2549 return -1;
2550 }
2551
2552 const char *name = entry->phys != NULL ? entry->phys->name : entry->name;
2553
2554 if (strlen(name) >= phys_name_sz) {
2555 cf_crash(CF_SOCKET, "Name buffer overflow");
2556 }
2557
2558 strcpy(phys_name, name);
2559 return 0;
2560}
2561
2562void
2563cf_inter_expand_bond(const char *if_name, char **out_names, uint32_t *n_out)
2564{
2565 inter_info inter;
2566 memset(&inter, 0, sizeof(inter));
2567 enumerate_inter(&inter, true);
2568
2569 uint32_t n = 0;
2570
2571 for (uint32_t i = 0; i < inter.n_inters; ++i) {
2572 inter_entry *entry = &inter.inters[i];
2573
2574 if (entry->master.entry == NULL || strcmp(entry->master.entry->name, if_name) != 0) {
2575 continue;
2576 }
2577
2578 if (n >= *n_out) {
2579 cf_crash(CF_SOCKET, "Output buffer overflow");
2580 }
2581
2582 out_names[n] = cf_strdup(entry->name);
2583 ++n;
2584 }
2585
2586 if (n == 0) {
2587 out_names[0] = cf_strdup(if_name);
2588 n = 1;
2589 }
2590
2591 *n_out = n;
2592}
2593
2594int32_t
2595cf_inter_mtu(const cf_ip_addr *inter_addr)
2596{
2597 inter_info inter;
2598 memset(&inter, 0, sizeof(inter));
2599 enumerate_inter(&inter, true);
2600
2601 for (uint32_t i = 0; i < inter.n_inters; ++i) {
2602 inter_entry *entry = &inter.inters[i];
2603
2604 for (uint32_t k = 0; k < entry->n_addrs; ++k) {
2605 cf_ip_addr *entry_addr = &entry->addrs[k];
2606
2607 if (cf_ip_addr_compare(inter_addr, entry_addr) == 0) {
2608 return entry->mtu;
2609 }
2610 }
2611 }
2612
2613 return -1;
2614}
2615
2616int32_t
2617cf_inter_min_mtu(void)
2618{
2619 uint32_t min = UINT32_MAX;
2620 inter_info inter;
2621 memset(&inter, 0, sizeof(inter));
2622 enumerate_inter(&inter, true);
2623
2624 for (uint32_t i = 0; i < inter.n_inters; ++i) {
2625 inter_entry *entry = &inter.inters[i];
2626
2627 if (entry->up && entry->mtu < min) {
2628 min = entry->mtu;
2629 }
2630 }
2631
2632 return (int32_t)min;
2633}
2634
2635static bool
2636detect_changes(bool legacy, cf_ip_addr *addrs, uint32_t *n_addrs, uint32_t limit)
2637{
2638 cf_ip_addr curr[CF_SOCK_CFG_MAX];
2639 uint32_t n_curr = CF_SOCK_CFG_MAX;
2640 int32_t res;
2641
2642 if (legacy) {
2643 res = cf_inter_get_addr_all_legacy(curr, &n_curr);
2644 }
2645 else {
2646 res = cf_inter_get_addr_all(curr, &n_curr);
2647 }
2648
2649 if (res < 0) {
2650 cf_crash(AS_INFO, "Error while getting interface addresses");
2651 }
2652
2653 if (n_curr > limit) {
2654 cf_crash(AS_INFO, "Too many network interface addresses: %d", n_curr);
2655 }
2656
2657 cf_ip_addr_sort(curr, n_curr);
2658 uint32_t n_filter = 0;
2659
2660 for (uint32_t i = 0; i < n_curr; ++i) {
2661 if (cf_ip_addr_is_local(&curr[i])) {
2662 continue;
2663 }
2664
2665 if (i > n_filter) {
2666 cf_ip_addr_copy(&curr[i], &curr[n_filter]);
2667 }
2668
2669 ++n_filter;
2670 }
2671
2672 n_curr = n_filter;
2673 bool change = false;
2674
2675 if (n_curr != *n_addrs) {
2676 change = true;
2677 }
2678 else {
2679 for (uint32_t i = 0; i < n_curr; ++i) {
2680 if (cf_ip_addr_compare(&addrs[i], &curr[i]) != 0) {
2681 change = true;
2682 break;
2683 }
2684 }
2685 }
2686
2687 if (change) {
2688 for (uint32_t i = 0; i < n_curr; ++i) {
2689 cf_ip_addr_copy(&curr[i], &addrs[i]);
2690 }
2691
2692 *n_addrs = n_curr;
2693 }
2694
2695 return change;
2696}
2697
2698bool
2699cf_inter_detect_changes(cf_ip_addr *addrs, uint32_t *n_addrs, uint32_t limit)
2700{
2701 return detect_changes(false, addrs, n_addrs, limit);
2702}
2703
2704bool
2705cf_inter_detect_changes_legacy(cf_ip_addr *addrs, uint32_t *n_addrs, uint32_t limit)
2706{
2707 return detect_changes(true, addrs, n_addrs, limit);
2708}
2709
// Interface name prefixes in priority order; cf_node_id_get() appends a
// numeric suffix to each of them when looking for an interface. The list is
// NULL-terminated.
static const char *if_in_order[] = {
	"eth",
	"bond",
	"wlan",
	NULL
};
2714
// Primary set of interface name patterns (POSIX extended regular expressions)
// that cf_node_id_get() hands to match_inter(). The list is NULL-terminated.
static const char *if_default[] = {
	"^eth[[:digit:]]+$",
	"^bond[[:digit:]]+$",
	"^wlan[[:digit:]]+$",
	"^em[[:digit:]]+_[[:digit:]]+$",
	"^p[[:digit:]]+p[[:digit:]]+_[[:digit:]]+$",
	NULL
};
2720
// Secondary set of interface name patterns (POSIX extended regular
// expressions); cf_node_id_get() tries them after the primary set. The list is
// NULL-terminated.
static const char *if_default2[] = {
	"^em[[:digit:]]+$",
	"^p[[:digit:]]+p[[:digit:]]+$",
	NULL
};
2724
// Catch-all pattern list: a single regular expression that matches every
// interface name. Used by cf_node_id_get() as the last resort. The list is
// NULL-terminated.
static const char *if_any[] = {
	"^.*$", NULL
};
2729
2730static bool
2731validate_inter(inter_entry *entry)
2732{
2733 cf_debug(CF_SOCKET, "Validating interface %s", entry->name);
2734
2735 if (entry->n_addrs == 0) {
2736 cf_debug(CF_SOCKET, "No IP addresses");
2737 return false;
2738 }
2739
2740 if (entry->mac_addr_len < 6) {
2741 cf_debug(CF_SOCKET, "Invalid MAC address length: %d", entry->mac_addr_len);
2742 return false;
2743 }
2744
2745 static const uint8_t all0[6] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 };
2746 static const uint8_t all1[6] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
2747
2748 if (memcmp(entry->mac_addr, all0, 6) == 0 || memcmp(entry->mac_addr, all1, 6) == 0) {
2749 cf_debug(CF_SOCKET, "Invalid MAC address: %02x:%02x:%02x:%02x:%02x:%02x",
2750 entry->mac_addr[0], entry->mac_addr[1], entry->mac_addr[2],
2751 entry->mac_addr[3], entry->mac_addr[4], entry->mac_addr[5]);
2752 return false;
2753 }
2754
2755 cf_debug(CF_SOCKET, "Interface OK");
2756 return true;
2757}
2758
2759static inter_entry *
2760find_inter(inter_info *inter, const char *name, bool validate)
2761{
2762 cf_debug(CF_SOCKET, "Looking for %s", name);
2763
2764 for (uint32_t i = 0; i < inter->n_inters; ++i) {
2765 inter_entry *entry = &inter->inters[i];
2766 cf_debug(CF_SOCKET, "Checking %s", entry->name);
2767
2768 if (strcmp(entry->name, name) == 0 && (!validate || validate_inter(entry))) {
2769 return entry;
2770 }
2771 }
2772
2773 return NULL;
2774}
2775
2776static inter_entry *
2777match_inter(inter_info *inter, const char **patterns)
2778{
2779 for (uint32_t i = 0; i < inter->n_inters; ++i) {
2780 inter_entry *entry = &inter->inters[i];
2781 cf_debug(CF_SOCKET, "Matching %s", entry->name);
2782
2783 for (uint32_t k = 0; patterns[k] != NULL; ++k) {
2784 cf_debug(CF_SOCKET, "Matching with %s", patterns[k]);
2785 regex_t rex;
2786
2787 if (regcomp(&rex, patterns[k], REG_EXTENDED | REG_NOSUB) != 0) {
2788 cf_crash(CF_SOCKET, "Error while compiling regular expression %s", patterns[k]);
2789 }
2790
2791 bool ok = regexec(&rex, entry->name, 0, NULL, 0) == 0 && validate_inter(entry);
2792 regfree(&rex);
2793
2794 if (ok) {
2795 return entry;
2796 }
2797 }
2798 }
2799
2800 return NULL;
2801}
2802
2803int32_t
2804cf_node_id_get(cf_ip_port port, const char *if_hint, cf_node *id)
2805{
2806 cf_debug(CF_SOCKET, "Getting node ID");
2807 inter_info inter;
2808 memset(&inter, 0, sizeof(inter));
2809 enumerate_inter(&inter, true);
2810
2811 inter_entry *entry;
2812
2813 if (if_hint != NULL) {
2814 cf_debug(CF_SOCKET, "Checking user-specified interface %s", if_hint);
2815 entry = find_inter(&inter, if_hint, false);
2816
2817 if (entry != NULL) {
2818 goto success;
2819 }
2820
2821 cf_warning(CF_SOCKET, "Unable to find interface %s specified in configuration file",
2822 if_hint);
2823 return -1;
2824 }
2825
2826 cf_debug(CF_SOCKET, "Trying default interfaces in order");
2827
2828 for (int32_t i = 0; if_in_order[i] != NULL; ++i) {
2829 for (int32_t k = 0; k < 11; ++k) {
2830 char tmp[100];
2831 snprintf(tmp, sizeof(tmp), "%s%d", if_in_order[i], k);
2832 entry = find_inter(&inter, tmp, true);
2833
2834 if (entry != NULL) {
2835 goto success;
2836 }
2837 }
2838 }
2839
2840 cf_debug(CF_SOCKET, "Trying default interfaces");
2841 entry = match_inter(&inter, if_default);
2842
2843 if (entry != NULL) {
2844 goto success;
2845 }
2846
2847 cf_debug(CF_SOCKET, "Trying secondary default interfaces");
2848 entry = match_inter(&inter, if_default2);
2849
2850 if (entry != NULL) {
2851 goto success;
2852 }
2853
2854 cf_debug(CF_SOCKET, "Trying any interface");
2855 entry = match_inter(&inter, if_any);
2856
2857 if (entry != NULL) {
2858 goto success;
2859 }
2860
2861 cf_warning(CF_SOCKET, "Unable to find any suitable network device for node ID");
2862 return -1;
2863
2864success:
2865 ;
2866 uint8_t *buff = (uint8_t *)id;
2867
2868 if (entry->mac_addr_len == 6) {
2869 memcpy(buff, entry->mac_addr, 6);
2870 }
2871 else {
2872 cf_digest dig;
2873 cf_digest_compute(entry->mac_addr, entry->mac_addr_len, &dig);
2874 memcpy(buff, dig.digest, 6);
2875 }
2876
2877 memcpy(buff + 6, &port, 2);
2878
2879 cf_info(CF_SOCKET, "Node port %d, node ID %" PRIx64, port, *id);
2880 return 0;
2881}
2882