1 | /* |
2 | * Copyright 6WIND S.A., 2014 |
3 | * |
4 | * This work is licensed under the terms of the GNU GPL, version 2 or |
5 | * (at your option) any later version. See the COPYING file in the |
6 | * top-level directory. |
7 | */ |
8 | #include "qemu/osdep.h" |
9 | #include "qemu/host-utils.h" |
10 | #include "qemu/sockets.h" |
11 | |
12 | #include <sys/socket.h> |
13 | #include <sys/un.h> |
14 | |
15 | #include "ivshmem-server.h" |
16 | |
/* Log a message on stdout, but only when the server was created with
 * verbose=1; expands to a no-op branch otherwise.  Multi-statement macro
 * wrapped in do/while(0) so it composes safely with if/else. */
#define IVSHMEM_SERVER_DEBUG(server, fmt, ...) do { \
        if ((server)->verbose) { \
            printf(fmt, ## __VA_ARGS__); \
        } \
    } while (0)

/** maximum size of a huge page (1 GiB); upper bound for the geometric
 * probing done by ivshmem_server_ftruncate() */
#define IVSHMEM_SERVER_MAX_HUGEPAGE_SIZE (1024 * 1024 * 1024)

/** default listen backlog (number of sockets not accepted) */
#define IVSHMEM_SERVER_LISTEN_BACKLOG 10
29 | |
30 | /* send message to a client unix socket */ |
/* Send one protocol message on a client unix socket: an 8-byte
 * little-endian peer id, optionally carrying a file descriptor as
 * SCM_RIGHTS ancillary data (fd < 0 means "no fd attached").
 * Returns 0 on success, -1 on failure. */
static int
ivshmem_server_send_one_msg(int sock_fd, int64_t peer_id, int fd)
{
    struct msghdr hdr;
    struct iovec payload[1];
    union {
        struct cmsghdr cmsg;
        char control[CMSG_SPACE(sizeof(int))];
    } ctrl;
    struct cmsghdr *cmsgp;

    /* wire format: the peer id, always little-endian */
    peer_id = GINT64_TO_LE(peer_id);
    payload[0].iov_base = &peer_id;
    payload[0].iov_len = sizeof(peer_id);

    memset(&hdr, 0, sizeof(hdr));
    hdr.msg_iov = payload;
    hdr.msg_iovlen = 1;

    /* attach the descriptor as ancillary data when one is given */
    if (fd >= 0) {
        memset(&ctrl, 0, sizeof(ctrl));
        hdr.msg_control = &ctrl;
        hdr.msg_controllen = sizeof(ctrl);
        cmsgp = CMSG_FIRSTHDR(&hdr);
        cmsgp->cmsg_level = SOL_SOCKET;
        cmsgp->cmsg_type = SCM_RIGHTS;
        cmsgp->cmsg_len = CMSG_LEN(sizeof(int));
        memcpy(CMSG_DATA(cmsgp), &fd, sizeof(fd));
    }

    /* a short (0-byte) send is treated as failure, like an error */
    if (sendmsg(sock_fd, &hdr, 0) <= 0) {
        return -1;
    }

    return 0;
}
70 | |
71 | /* free a peer when the server advertises a disconnection or when the |
72 | * server is freed */ |
73 | static void |
74 | ivshmem_server_free_peer(IvshmemServer *server, IvshmemServerPeer *peer) |
75 | { |
76 | unsigned vector; |
77 | IvshmemServerPeer *other_peer; |
78 | |
79 | IVSHMEM_SERVER_DEBUG(server, "free peer %" PRId64 "\n" , peer->id); |
80 | close(peer->sock_fd); |
81 | QTAILQ_REMOVE(&server->peer_list, peer, next); |
82 | |
83 | /* advertise the deletion to other peers */ |
84 | QTAILQ_FOREACH(other_peer, &server->peer_list, next) { |
85 | ivshmem_server_send_one_msg(other_peer->sock_fd, peer->id, -1); |
86 | } |
87 | |
88 | for (vector = 0; vector < peer->vectors_count; vector++) { |
89 | event_notifier_cleanup(&peer->vectors[vector]); |
90 | } |
91 | |
92 | g_free(peer); |
93 | } |
94 | |
95 | /* send the peer id and the shm_fd just after a new client connection */ |
96 | static int |
97 | ivshmem_server_send_initial_info(IvshmemServer *server, IvshmemServerPeer *peer) |
98 | { |
99 | int ret; |
100 | |
101 | /* send our protocol version first */ |
102 | ret = ivshmem_server_send_one_msg(peer->sock_fd, IVSHMEM_PROTOCOL_VERSION, |
103 | -1); |
104 | if (ret < 0) { |
105 | IVSHMEM_SERVER_DEBUG(server, "cannot send version: %s\n" , |
106 | strerror(errno)); |
107 | return -1; |
108 | } |
109 | |
110 | /* send the peer id to the client */ |
111 | ret = ivshmem_server_send_one_msg(peer->sock_fd, peer->id, -1); |
112 | if (ret < 0) { |
113 | IVSHMEM_SERVER_DEBUG(server, "cannot send peer id: %s\n" , |
114 | strerror(errno)); |
115 | return -1; |
116 | } |
117 | |
118 | /* send the shm_fd */ |
119 | ret = ivshmem_server_send_one_msg(peer->sock_fd, -1, server->shm_fd); |
120 | if (ret < 0) { |
121 | IVSHMEM_SERVER_DEBUG(server, "cannot send shm fd: %s\n" , |
122 | strerror(errno)); |
123 | return -1; |
124 | } |
125 | |
126 | return 0; |
127 | } |
128 | |
129 | /* handle message on listening unix socket (new client connection) */ |
130 | static int |
131 | ivshmem_server_handle_new_conn(IvshmemServer *server) |
132 | { |
133 | IvshmemServerPeer *peer, *other_peer; |
134 | struct sockaddr_un unaddr; |
135 | socklen_t unaddr_len; |
136 | int newfd; |
137 | unsigned i; |
138 | |
139 | /* accept the incoming connection */ |
140 | unaddr_len = sizeof(unaddr); |
141 | newfd = qemu_accept(server->sock_fd, |
142 | (struct sockaddr *)&unaddr, &unaddr_len); |
143 | |
144 | if (newfd < 0) { |
145 | IVSHMEM_SERVER_DEBUG(server, "cannot accept() %s\n" , strerror(errno)); |
146 | return -1; |
147 | } |
148 | |
149 | qemu_set_nonblock(newfd); |
150 | IVSHMEM_SERVER_DEBUG(server, "accept()=%d\n" , newfd); |
151 | |
152 | /* allocate new structure for this peer */ |
153 | peer = g_malloc0(sizeof(*peer)); |
154 | peer->sock_fd = newfd; |
155 | |
156 | /* get an unused peer id */ |
157 | /* XXX: this could use id allocation such as Linux IDA, or simply |
158 | * a free-list */ |
159 | for (i = 0; i < G_MAXUINT16; i++) { |
160 | if (ivshmem_server_search_peer(server, server->cur_id) == NULL) { |
161 | break; |
162 | } |
163 | server->cur_id++; |
164 | } |
165 | if (i == G_MAXUINT16) { |
166 | IVSHMEM_SERVER_DEBUG(server, "cannot allocate new client id\n" ); |
167 | close(newfd); |
168 | g_free(peer); |
169 | return -1; |
170 | } |
171 | peer->id = server->cur_id++; |
172 | |
173 | /* create eventfd, one per vector */ |
174 | peer->vectors_count = server->n_vectors; |
175 | for (i = 0; i < peer->vectors_count; i++) { |
176 | if (event_notifier_init(&peer->vectors[i], FALSE) < 0) { |
177 | IVSHMEM_SERVER_DEBUG(server, "cannot create eventfd\n" ); |
178 | goto fail; |
179 | } |
180 | } |
181 | |
182 | /* send peer id and shm fd */ |
183 | if (ivshmem_server_send_initial_info(server, peer) < 0) { |
184 | IVSHMEM_SERVER_DEBUG(server, "cannot send initial info\n" ); |
185 | goto fail; |
186 | } |
187 | |
188 | /* advertise the new peer to others */ |
189 | QTAILQ_FOREACH(other_peer, &server->peer_list, next) { |
190 | for (i = 0; i < peer->vectors_count; i++) { |
191 | ivshmem_server_send_one_msg(other_peer->sock_fd, peer->id, |
192 | peer->vectors[i].wfd); |
193 | } |
194 | } |
195 | |
196 | /* advertise the other peers to the new one */ |
197 | QTAILQ_FOREACH(other_peer, &server->peer_list, next) { |
198 | for (i = 0; i < peer->vectors_count; i++) { |
199 | ivshmem_server_send_one_msg(peer->sock_fd, other_peer->id, |
200 | other_peer->vectors[i].wfd); |
201 | } |
202 | } |
203 | |
204 | /* advertise the new peer to itself */ |
205 | for (i = 0; i < peer->vectors_count; i++) { |
206 | ivshmem_server_send_one_msg(peer->sock_fd, peer->id, |
207 | event_notifier_get_fd(&peer->vectors[i])); |
208 | } |
209 | |
210 | QTAILQ_INSERT_TAIL(&server->peer_list, peer, next); |
211 | IVSHMEM_SERVER_DEBUG(server, "new peer id = %" PRId64 "\n" , |
212 | peer->id); |
213 | return 0; |
214 | |
215 | fail: |
216 | while (i--) { |
217 | event_notifier_cleanup(&peer->vectors[i]); |
218 | } |
219 | close(newfd); |
220 | g_free(peer); |
221 | return -1; |
222 | } |
223 | |
224 | /* Try to ftruncate a file to next power of 2 of shmsize. |
225 | * If it fails; all power of 2 above shmsize are tested until |
226 | * we reach the maximum huge page size. This is useful |
227 | * if the shm file is in a hugetlbfs that cannot be truncated to the |
228 | * shm_size value. */ |
229 | static int |
230 | ivshmem_server_ftruncate(int fd, unsigned shmsize) |
231 | { |
232 | int ret; |
233 | struct stat mapstat; |
234 | |
235 | /* align shmsize to next power of 2 */ |
236 | shmsize = pow2ceil(shmsize); |
237 | |
238 | if (fstat(fd, &mapstat) != -1 && mapstat.st_size == shmsize) { |
239 | return 0; |
240 | } |
241 | |
242 | while (shmsize <= IVSHMEM_SERVER_MAX_HUGEPAGE_SIZE) { |
243 | ret = ftruncate(fd, shmsize); |
244 | if (ret == 0) { |
245 | return ret; |
246 | } |
247 | shmsize *= 2; |
248 | } |
249 | |
250 | return -1; |
251 | } |
252 | |
253 | /* Init a new ivshmem server */ |
254 | int |
255 | ivshmem_server_init(IvshmemServer *server, const char *unix_sock_path, |
256 | const char *shm_path, bool use_shm_open, |
257 | size_t shm_size, unsigned n_vectors, |
258 | bool verbose) |
259 | { |
260 | int ret; |
261 | |
262 | memset(server, 0, sizeof(*server)); |
263 | server->verbose = verbose; |
264 | |
265 | ret = snprintf(server->unix_sock_path, sizeof(server->unix_sock_path), |
266 | "%s" , unix_sock_path); |
267 | if (ret < 0 || ret >= sizeof(server->unix_sock_path)) { |
268 | IVSHMEM_SERVER_DEBUG(server, "could not copy unix socket path\n" ); |
269 | return -1; |
270 | } |
271 | ret = snprintf(server->shm_path, sizeof(server->shm_path), |
272 | "%s" , shm_path); |
273 | if (ret < 0 || ret >= sizeof(server->shm_path)) { |
274 | IVSHMEM_SERVER_DEBUG(server, "could not copy shm path\n" ); |
275 | return -1; |
276 | } |
277 | |
278 | server->use_shm_open = use_shm_open; |
279 | server->shm_size = shm_size; |
280 | server->n_vectors = n_vectors; |
281 | |
282 | QTAILQ_INIT(&server->peer_list); |
283 | |
284 | return 0; |
285 | } |
286 | |
287 | /* open shm, create and bind to the unix socket */ |
288 | int |
289 | ivshmem_server_start(IvshmemServer *server) |
290 | { |
291 | struct sockaddr_un sun; |
292 | int shm_fd, sock_fd, ret; |
293 | |
294 | /* open shm file */ |
295 | if (server->use_shm_open) { |
296 | IVSHMEM_SERVER_DEBUG(server, "Using POSIX shared memory: %s\n" , |
297 | server->shm_path); |
298 | shm_fd = shm_open(server->shm_path, O_CREAT | O_RDWR, S_IRWXU); |
299 | } else { |
300 | gchar *filename = g_strdup_printf("%s/ivshmem.XXXXXX" , server->shm_path); |
301 | IVSHMEM_SERVER_DEBUG(server, "Using file-backed shared memory: %s\n" , |
302 | server->shm_path); |
303 | shm_fd = mkstemp(filename); |
304 | unlink(filename); |
305 | g_free(filename); |
306 | } |
307 | |
308 | if (shm_fd < 0) { |
309 | fprintf(stderr, "cannot open shm file %s: %s\n" , server->shm_path, |
310 | strerror(errno)); |
311 | return -1; |
312 | } |
313 | if (ivshmem_server_ftruncate(shm_fd, server->shm_size) < 0) { |
314 | fprintf(stderr, "ftruncate(%s) failed: %s\n" , server->shm_path, |
315 | strerror(errno)); |
316 | goto err_close_shm; |
317 | } |
318 | |
319 | IVSHMEM_SERVER_DEBUG(server, "create & bind socket %s\n" , |
320 | server->unix_sock_path); |
321 | |
322 | /* create the unix listening socket */ |
323 | sock_fd = socket(AF_UNIX, SOCK_STREAM, 0); |
324 | if (sock_fd < 0) { |
325 | IVSHMEM_SERVER_DEBUG(server, "cannot create socket: %s\n" , |
326 | strerror(errno)); |
327 | goto err_close_shm; |
328 | } |
329 | |
330 | sun.sun_family = AF_UNIX; |
331 | ret = snprintf(sun.sun_path, sizeof(sun.sun_path), "%s" , |
332 | server->unix_sock_path); |
333 | if (ret < 0 || ret >= sizeof(sun.sun_path)) { |
334 | IVSHMEM_SERVER_DEBUG(server, "could not copy unix socket path\n" ); |
335 | goto err_close_sock; |
336 | } |
337 | if (bind(sock_fd, (struct sockaddr *)&sun, sizeof(sun)) < 0) { |
338 | IVSHMEM_SERVER_DEBUG(server, "cannot connect to %s: %s\n" , sun.sun_path, |
339 | strerror(errno)); |
340 | goto err_close_sock; |
341 | } |
342 | |
343 | if (listen(sock_fd, IVSHMEM_SERVER_LISTEN_BACKLOG) < 0) { |
344 | IVSHMEM_SERVER_DEBUG(server, "listen() failed: %s\n" , strerror(errno)); |
345 | goto err_close_sock; |
346 | } |
347 | |
348 | server->sock_fd = sock_fd; |
349 | server->shm_fd = shm_fd; |
350 | |
351 | return 0; |
352 | |
353 | err_close_sock: |
354 | close(sock_fd); |
355 | err_close_shm: |
356 | close(shm_fd); |
357 | return -1; |
358 | } |
359 | |
360 | /* close connections to clients, the unix socket and the shm fd */ |
361 | void |
362 | ivshmem_server_close(IvshmemServer *server) |
363 | { |
364 | IvshmemServerPeer *peer, *npeer; |
365 | |
366 | IVSHMEM_SERVER_DEBUG(server, "close server\n" ); |
367 | |
368 | QTAILQ_FOREACH_SAFE(peer, &server->peer_list, next, npeer) { |
369 | ivshmem_server_free_peer(server, peer); |
370 | } |
371 | |
372 | unlink(server->unix_sock_path); |
373 | close(server->sock_fd); |
374 | close(server->shm_fd); |
375 | server->sock_fd = -1; |
376 | server->shm_fd = -1; |
377 | } |
378 | |
379 | /* get the fd_set according to the unix socket and the peer list */ |
380 | void |
381 | ivshmem_server_get_fds(const IvshmemServer *server, fd_set *fds, int *maxfd) |
382 | { |
383 | IvshmemServerPeer *peer; |
384 | |
385 | if (server->sock_fd == -1) { |
386 | return; |
387 | } |
388 | |
389 | FD_SET(server->sock_fd, fds); |
390 | if (server->sock_fd >= *maxfd) { |
391 | *maxfd = server->sock_fd + 1; |
392 | } |
393 | |
394 | QTAILQ_FOREACH(peer, &server->peer_list, next) { |
395 | FD_SET(peer->sock_fd, fds); |
396 | if (peer->sock_fd >= *maxfd) { |
397 | *maxfd = peer->sock_fd + 1; |
398 | } |
399 | } |
400 | } |
401 | |
402 | /* process incoming messages on the sockets in fd_set */ |
/* process incoming messages on the sockets in fd_set */
/* Returns -1 only when accepting a new connection failed for a reason
 * other than EINTR; peer-socket activity always means "disconnect" and
 * never fails the call. */
int
ivshmem_server_handle_fds(IvshmemServer *server, fd_set *fds, int maxfd)
{
    IvshmemServerPeer *peer, *peer_next;

    /* short-circuit order matters: handle_new_conn() only runs when the
     * listening socket is actually readable, and errno is only consulted
     * right after it fails (EINTR is treated as transient, not an error) */
    if (server->sock_fd < maxfd && FD_ISSET(server->sock_fd, fds) &&
        ivshmem_server_handle_new_conn(server) < 0 && errno != EINTR) {
        IVSHMEM_SERVER_DEBUG(server, "ivshmem_server_handle_new_conn() "
                             "failed\n" );
        return -1;
    }

    QTAILQ_FOREACH_SAFE(peer, &server->peer_list, next, peer_next) {
        /* any message from a peer socket result in a close() */
        IVSHMEM_SERVER_DEBUG(server, "peer->sock_fd=%d\n" , peer->sock_fd);
        if (peer->sock_fd < maxfd && FD_ISSET(peer->sock_fd, fds)) {
            /* SAFE iteration: free_peer removes the entry from the list */
            ivshmem_server_free_peer(server, peer);
        }
    }

    return 0;
}
425 | |
426 | /* lookup peer from its id */ |
427 | IvshmemServerPeer * |
428 | ivshmem_server_search_peer(IvshmemServer *server, int64_t peer_id) |
429 | { |
430 | IvshmemServerPeer *peer; |
431 | |
432 | QTAILQ_FOREACH(peer, &server->peer_list, next) { |
433 | if (peer->id == peer_id) { |
434 | return peer; |
435 | } |
436 | } |
437 | return NULL; |
438 | } |
439 | |
440 | /* dump our info, the list of peers their vectors on stdout */ |
441 | void |
442 | ivshmem_server_dump(const IvshmemServer *server) |
443 | { |
444 | const IvshmemServerPeer *peer; |
445 | unsigned vector; |
446 | |
447 | /* dump peers */ |
448 | QTAILQ_FOREACH(peer, &server->peer_list, next) { |
449 | printf("peer_id = %" PRId64 "\n" , peer->id); |
450 | |
451 | for (vector = 0; vector < peer->vectors_count; vector++) { |
452 | printf(" vector %d is enabled (fd=%d)\n" , vector, |
453 | event_notifier_get_fd(&peer->vectors[vector])); |
454 | } |
455 | } |
456 | } |
457 | |