1 | /* |
2 | * Copyright (C) 2016-2018 Red Hat, Inc. |
3 | * Copyright (C) 2005 Anthony Liguori <anthony@codemonkey.ws> |
4 | * |
5 | * Network Block Device Client Side |
6 | * |
7 | * This program is free software; you can redistribute it and/or modify |
8 | * it under the terms of the GNU General Public License as published by |
9 | * the Free Software Foundation; under version 2 of the License. |
10 | * |
11 | * This program is distributed in the hope that it will be useful, |
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
14 | * GNU General Public License for more details. |
15 | * |
16 | * You should have received a copy of the GNU General Public License |
17 | * along with this program; if not, see <http://www.gnu.org/licenses/>. |
18 | */ |
19 | |
20 | #include "qemu/osdep.h" |
21 | #include "qapi/error.h" |
22 | #include "qemu/queue.h" |
23 | #include "trace.h" |
24 | #include "nbd-internal.h" |
25 | #include "qemu/cutils.h" |
26 | |
27 | /* Definitions for opaque data types */ |
28 | |
/*
 * File-scope tail-queue head for NBDExport entries, statically
 * initialized with QTAILQ_HEAD_INITIALIZER.
 * NOTE(review): none of the client-side code visible in this part of
 * the file references this list -- confirm whether it is still needed.
 */
static QTAILQ_HEAD(, NBDExport) exports = QTAILQ_HEAD_INITIALIZER(exports);
30 | |
31 | /* That's all folks */ |
32 | |
/* Basic flow for negotiation

   Server         Client
   Negotiate

   or

   Server         Client
   Negotiate #1
                  Option
   Negotiate #2

   ----

   followed by

   Server         Client
                  Request
   Response
                  Request
   Response
   ...
   ...
                  Request (type == 2)

*/
59 | |
60 | /* Send an option request. |
61 | * |
62 | * The request is for option @opt, with @data containing @len bytes of |
63 | * additional payload for the request (@len may be -1 to treat @data as |
64 | * a C string; and @data may be NULL if @len is 0). |
65 | * Return 0 if successful, -1 with errp set if it is impossible to |
66 | * continue. */ |
67 | static int nbd_send_option_request(QIOChannel *ioc, uint32_t opt, |
68 | uint32_t len, const char *data, |
69 | Error **errp) |
70 | { |
71 | NBDOption req; |
72 | QEMU_BUILD_BUG_ON(sizeof(req) != 16); |
73 | |
74 | if (len == -1) { |
75 | req.length = len = strlen(data); |
76 | } |
77 | trace_nbd_send_option_request(opt, nbd_opt_lookup(opt), len); |
78 | |
79 | stq_be_p(&req.magic, NBD_OPTS_MAGIC); |
80 | stl_be_p(&req.option, opt); |
81 | stl_be_p(&req.length, len); |
82 | |
83 | if (nbd_write(ioc, &req, sizeof(req), errp) < 0) { |
84 | error_prepend(errp, "Failed to send option request header: " ); |
85 | return -1; |
86 | } |
87 | |
88 | if (len && nbd_write(ioc, (char *) data, len, errp) < 0) { |
89 | error_prepend(errp, "Failed to send option request data: " ); |
90 | return -1; |
91 | } |
92 | |
93 | return 0; |
94 | } |
95 | |
96 | /* Send NBD_OPT_ABORT as a courtesy to let the server know that we are |
97 | * not going to attempt further negotiation. */ |
98 | static void nbd_send_opt_abort(QIOChannel *ioc) |
99 | { |
100 | /* Technically, a compliant server is supposed to reply to us; but |
101 | * older servers disconnected instead. At any rate, we're allowed |
102 | * to disconnect without waiting for the server reply, so we don't |
103 | * even care if the request makes it to the server, let alone |
104 | * waiting around for whether the server replies. */ |
105 | nbd_send_option_request(ioc, NBD_OPT_ABORT, 0, NULL, NULL); |
106 | } |
107 | |
108 | |
109 | /* Receive the header of an option reply, which should match the given |
110 | * opt. Read through the length field, but NOT the length bytes of |
111 | * payload. Return 0 if successful, -1 with errp set if it is |
112 | * impossible to continue. */ |
113 | static int nbd_receive_option_reply(QIOChannel *ioc, uint32_t opt, |
114 | NBDOptionReply *reply, Error **errp) |
115 | { |
116 | QEMU_BUILD_BUG_ON(sizeof(*reply) != 20); |
117 | if (nbd_read(ioc, reply, sizeof(*reply), "option reply" , errp) < 0) { |
118 | nbd_send_opt_abort(ioc); |
119 | return -1; |
120 | } |
121 | reply->magic = be64_to_cpu(reply->magic); |
122 | reply->option = be32_to_cpu(reply->option); |
123 | reply->type = be32_to_cpu(reply->type); |
124 | reply->length = be32_to_cpu(reply->length); |
125 | |
126 | trace_nbd_receive_option_reply(reply->option, nbd_opt_lookup(reply->option), |
127 | reply->type, nbd_rep_lookup(reply->type), |
128 | reply->length); |
129 | |
130 | if (reply->magic != NBD_REP_MAGIC) { |
131 | error_setg(errp, "Unexpected option reply magic" ); |
132 | nbd_send_opt_abort(ioc); |
133 | return -1; |
134 | } |
135 | if (reply->option != opt) { |
136 | error_setg(errp, "Unexpected option type %u (%s), expected %u (%s)" , |
137 | reply->option, nbd_opt_lookup(reply->option), |
138 | opt, nbd_opt_lookup(opt)); |
139 | nbd_send_opt_abort(ioc); |
140 | return -1; |
141 | } |
142 | return 0; |
143 | } |
144 | |
145 | /* If reply represents success, return 1 without further action. |
146 | * If reply represents an error, consume the optional payload of |
147 | * the packet on ioc. Then return 0 for unsupported (so the client |
148 | * can fall back to other approaches), or -1 with errp set for other |
149 | * errors. |
150 | */ |
151 | static int nbd_handle_reply_err(QIOChannel *ioc, NBDOptionReply *reply, |
152 | Error **errp) |
153 | { |
154 | char *msg = NULL; |
155 | int result = -1; |
156 | |
157 | if (!(reply->type & (1 << 31))) { |
158 | return 1; |
159 | } |
160 | |
161 | if (reply->length) { |
162 | if (reply->length > NBD_MAX_BUFFER_SIZE) { |
163 | error_setg(errp, "server error %" PRIu32 |
164 | " (%s) message is too long" , |
165 | reply->type, nbd_rep_lookup(reply->type)); |
166 | goto cleanup; |
167 | } |
168 | msg = g_malloc(reply->length + 1); |
169 | if (nbd_read(ioc, msg, reply->length, NULL, errp) < 0) { |
170 | error_prepend(errp, "Failed to read option error %" PRIu32 |
171 | " (%s) message: " , |
172 | reply->type, nbd_rep_lookup(reply->type)); |
173 | goto cleanup; |
174 | } |
175 | msg[reply->length] = '\0'; |
176 | trace_nbd_server_error_msg(reply->type, |
177 | nbd_reply_type_lookup(reply->type), msg); |
178 | } |
179 | |
180 | switch (reply->type) { |
181 | case NBD_REP_ERR_UNSUP: |
182 | trace_nbd_reply_err_unsup(reply->option, nbd_opt_lookup(reply->option)); |
183 | result = 0; |
184 | goto cleanup; |
185 | |
186 | case NBD_REP_ERR_POLICY: |
187 | error_setg(errp, "Denied by server for option %" PRIu32 " (%s)" , |
188 | reply->option, nbd_opt_lookup(reply->option)); |
189 | break; |
190 | |
191 | case NBD_REP_ERR_INVALID: |
192 | error_setg(errp, "Invalid parameters for option %" PRIu32 " (%s)" , |
193 | reply->option, nbd_opt_lookup(reply->option)); |
194 | break; |
195 | |
196 | case NBD_REP_ERR_PLATFORM: |
197 | error_setg(errp, "Server lacks support for option %" PRIu32 " (%s)" , |
198 | reply->option, nbd_opt_lookup(reply->option)); |
199 | break; |
200 | |
201 | case NBD_REP_ERR_TLS_REQD: |
202 | error_setg(errp, "TLS negotiation required before option %" PRIu32 |
203 | " (%s)" , reply->option, nbd_opt_lookup(reply->option)); |
204 | break; |
205 | |
206 | case NBD_REP_ERR_UNKNOWN: |
207 | error_setg(errp, "Requested export not available" ); |
208 | break; |
209 | |
210 | case NBD_REP_ERR_SHUTDOWN: |
211 | error_setg(errp, "Server shutting down before option %" PRIu32 " (%s)" , |
212 | reply->option, nbd_opt_lookup(reply->option)); |
213 | break; |
214 | |
215 | case NBD_REP_ERR_BLOCK_SIZE_REQD: |
216 | error_setg(errp, "Server requires INFO_BLOCK_SIZE for option %" PRIu32 |
217 | " (%s)" , reply->option, nbd_opt_lookup(reply->option)); |
218 | break; |
219 | |
220 | default: |
221 | error_setg(errp, "Unknown error code when asking for option %" PRIu32 |
222 | " (%s)" , reply->option, nbd_opt_lookup(reply->option)); |
223 | break; |
224 | } |
225 | |
226 | if (msg) { |
227 | error_append_hint(errp, "server reported: %s\n" , msg); |
228 | } |
229 | |
230 | cleanup: |
231 | g_free(msg); |
232 | if (result < 0) { |
233 | nbd_send_opt_abort(ioc); |
234 | } |
235 | return result; |
236 | } |
237 | |
238 | /* nbd_receive_list: |
239 | * Process another portion of the NBD_OPT_LIST reply, populating any |
240 | * name received into *@name. If @description is non-NULL, and the |
241 | * server provided a description, that is also populated. The caller |
242 | * must eventually call g_free() on success. |
243 | * Returns 1 if name and description were set and iteration must continue, |
244 | * 0 if iteration is complete (including if OPT_LIST unsupported), |
245 | * -1 with @errp set if an unrecoverable error occurred. |
246 | */ |
247 | static int nbd_receive_list(QIOChannel *ioc, char **name, char **description, |
248 | Error **errp) |
249 | { |
250 | int ret = -1; |
251 | NBDOptionReply reply; |
252 | uint32_t len; |
253 | uint32_t namelen; |
254 | char *local_name = NULL; |
255 | char *local_desc = NULL; |
256 | int error; |
257 | |
258 | if (nbd_receive_option_reply(ioc, NBD_OPT_LIST, &reply, errp) < 0) { |
259 | return -1; |
260 | } |
261 | error = nbd_handle_reply_err(ioc, &reply, errp); |
262 | if (error <= 0) { |
263 | return error; |
264 | } |
265 | len = reply.length; |
266 | |
267 | if (reply.type == NBD_REP_ACK) { |
268 | if (len != 0) { |
269 | error_setg(errp, "length too long for option end" ); |
270 | nbd_send_opt_abort(ioc); |
271 | return -1; |
272 | } |
273 | return 0; |
274 | } else if (reply.type != NBD_REP_SERVER) { |
275 | error_setg(errp, "Unexpected reply type %u (%s), expected %u (%s)" , |
276 | reply.type, nbd_rep_lookup(reply.type), |
277 | NBD_REP_SERVER, nbd_rep_lookup(NBD_REP_SERVER)); |
278 | nbd_send_opt_abort(ioc); |
279 | return -1; |
280 | } |
281 | |
282 | if (len < sizeof(namelen) || len > NBD_MAX_BUFFER_SIZE) { |
283 | error_setg(errp, "incorrect option length %" PRIu32, len); |
284 | nbd_send_opt_abort(ioc); |
285 | return -1; |
286 | } |
287 | if (nbd_read32(ioc, &namelen, "option name length" , errp) < 0) { |
288 | nbd_send_opt_abort(ioc); |
289 | return -1; |
290 | } |
291 | len -= sizeof(namelen); |
292 | if (len < namelen) { |
293 | error_setg(errp, "incorrect option name length" ); |
294 | nbd_send_opt_abort(ioc); |
295 | return -1; |
296 | } |
297 | |
298 | local_name = g_malloc(namelen + 1); |
299 | if (nbd_read(ioc, local_name, namelen, "export name" , errp) < 0) { |
300 | nbd_send_opt_abort(ioc); |
301 | goto out; |
302 | } |
303 | local_name[namelen] = '\0'; |
304 | len -= namelen; |
305 | if (len) { |
306 | local_desc = g_malloc(len + 1); |
307 | if (nbd_read(ioc, local_desc, len, "export description" , errp) < 0) { |
308 | nbd_send_opt_abort(ioc); |
309 | goto out; |
310 | } |
311 | local_desc[len] = '\0'; |
312 | } |
313 | |
314 | trace_nbd_receive_list(local_name, local_desc ?: "" ); |
315 | *name = local_name; |
316 | local_name = NULL; |
317 | if (description) { |
318 | *description = local_desc; |
319 | local_desc = NULL; |
320 | } |
321 | ret = 1; |
322 | |
323 | out: |
324 | g_free(local_name); |
325 | g_free(local_desc); |
326 | return ret; |
327 | } |
328 | |
329 | |
330 | /* |
331 | * nbd_opt_info_or_go: |
332 | * Send option for NBD_OPT_INFO or NBD_OPT_GO and parse the reply. |
333 | * Returns -1 if the option proves the export @info->name cannot be |
334 | * used, 0 if the option is unsupported (fall back to NBD_OPT_LIST and |
335 | * NBD_OPT_EXPORT_NAME in that case), and > 0 if the export is good to |
336 | * go (with the rest of @info populated). |
337 | */ |
338 | static int nbd_opt_info_or_go(QIOChannel *ioc, uint32_t opt, |
339 | NBDExportInfo *info, Error **errp) |
340 | { |
341 | NBDOptionReply reply; |
342 | uint32_t len = strlen(info->name); |
343 | uint16_t type; |
344 | int error; |
345 | char *buf; |
346 | |
347 | /* The protocol requires that the server send NBD_INFO_EXPORT with |
348 | * a non-zero flags (at least NBD_FLAG_HAS_FLAGS must be set); so |
349 | * flags still 0 is a witness of a broken server. */ |
350 | info->flags = 0; |
351 | |
352 | assert(opt == NBD_OPT_GO || opt == NBD_OPT_INFO); |
353 | trace_nbd_opt_info_go_start(nbd_opt_lookup(opt), info->name); |
354 | buf = g_malloc(4 + len + 2 + 2 * info->request_sizes + 1); |
355 | stl_be_p(buf, len); |
356 | memcpy(buf + 4, info->name, len); |
357 | /* At most one request, everything else up to server */ |
358 | stw_be_p(buf + 4 + len, info->request_sizes); |
359 | if (info->request_sizes) { |
360 | stw_be_p(buf + 4 + len + 2, NBD_INFO_BLOCK_SIZE); |
361 | } |
362 | error = nbd_send_option_request(ioc, opt, |
363 | 4 + len + 2 + 2 * info->request_sizes, |
364 | buf, errp); |
365 | g_free(buf); |
366 | if (error < 0) { |
367 | return -1; |
368 | } |
369 | |
370 | while (1) { |
371 | if (nbd_receive_option_reply(ioc, opt, &reply, errp) < 0) { |
372 | return -1; |
373 | } |
374 | error = nbd_handle_reply_err(ioc, &reply, errp); |
375 | if (error <= 0) { |
376 | return error; |
377 | } |
378 | len = reply.length; |
379 | |
380 | if (reply.type == NBD_REP_ACK) { |
381 | /* |
382 | * Server is done sending info, and moved into transmission |
383 | * phase for NBD_OPT_GO, but make sure it sent flags |
384 | */ |
385 | if (len) { |
386 | error_setg(errp, "server sent invalid NBD_REP_ACK" ); |
387 | return -1; |
388 | } |
389 | if (!info->flags) { |
390 | error_setg(errp, "broken server omitted NBD_INFO_EXPORT" ); |
391 | return -1; |
392 | } |
393 | trace_nbd_opt_info_go_success(nbd_opt_lookup(opt)); |
394 | return 1; |
395 | } |
396 | if (reply.type != NBD_REP_INFO) { |
397 | error_setg(errp, "unexpected reply type %u (%s), expected %u (%s)" , |
398 | reply.type, nbd_rep_lookup(reply.type), |
399 | NBD_REP_INFO, nbd_rep_lookup(NBD_REP_INFO)); |
400 | nbd_send_opt_abort(ioc); |
401 | return -1; |
402 | } |
403 | if (len < sizeof(type)) { |
404 | error_setg(errp, "NBD_REP_INFO length %" PRIu32 " is too short" , |
405 | len); |
406 | nbd_send_opt_abort(ioc); |
407 | return -1; |
408 | } |
409 | if (nbd_read16(ioc, &type, "info type" , errp) < 0) { |
410 | nbd_send_opt_abort(ioc); |
411 | return -1; |
412 | } |
413 | len -= sizeof(type); |
414 | switch (type) { |
415 | case NBD_INFO_EXPORT: |
416 | if (len != sizeof(info->size) + sizeof(info->flags)) { |
417 | error_setg(errp, "remaining export info len %" PRIu32 |
418 | " is unexpected size" , len); |
419 | nbd_send_opt_abort(ioc); |
420 | return -1; |
421 | } |
422 | if (nbd_read64(ioc, &info->size, "info size" , errp) < 0) { |
423 | nbd_send_opt_abort(ioc); |
424 | return -1; |
425 | } |
426 | if (nbd_read16(ioc, &info->flags, "info flags" , errp) < 0) { |
427 | nbd_send_opt_abort(ioc); |
428 | return -1; |
429 | } |
430 | if (info->min_block && |
431 | !QEMU_IS_ALIGNED(info->size, info->min_block)) { |
432 | error_setg(errp, "export size %" PRIu64 " is not multiple of " |
433 | "minimum block size %" PRIu32, info->size, |
434 | info->min_block); |
435 | nbd_send_opt_abort(ioc); |
436 | return -1; |
437 | } |
438 | trace_nbd_receive_negotiate_size_flags(info->size, info->flags); |
439 | break; |
440 | |
441 | case NBD_INFO_BLOCK_SIZE: |
442 | if (len != sizeof(info->min_block) * 3) { |
443 | error_setg(errp, "remaining export info len %" PRIu32 |
444 | " is unexpected size" , len); |
445 | nbd_send_opt_abort(ioc); |
446 | return -1; |
447 | } |
448 | if (nbd_read32(ioc, &info->min_block, "info minimum block size" , |
449 | errp) < 0) { |
450 | nbd_send_opt_abort(ioc); |
451 | return -1; |
452 | } |
453 | if (!is_power_of_2(info->min_block)) { |
454 | error_setg(errp, "server minimum block size %" PRIu32 |
455 | " is not a power of two" , info->min_block); |
456 | nbd_send_opt_abort(ioc); |
457 | return -1; |
458 | } |
459 | if (nbd_read32(ioc, &info->opt_block, "info preferred block size" , |
460 | errp) < 0) |
461 | { |
462 | nbd_send_opt_abort(ioc); |
463 | return -1; |
464 | } |
465 | if (!is_power_of_2(info->opt_block) || |
466 | info->opt_block < info->min_block) { |
467 | error_setg(errp, "server preferred block size %" PRIu32 |
468 | " is not valid" , info->opt_block); |
469 | nbd_send_opt_abort(ioc); |
470 | return -1; |
471 | } |
472 | if (nbd_read32(ioc, &info->max_block, "info maximum block size" , |
473 | errp) < 0) |
474 | { |
475 | nbd_send_opt_abort(ioc); |
476 | return -1; |
477 | } |
478 | if (info->max_block < info->min_block) { |
479 | error_setg(errp, "server maximum block size %" PRIu32 |
480 | " is not valid" , info->max_block); |
481 | nbd_send_opt_abort(ioc); |
482 | return -1; |
483 | } |
484 | trace_nbd_opt_info_block_size(info->min_block, info->opt_block, |
485 | info->max_block); |
486 | break; |
487 | |
488 | default: |
489 | trace_nbd_opt_info_unknown(type, nbd_info_lookup(type)); |
490 | if (nbd_drop(ioc, len, errp) < 0) { |
491 | error_prepend(errp, "Failed to read info payload: " ); |
492 | nbd_send_opt_abort(ioc); |
493 | return -1; |
494 | } |
495 | break; |
496 | } |
497 | } |
498 | } |
499 | |
500 | /* Return -1 on failure, 0 if wantname is an available export. */ |
501 | static int nbd_receive_query_exports(QIOChannel *ioc, |
502 | const char *wantname, |
503 | Error **errp) |
504 | { |
505 | bool list_empty = true; |
506 | bool found_export = false; |
507 | |
508 | trace_nbd_receive_query_exports_start(wantname); |
509 | if (nbd_send_option_request(ioc, NBD_OPT_LIST, 0, NULL, errp) < 0) { |
510 | return -1; |
511 | } |
512 | |
513 | while (1) { |
514 | char *name; |
515 | int ret = nbd_receive_list(ioc, &name, NULL, errp); |
516 | |
517 | if (ret < 0) { |
518 | /* Server gave unexpected reply */ |
519 | return -1; |
520 | } else if (ret == 0) { |
521 | /* Done iterating. */ |
522 | if (list_empty) { |
523 | /* |
524 | * We don't have enough context to tell a server that |
525 | * sent an empty list apart from a server that does |
526 | * not support the list command; but as this function |
527 | * is just used to trigger a nicer error message |
528 | * before trying NBD_OPT_EXPORT_NAME, assume the |
529 | * export is available. |
530 | */ |
531 | return 0; |
532 | } else if (!found_export) { |
533 | error_setg(errp, "No export with name '%s' available" , |
534 | wantname); |
535 | nbd_send_opt_abort(ioc); |
536 | return -1; |
537 | } |
538 | trace_nbd_receive_query_exports_success(wantname); |
539 | return 0; |
540 | } |
541 | list_empty = false; |
542 | if (!strcmp(name, wantname)) { |
543 | found_export = true; |
544 | } |
545 | g_free(name); |
546 | } |
547 | } |
548 | |
549 | /* nbd_request_simple_option: Send an option request, and parse the reply |
550 | * return 1 for successful negotiation, |
551 | * 0 if operation is unsupported, |
552 | * -1 with errp set for any other error |
553 | */ |
554 | static int nbd_request_simple_option(QIOChannel *ioc, int opt, Error **errp) |
555 | { |
556 | NBDOptionReply reply; |
557 | int error; |
558 | |
559 | if (nbd_send_option_request(ioc, opt, 0, NULL, errp) < 0) { |
560 | return -1; |
561 | } |
562 | |
563 | if (nbd_receive_option_reply(ioc, opt, &reply, errp) < 0) { |
564 | return -1; |
565 | } |
566 | error = nbd_handle_reply_err(ioc, &reply, errp); |
567 | if (error <= 0) { |
568 | return error; |
569 | } |
570 | |
571 | if (reply.type != NBD_REP_ACK) { |
572 | error_setg(errp, "Server answered option %d (%s) with unexpected " |
573 | "reply %" PRIu32 " (%s)" , opt, nbd_opt_lookup(opt), |
574 | reply.type, nbd_rep_lookup(reply.type)); |
575 | nbd_send_opt_abort(ioc); |
576 | return -1; |
577 | } |
578 | |
579 | if (reply.length != 0) { |
580 | error_setg(errp, "Option %d ('%s') response length is %" PRIu32 |
581 | " (it should be zero)" , opt, nbd_opt_lookup(opt), |
582 | reply.length); |
583 | nbd_send_opt_abort(ioc); |
584 | return -1; |
585 | } |
586 | |
587 | return 1; |
588 | } |
589 | |
590 | static QIOChannel *nbd_receive_starttls(QIOChannel *ioc, |
591 | QCryptoTLSCreds *tlscreds, |
592 | const char *hostname, Error **errp) |
593 | { |
594 | int ret; |
595 | QIOChannelTLS *tioc; |
596 | struct NBDTLSHandshakeData data = { 0 }; |
597 | |
598 | ret = nbd_request_simple_option(ioc, NBD_OPT_STARTTLS, errp); |
599 | if (ret <= 0) { |
600 | if (ret == 0) { |
601 | error_setg(errp, "Server don't support STARTTLS option" ); |
602 | nbd_send_opt_abort(ioc); |
603 | } |
604 | return NULL; |
605 | } |
606 | |
607 | trace_nbd_receive_starttls_new_client(); |
608 | tioc = qio_channel_tls_new_client(ioc, tlscreds, hostname, errp); |
609 | if (!tioc) { |
610 | return NULL; |
611 | } |
612 | qio_channel_set_name(QIO_CHANNEL(tioc), "nbd-client-tls" ); |
613 | data.loop = g_main_loop_new(g_main_context_default(), FALSE); |
614 | trace_nbd_receive_starttls_tls_handshake(); |
615 | qio_channel_tls_handshake(tioc, |
616 | nbd_tls_handshake, |
617 | &data, |
618 | NULL, |
619 | NULL); |
620 | |
621 | if (!data.complete) { |
622 | g_main_loop_run(data.loop); |
623 | } |
624 | g_main_loop_unref(data.loop); |
625 | if (data.error) { |
626 | error_propagate(errp, data.error); |
627 | object_unref(OBJECT(tioc)); |
628 | return NULL; |
629 | } |
630 | |
631 | return QIO_CHANNEL(tioc); |
632 | } |
633 | |
634 | /* |
635 | * nbd_send_meta_query: |
636 | * Send 0 or 1 set/list meta context queries. |
637 | * Return 0 on success, -1 with errp set for any error |
638 | */ |
639 | static int nbd_send_meta_query(QIOChannel *ioc, uint32_t opt, |
640 | const char *export, const char *query, |
641 | Error **errp) |
642 | { |
643 | int ret; |
644 | uint32_t export_len = strlen(export); |
645 | uint32_t queries = !!query; |
646 | uint32_t query_len = 0; |
647 | uint32_t data_len; |
648 | char *data; |
649 | char *p; |
650 | |
651 | data_len = sizeof(export_len) + export_len + sizeof(queries); |
652 | if (query) { |
653 | query_len = strlen(query); |
654 | data_len += sizeof(query_len) + query_len; |
655 | } else { |
656 | assert(opt == NBD_OPT_LIST_META_CONTEXT); |
657 | } |
658 | p = data = g_malloc(data_len); |
659 | |
660 | trace_nbd_opt_meta_request(nbd_opt_lookup(opt), query ?: "(all)" , export); |
661 | stl_be_p(p, export_len); |
662 | memcpy(p += sizeof(export_len), export, export_len); |
663 | stl_be_p(p += export_len, queries); |
664 | if (query) { |
665 | stl_be_p(p += sizeof(queries), query_len); |
666 | memcpy(p += sizeof(query_len), query, query_len); |
667 | } |
668 | |
669 | ret = nbd_send_option_request(ioc, opt, data_len, data, errp); |
670 | g_free(data); |
671 | return ret; |
672 | } |
673 | |
674 | /* |
675 | * nbd_receive_one_meta_context: |
676 | * Called in a loop to receive and trace one set/list meta context reply. |
677 | * Pass non-NULL @name or @id to collect results back to the caller, which |
678 | * must eventually call g_free(). |
679 | * return 1 if name is set and iteration must continue, |
680 | * 0 if iteration is complete (including if option is unsupported), |
681 | * -1 with errp set for any error |
682 | */ |
683 | static int nbd_receive_one_meta_context(QIOChannel *ioc, |
684 | uint32_t opt, |
685 | char **name, |
686 | uint32_t *id, |
687 | Error **errp) |
688 | { |
689 | int ret; |
690 | NBDOptionReply reply; |
691 | char *local_name = NULL; |
692 | uint32_t local_id; |
693 | |
694 | if (nbd_receive_option_reply(ioc, opt, &reply, errp) < 0) { |
695 | return -1; |
696 | } |
697 | |
698 | ret = nbd_handle_reply_err(ioc, &reply, errp); |
699 | if (ret <= 0) { |
700 | return ret; |
701 | } |
702 | |
703 | if (reply.type == NBD_REP_ACK) { |
704 | if (reply.length != 0) { |
705 | error_setg(errp, "Unexpected length to ACK response" ); |
706 | nbd_send_opt_abort(ioc); |
707 | return -1; |
708 | } |
709 | return 0; |
710 | } else if (reply.type != NBD_REP_META_CONTEXT) { |
711 | error_setg(errp, "Unexpected reply type %u (%s), expected %u (%s)" , |
712 | reply.type, nbd_rep_lookup(reply.type), |
713 | NBD_REP_META_CONTEXT, nbd_rep_lookup(NBD_REP_META_CONTEXT)); |
714 | nbd_send_opt_abort(ioc); |
715 | return -1; |
716 | } |
717 | |
718 | if (reply.length <= sizeof(local_id) || |
719 | reply.length > NBD_MAX_BUFFER_SIZE) { |
720 | error_setg(errp, "Failed to negotiate meta context, server " |
721 | "answered with unexpected length %" PRIu32, |
722 | reply.length); |
723 | nbd_send_opt_abort(ioc); |
724 | return -1; |
725 | } |
726 | |
727 | if (nbd_read32(ioc, &local_id, "context id" , errp) < 0) { |
728 | return -1; |
729 | } |
730 | |
731 | reply.length -= sizeof(local_id); |
732 | local_name = g_malloc(reply.length + 1); |
733 | if (nbd_read(ioc, local_name, reply.length, "context name" , errp) < 0) { |
734 | g_free(local_name); |
735 | return -1; |
736 | } |
737 | local_name[reply.length] = '\0'; |
738 | trace_nbd_opt_meta_reply(nbd_opt_lookup(opt), local_name, local_id); |
739 | |
740 | if (name) { |
741 | *name = local_name; |
742 | } else { |
743 | g_free(local_name); |
744 | } |
745 | if (id) { |
746 | *id = local_id; |
747 | } |
748 | return 1; |
749 | } |
750 | |
751 | /* |
752 | * nbd_negotiate_simple_meta_context: |
753 | * Request the server to set the meta context for export @info->name |
754 | * using @info->x_dirty_bitmap with a fallback to "base:allocation", |
755 | * setting @info->context_id to the resulting id. Fail if the server |
756 | * responds with more than one context or with a context different |
757 | * than the query. |
758 | * return 1 for successful negotiation, |
759 | * 0 if operation is unsupported, |
760 | * -1 with errp set for any other error |
761 | */ |
762 | static int nbd_negotiate_simple_meta_context(QIOChannel *ioc, |
763 | NBDExportInfo *info, |
764 | Error **errp) |
765 | { |
766 | /* |
767 | * TODO: Removing the x_dirty_bitmap hack will mean refactoring |
768 | * this function to request and store ids for multiple contexts |
769 | * (both base:allocation and a dirty bitmap), at which point this |
770 | * function should lose the term _simple. |
771 | */ |
772 | int ret; |
773 | const char *context = info->x_dirty_bitmap ?: "base:allocation" ; |
774 | bool received = false; |
775 | char *name = NULL; |
776 | |
777 | if (nbd_send_meta_query(ioc, NBD_OPT_SET_META_CONTEXT, |
778 | info->name, context, errp) < 0) { |
779 | return -1; |
780 | } |
781 | |
782 | ret = nbd_receive_one_meta_context(ioc, NBD_OPT_SET_META_CONTEXT, |
783 | &name, &info->context_id, errp); |
784 | if (ret < 0) { |
785 | return -1; |
786 | } |
787 | if (ret == 1) { |
788 | if (strcmp(context, name)) { |
789 | error_setg(errp, "Failed to negotiate meta context '%s', server " |
790 | "answered with different context '%s'" , context, |
791 | name); |
792 | g_free(name); |
793 | nbd_send_opt_abort(ioc); |
794 | return -1; |
795 | } |
796 | g_free(name); |
797 | received = true; |
798 | |
799 | ret = nbd_receive_one_meta_context(ioc, NBD_OPT_SET_META_CONTEXT, |
800 | NULL, NULL, errp); |
801 | if (ret < 0) { |
802 | return -1; |
803 | } |
804 | } |
805 | if (ret != 0) { |
806 | error_setg(errp, "Server answered with more than one context" ); |
807 | nbd_send_opt_abort(ioc); |
808 | return -1; |
809 | } |
810 | return received; |
811 | } |
812 | |
813 | /* |
814 | * nbd_list_meta_contexts: |
815 | * Request the server to list all meta contexts for export @info->name. |
816 | * return 0 if list is complete (even if empty), |
817 | * -1 with errp set for any error |
818 | */ |
819 | static int nbd_list_meta_contexts(QIOChannel *ioc, |
820 | NBDExportInfo *info, |
821 | Error **errp) |
822 | { |
823 | int ret; |
824 | int seen_any = false; |
825 | int seen_qemu = false; |
826 | |
827 | if (nbd_send_meta_query(ioc, NBD_OPT_LIST_META_CONTEXT, |
828 | info->name, NULL, errp) < 0) { |
829 | return -1; |
830 | } |
831 | |
832 | while (1) { |
833 | char *context; |
834 | |
835 | ret = nbd_receive_one_meta_context(ioc, NBD_OPT_LIST_META_CONTEXT, |
836 | &context, NULL, errp); |
837 | if (ret == 0 && seen_any && !seen_qemu) { |
838 | /* |
839 | * Work around qemu 3.0 bug: the server forgot to send |
840 | * "qemu:" replies to 0 queries. If we saw at least one |
841 | * reply (probably base:allocation), but none of them were |
842 | * qemu:, then run a more specific query to make sure. |
843 | */ |
844 | seen_qemu = true; |
845 | if (nbd_send_meta_query(ioc, NBD_OPT_LIST_META_CONTEXT, |
846 | info->name, "qemu:" , errp) < 0) { |
847 | return -1; |
848 | } |
849 | continue; |
850 | } |
851 | if (ret <= 0) { |
852 | return ret; |
853 | } |
854 | seen_any = true; |
855 | seen_qemu |= strstart(context, "qemu:" , NULL); |
856 | info->contexts = g_renew(char *, info->contexts, ++info->n_contexts); |
857 | info->contexts[info->n_contexts - 1] = context; |
858 | } |
859 | } |
860 | |
861 | /* |
862 | * nbd_start_negotiate: |
863 | * Start the handshake to the server. After a positive return, the server |
864 | * is ready to accept additional NBD_OPT requests. |
865 | * Returns: negative errno: failure talking to server |
866 | * 0: server is oldstyle, must call nbd_negotiate_finish_oldstyle |
867 | * 1: server is newstyle, but can only accept EXPORT_NAME |
868 | * 2: server is newstyle, but lacks structured replies |
869 | * 3: server is newstyle and set up for structured replies |
870 | */ |
871 | static int nbd_start_negotiate(AioContext *aio_context, QIOChannel *ioc, |
872 | QCryptoTLSCreds *tlscreds, |
873 | const char *hostname, QIOChannel **outioc, |
874 | bool structured_reply, bool *zeroes, |
875 | Error **errp) |
876 | { |
877 | uint64_t magic; |
878 | |
879 | trace_nbd_start_negotiate(tlscreds, hostname ? hostname : "<null>" ); |
880 | |
881 | if (zeroes) { |
882 | *zeroes = true; |
883 | } |
884 | if (outioc) { |
885 | *outioc = NULL; |
886 | } |
887 | if (tlscreds && !outioc) { |
888 | error_setg(errp, "Output I/O channel required for TLS" ); |
889 | return -EINVAL; |
890 | } |
891 | |
892 | if (nbd_read64(ioc, &magic, "initial magic" , errp) < 0) { |
893 | return -EINVAL; |
894 | } |
895 | trace_nbd_receive_negotiate_magic(magic); |
896 | |
897 | if (magic != NBD_INIT_MAGIC) { |
898 | error_setg(errp, "Bad initial magic received: 0x%" PRIx64, magic); |
899 | return -EINVAL; |
900 | } |
901 | |
902 | if (nbd_read64(ioc, &magic, "server magic" , errp) < 0) { |
903 | return -EINVAL; |
904 | } |
905 | trace_nbd_receive_negotiate_magic(magic); |
906 | |
907 | if (magic == NBD_OPTS_MAGIC) { |
908 | uint32_t clientflags = 0; |
909 | uint16_t globalflags; |
910 | bool fixedNewStyle = false; |
911 | |
912 | if (nbd_read16(ioc, &globalflags, "server flags" , errp) < 0) { |
913 | return -EINVAL; |
914 | } |
915 | trace_nbd_receive_negotiate_server_flags(globalflags); |
916 | if (globalflags & NBD_FLAG_FIXED_NEWSTYLE) { |
917 | fixedNewStyle = true; |
918 | clientflags |= NBD_FLAG_C_FIXED_NEWSTYLE; |
919 | } |
920 | if (globalflags & NBD_FLAG_NO_ZEROES) { |
921 | if (zeroes) { |
922 | *zeroes = false; |
923 | } |
924 | clientflags |= NBD_FLAG_C_NO_ZEROES; |
925 | } |
926 | /* client requested flags */ |
927 | clientflags = cpu_to_be32(clientflags); |
928 | if (nbd_write(ioc, &clientflags, sizeof(clientflags), errp) < 0) { |
929 | error_prepend(errp, "Failed to send clientflags field: " ); |
930 | return -EINVAL; |
931 | } |
932 | if (tlscreds) { |
933 | if (fixedNewStyle) { |
934 | *outioc = nbd_receive_starttls(ioc, tlscreds, hostname, errp); |
935 | if (!*outioc) { |
936 | return -EINVAL; |
937 | } |
938 | ioc = *outioc; |
939 | if (aio_context) { |
940 | qio_channel_set_blocking(ioc, false, NULL); |
941 | qio_channel_attach_aio_context(ioc, aio_context); |
942 | } |
943 | } else { |
944 | error_setg(errp, "Server does not support STARTTLS" ); |
945 | return -EINVAL; |
946 | } |
947 | } |
948 | if (fixedNewStyle) { |
949 | int result = 0; |
950 | |
951 | if (structured_reply) { |
952 | result = nbd_request_simple_option(ioc, |
953 | NBD_OPT_STRUCTURED_REPLY, |
954 | errp); |
955 | if (result < 0) { |
956 | return -EINVAL; |
957 | } |
958 | } |
959 | return 2 + result; |
960 | } else { |
961 | return 1; |
962 | } |
963 | } else if (magic == NBD_CLIENT_MAGIC) { |
964 | if (tlscreds) { |
965 | error_setg(errp, "Server does not support STARTTLS" ); |
966 | return -EINVAL; |
967 | } |
968 | return 0; |
969 | } else { |
970 | error_setg(errp, "Bad server magic received: 0x%" PRIx64, magic); |
971 | return -EINVAL; |
972 | } |
973 | } |
974 | |
975 | /* |
976 | * nbd_negotiate_finish_oldstyle: |
977 | * Populate @info with the size and export flags from an oldstyle server, |
978 | * but does not consume 124 bytes of reserved zero padding. |
979 | * Returns 0 on success, -1 with @errp set on failure |
980 | */ |
981 | static int nbd_negotiate_finish_oldstyle(QIOChannel *ioc, NBDExportInfo *info, |
982 | Error **errp) |
983 | { |
984 | uint32_t oldflags; |
985 | |
986 | if (nbd_read64(ioc, &info->size, "export length" , errp) < 0) { |
987 | return -EINVAL; |
988 | } |
989 | |
990 | if (nbd_read32(ioc, &oldflags, "export flags" , errp) < 0) { |
991 | return -EINVAL; |
992 | } |
993 | if (oldflags & ~0xffff) { |
994 | error_setg(errp, "Unexpected export flags %0x" PRIx32, oldflags); |
995 | return -EINVAL; |
996 | } |
997 | info->flags = oldflags; |
998 | return 0; |
999 | } |
1000 | |
1001 | /* |
1002 | * nbd_receive_negotiate: |
1003 | * Connect to server, complete negotiation, and move into transmission phase. |
1004 | * Returns: negative errno: failure talking to server |
1005 | * 0: server is connected |
1006 | */ |
1007 | int nbd_receive_negotiate(AioContext *aio_context, QIOChannel *ioc, |
1008 | QCryptoTLSCreds *tlscreds, |
1009 | const char *hostname, QIOChannel **outioc, |
1010 | NBDExportInfo *info, Error **errp) |
1011 | { |
1012 | int result; |
1013 | bool zeroes; |
1014 | bool base_allocation = info->base_allocation; |
1015 | |
1016 | assert(info->name); |
1017 | trace_nbd_receive_negotiate_name(info->name); |
1018 | |
1019 | result = nbd_start_negotiate(aio_context, ioc, tlscreds, hostname, outioc, |
1020 | info->structured_reply, &zeroes, errp); |
1021 | |
1022 | info->structured_reply = false; |
1023 | info->base_allocation = false; |
1024 | if (tlscreds && *outioc) { |
1025 | ioc = *outioc; |
1026 | } |
1027 | |
1028 | switch (result) { |
1029 | case 3: /* newstyle, with structured replies */ |
1030 | info->structured_reply = true; |
1031 | if (base_allocation) { |
1032 | result = nbd_negotiate_simple_meta_context(ioc, info, errp); |
1033 | if (result < 0) { |
1034 | return -EINVAL; |
1035 | } |
1036 | info->base_allocation = result == 1; |
1037 | } |
1038 | /* fall through */ |
1039 | case 2: /* newstyle, try OPT_GO */ |
1040 | /* Try NBD_OPT_GO first - if it works, we are done (it |
1041 | * also gives us a good message if the server requires |
1042 | * TLS). If it is not available, fall back to |
1043 | * NBD_OPT_LIST for nicer error messages about a missing |
1044 | * export, then use NBD_OPT_EXPORT_NAME. */ |
1045 | result = nbd_opt_info_or_go(ioc, NBD_OPT_GO, info, errp); |
1046 | if (result < 0) { |
1047 | return -EINVAL; |
1048 | } |
1049 | if (result > 0) { |
1050 | return 0; |
1051 | } |
1052 | /* Check our desired export is present in the |
1053 | * server export list. Since NBD_OPT_EXPORT_NAME |
1054 | * cannot return an error message, running this |
1055 | * query gives us better error reporting if the |
1056 | * export name is not available. |
1057 | */ |
1058 | if (nbd_receive_query_exports(ioc, info->name, errp) < 0) { |
1059 | return -EINVAL; |
1060 | } |
1061 | /* fall through */ |
1062 | case 1: /* newstyle, but limited to EXPORT_NAME */ |
1063 | /* write the export name request */ |
1064 | if (nbd_send_option_request(ioc, NBD_OPT_EXPORT_NAME, -1, info->name, |
1065 | errp) < 0) { |
1066 | return -EINVAL; |
1067 | } |
1068 | |
1069 | /* Read the response */ |
1070 | if (nbd_read64(ioc, &info->size, "export length" , errp) < 0) { |
1071 | return -EINVAL; |
1072 | } |
1073 | |
1074 | if (nbd_read16(ioc, &info->flags, "export flags" , errp) < 0) { |
1075 | return -EINVAL; |
1076 | } |
1077 | break; |
1078 | case 0: /* oldstyle, parse length and flags */ |
1079 | if (*info->name) { |
1080 | error_setg(errp, "Server does not support non-empty export names" ); |
1081 | return -EINVAL; |
1082 | } |
1083 | if (nbd_negotiate_finish_oldstyle(ioc, info, errp) < 0) { |
1084 | return -EINVAL; |
1085 | } |
1086 | break; |
1087 | default: |
1088 | return result; |
1089 | } |
1090 | |
1091 | trace_nbd_receive_negotiate_size_flags(info->size, info->flags); |
1092 | if (zeroes && nbd_drop(ioc, 124, errp) < 0) { |
1093 | error_prepend(errp, "Failed to read reserved block: " ); |
1094 | return -EINVAL; |
1095 | } |
1096 | return 0; |
1097 | } |
1098 | |
1099 | /* Clean up result of nbd_receive_export_list */ |
1100 | void nbd_free_export_list(NBDExportInfo *info, int count) |
1101 | { |
1102 | int i, j; |
1103 | |
1104 | if (!info) { |
1105 | return; |
1106 | } |
1107 | |
1108 | for (i = 0; i < count; i++) { |
1109 | g_free(info[i].name); |
1110 | g_free(info[i].description); |
1111 | for (j = 0; j < info[i].n_contexts; j++) { |
1112 | g_free(info[i].contexts[j]); |
1113 | } |
1114 | g_free(info[i].contexts); |
1115 | } |
1116 | g_free(info); |
1117 | } |
1118 | |
1119 | /* |
1120 | * nbd_receive_export_list: |
1121 | * Query details about a server's exports, then disconnect without |
1122 | * going into transmission phase. Return a count of the exports listed |
1123 | * in @info by the server, or -1 on error. Caller must free @info using |
1124 | * nbd_free_export_list(). |
1125 | */ |
1126 | int nbd_receive_export_list(QIOChannel *ioc, QCryptoTLSCreds *tlscreds, |
1127 | const char *hostname, NBDExportInfo **info, |
1128 | Error **errp) |
1129 | { |
1130 | int result; |
1131 | int count = 0; |
1132 | int i; |
1133 | int rc; |
1134 | int ret = -1; |
1135 | NBDExportInfo *array = NULL; |
1136 | QIOChannel *sioc = NULL; |
1137 | |
1138 | *info = NULL; |
1139 | result = nbd_start_negotiate(NULL, ioc, tlscreds, hostname, &sioc, true, |
1140 | NULL, errp); |
1141 | if (tlscreds && sioc) { |
1142 | ioc = sioc; |
1143 | } |
1144 | |
1145 | switch (result) { |
1146 | case 2: |
1147 | case 3: |
1148 | /* newstyle - use NBD_OPT_LIST to populate array, then try |
1149 | * NBD_OPT_INFO on each array member. If structured replies |
1150 | * are enabled, also try NBD_OPT_LIST_META_CONTEXT. */ |
1151 | if (nbd_send_option_request(ioc, NBD_OPT_LIST, 0, NULL, errp) < 0) { |
1152 | goto out; |
1153 | } |
1154 | while (1) { |
1155 | char *name; |
1156 | char *desc; |
1157 | |
1158 | rc = nbd_receive_list(ioc, &name, &desc, errp); |
1159 | if (rc < 0) { |
1160 | goto out; |
1161 | } else if (rc == 0) { |
1162 | break; |
1163 | } |
1164 | array = g_renew(NBDExportInfo, array, ++count); |
1165 | memset(&array[count - 1], 0, sizeof(*array)); |
1166 | array[count - 1].name = name; |
1167 | array[count - 1].description = desc; |
1168 | array[count - 1].structured_reply = result == 3; |
1169 | } |
1170 | |
1171 | for (i = 0; i < count; i++) { |
1172 | array[i].request_sizes = true; |
1173 | rc = nbd_opt_info_or_go(ioc, NBD_OPT_INFO, &array[i], errp); |
1174 | if (rc < 0) { |
1175 | goto out; |
1176 | } else if (rc == 0) { |
1177 | /* |
1178 | * Pointless to try rest of loop. If OPT_INFO doesn't work, |
1179 | * it's unlikely that meta contexts work either |
1180 | */ |
1181 | break; |
1182 | } |
1183 | |
1184 | if (result == 3 && |
1185 | nbd_list_meta_contexts(ioc, &array[i], errp) < 0) { |
1186 | goto out; |
1187 | } |
1188 | } |
1189 | |
1190 | /* Send NBD_OPT_ABORT as a courtesy before hanging up */ |
1191 | nbd_send_opt_abort(ioc); |
1192 | break; |
1193 | case 1: /* newstyle, but limited to EXPORT_NAME */ |
1194 | error_setg(errp, "Server does not support export lists" ); |
1195 | /* We can't even send NBD_OPT_ABORT, so merely hang up */ |
1196 | goto out; |
1197 | case 0: /* oldstyle, parse length and flags */ |
1198 | array = g_new0(NBDExportInfo, 1); |
1199 | array->name = g_strdup("" ); |
1200 | count = 1; |
1201 | |
1202 | if (nbd_negotiate_finish_oldstyle(ioc, array, errp) < 0) { |
1203 | goto out; |
1204 | } |
1205 | |
1206 | /* Send NBD_CMD_DISC as a courtesy to the server, but ignore all |
1207 | * errors now that we have the information we wanted. */ |
1208 | if (nbd_drop(ioc, 124, NULL) == 0) { |
1209 | NBDRequest request = { .type = NBD_CMD_DISC }; |
1210 | |
1211 | nbd_send_request(ioc, &request); |
1212 | } |
1213 | break; |
1214 | default: |
1215 | goto out; |
1216 | } |
1217 | |
1218 | *info = array; |
1219 | array = NULL; |
1220 | ret = count; |
1221 | |
1222 | out: |
1223 | qio_channel_shutdown(ioc, QIO_CHANNEL_SHUTDOWN_BOTH, NULL); |
1224 | qio_channel_close(ioc, NULL); |
1225 | object_unref(OBJECT(sioc)); |
1226 | nbd_free_export_list(array, count); |
1227 | return ret; |
1228 | } |
1229 | |
1230 | #ifdef __linux__ |
1231 | int nbd_init(int fd, QIOChannelSocket *sioc, NBDExportInfo *info, |
1232 | Error **errp) |
1233 | { |
1234 | unsigned long sector_size = MAX(BDRV_SECTOR_SIZE, info->min_block); |
1235 | unsigned long sectors = info->size / sector_size; |
1236 | |
1237 | /* FIXME: Once the kernel module is patched to honor block sizes, |
1238 | * and to advertise that fact to user space, we should update the |
1239 | * hand-off to the kernel to use any block sizes we learned. */ |
1240 | assert(!info->request_sizes); |
1241 | if (info->size / sector_size != sectors) { |
1242 | error_setg(errp, "Export size %" PRIu64 " too large for 32-bit kernel" , |
1243 | info->size); |
1244 | return -E2BIG; |
1245 | } |
1246 | |
1247 | trace_nbd_init_set_socket(); |
1248 | |
1249 | if (ioctl(fd, NBD_SET_SOCK, (unsigned long) sioc->fd) < 0) { |
1250 | int serrno = errno; |
1251 | error_setg(errp, "Failed to set NBD socket" ); |
1252 | return -serrno; |
1253 | } |
1254 | |
1255 | trace_nbd_init_set_block_size(sector_size); |
1256 | |
1257 | if (ioctl(fd, NBD_SET_BLKSIZE, sector_size) < 0) { |
1258 | int serrno = errno; |
1259 | error_setg(errp, "Failed setting NBD block size" ); |
1260 | return -serrno; |
1261 | } |
1262 | |
1263 | trace_nbd_init_set_size(sectors); |
1264 | if (info->size % sector_size) { |
1265 | trace_nbd_init_trailing_bytes(info->size % sector_size); |
1266 | } |
1267 | |
1268 | if (ioctl(fd, NBD_SET_SIZE_BLOCKS, sectors) < 0) { |
1269 | int serrno = errno; |
1270 | error_setg(errp, "Failed setting size (in blocks)" ); |
1271 | return -serrno; |
1272 | } |
1273 | |
1274 | if (ioctl(fd, NBD_SET_FLAGS, (unsigned long) info->flags) < 0) { |
1275 | if (errno == ENOTTY) { |
1276 | int read_only = (info->flags & NBD_FLAG_READ_ONLY) != 0; |
1277 | trace_nbd_init_set_readonly(); |
1278 | |
1279 | if (ioctl(fd, BLKROSET, (unsigned long) &read_only) < 0) { |
1280 | int serrno = errno; |
1281 | error_setg(errp, "Failed setting read-only attribute" ); |
1282 | return -serrno; |
1283 | } |
1284 | } else { |
1285 | int serrno = errno; |
1286 | error_setg(errp, "Failed setting flags" ); |
1287 | return -serrno; |
1288 | } |
1289 | } |
1290 | |
1291 | trace_nbd_init_finish(); |
1292 | |
1293 | return 0; |
1294 | } |
1295 | |
1296 | int nbd_client(int fd) |
1297 | { |
1298 | int ret; |
1299 | int serrno; |
1300 | |
1301 | trace_nbd_client_loop(); |
1302 | |
1303 | ret = ioctl(fd, NBD_DO_IT); |
1304 | if (ret < 0 && errno == EPIPE) { |
1305 | /* NBD_DO_IT normally returns EPIPE when someone has disconnected |
1306 | * the socket via NBD_DISCONNECT. We do not want to return 1 in |
1307 | * that case. |
1308 | */ |
1309 | ret = 0; |
1310 | } |
1311 | serrno = errno; |
1312 | |
1313 | trace_nbd_client_loop_ret(ret, strerror(serrno)); |
1314 | |
1315 | trace_nbd_client_clear_queue(); |
1316 | ioctl(fd, NBD_CLEAR_QUE); |
1317 | |
1318 | trace_nbd_client_clear_socket(); |
1319 | ioctl(fd, NBD_CLEAR_SOCK); |
1320 | |
1321 | errno = serrno; |
1322 | return ret; |
1323 | } |
1324 | |
1325 | int nbd_disconnect(int fd) |
1326 | { |
1327 | ioctl(fd, NBD_CLEAR_QUE); |
1328 | ioctl(fd, NBD_DISCONNECT); |
1329 | ioctl(fd, NBD_CLEAR_SOCK); |
1330 | return 0; |
1331 | } |
1332 | |
1333 | #endif /* __linux__ */ |
1334 | |
1335 | int nbd_send_request(QIOChannel *ioc, NBDRequest *request) |
1336 | { |
1337 | uint8_t buf[NBD_REQUEST_SIZE]; |
1338 | |
1339 | trace_nbd_send_request(request->from, request->len, request->handle, |
1340 | request->flags, request->type, |
1341 | nbd_cmd_lookup(request->type)); |
1342 | |
1343 | stl_be_p(buf, NBD_REQUEST_MAGIC); |
1344 | stw_be_p(buf + 4, request->flags); |
1345 | stw_be_p(buf + 6, request->type); |
1346 | stq_be_p(buf + 8, request->handle); |
1347 | stq_be_p(buf + 16, request->from); |
1348 | stl_be_p(buf + 24, request->len); |
1349 | |
1350 | return nbd_write(ioc, buf, sizeof(buf), NULL); |
1351 | } |
1352 | |
1353 | /* nbd_receive_simple_reply |
1354 | * Read simple reply except magic field (which should be already read). |
1355 | * Payload is not read (payload is possible for CMD_READ, but here we even |
1356 | * don't know whether it take place or not). |
1357 | */ |
1358 | static int nbd_receive_simple_reply(QIOChannel *ioc, NBDSimpleReply *reply, |
1359 | Error **errp) |
1360 | { |
1361 | int ret; |
1362 | |
1363 | assert(reply->magic == NBD_SIMPLE_REPLY_MAGIC); |
1364 | |
1365 | ret = nbd_read(ioc, (uint8_t *)reply + sizeof(reply->magic), |
1366 | sizeof(*reply) - sizeof(reply->magic), "reply" , errp); |
1367 | if (ret < 0) { |
1368 | return ret; |
1369 | } |
1370 | |
1371 | reply->error = be32_to_cpu(reply->error); |
1372 | reply->handle = be64_to_cpu(reply->handle); |
1373 | |
1374 | return 0; |
1375 | } |
1376 | |
1377 | /* nbd_receive_structured_reply_chunk |
1378 | * Read structured reply chunk except magic field (which should be already |
1379 | * read). |
1380 | * Payload is not read. |
1381 | */ |
1382 | static int nbd_receive_structured_reply_chunk(QIOChannel *ioc, |
1383 | NBDStructuredReplyChunk *chunk, |
1384 | Error **errp) |
1385 | { |
1386 | int ret; |
1387 | |
1388 | assert(chunk->magic == NBD_STRUCTURED_REPLY_MAGIC); |
1389 | |
1390 | ret = nbd_read(ioc, (uint8_t *)chunk + sizeof(chunk->magic), |
1391 | sizeof(*chunk) - sizeof(chunk->magic), "structured chunk" , |
1392 | errp); |
1393 | if (ret < 0) { |
1394 | return ret; |
1395 | } |
1396 | |
1397 | chunk->flags = be16_to_cpu(chunk->flags); |
1398 | chunk->type = be16_to_cpu(chunk->type); |
1399 | chunk->handle = be64_to_cpu(chunk->handle); |
1400 | chunk->length = be32_to_cpu(chunk->length); |
1401 | |
1402 | return 0; |
1403 | } |
1404 | |
1405 | /* nbd_read_eof |
1406 | * Tries to read @size bytes from @ioc. |
1407 | * Returns 1 on success |
1408 | * 0 on eof, when no data was read (errp is not set) |
1409 | * negative errno on failure (errp is set) |
1410 | */ |
1411 | static inline int coroutine_fn |
1412 | nbd_read_eof(BlockDriverState *bs, QIOChannel *ioc, void *buffer, size_t size, |
1413 | Error **errp) |
1414 | { |
1415 | bool partial = false; |
1416 | |
1417 | assert(size); |
1418 | while (size > 0) { |
1419 | struct iovec iov = { .iov_base = buffer, .iov_len = size }; |
1420 | ssize_t len; |
1421 | |
1422 | len = qio_channel_readv(ioc, &iov, 1, errp); |
1423 | if (len == QIO_CHANNEL_ERR_BLOCK) { |
1424 | bdrv_dec_in_flight(bs); |
1425 | qio_channel_yield(ioc, G_IO_IN); |
1426 | bdrv_inc_in_flight(bs); |
1427 | continue; |
1428 | } else if (len < 0) { |
1429 | return -EIO; |
1430 | } else if (len == 0) { |
1431 | if (partial) { |
1432 | error_setg(errp, |
1433 | "Unexpected end-of-file before all bytes were read" ); |
1434 | return -EIO; |
1435 | } else { |
1436 | return 0; |
1437 | } |
1438 | } |
1439 | |
1440 | partial = true; |
1441 | size -= len; |
1442 | buffer = (uint8_t*) buffer + len; |
1443 | } |
1444 | return 1; |
1445 | } |
1446 | |
1447 | /* nbd_receive_reply |
1448 | * |
1449 | * Decreases bs->in_flight while waiting for a new reply. This yield is where |
1450 | * we wait indefinitely and the coroutine must be able to be safely reentered |
1451 | * for nbd_client_attach_aio_context(). |
1452 | * |
1453 | * Returns 1 on success |
1454 | * 0 on eof, when no data was read (errp is not set) |
1455 | * negative errno on failure (errp is set) |
1456 | */ |
1457 | int coroutine_fn nbd_receive_reply(BlockDriverState *bs, QIOChannel *ioc, |
1458 | NBDReply *reply, Error **errp) |
1459 | { |
1460 | int ret; |
1461 | const char *type; |
1462 | |
1463 | ret = nbd_read_eof(bs, ioc, &reply->magic, sizeof(reply->magic), errp); |
1464 | if (ret <= 0) { |
1465 | return ret; |
1466 | } |
1467 | |
1468 | reply->magic = be32_to_cpu(reply->magic); |
1469 | |
1470 | switch (reply->magic) { |
1471 | case NBD_SIMPLE_REPLY_MAGIC: |
1472 | ret = nbd_receive_simple_reply(ioc, &reply->simple, errp); |
1473 | if (ret < 0) { |
1474 | break; |
1475 | } |
1476 | trace_nbd_receive_simple_reply(reply->simple.error, |
1477 | nbd_err_lookup(reply->simple.error), |
1478 | reply->handle); |
1479 | break; |
1480 | case NBD_STRUCTURED_REPLY_MAGIC: |
1481 | ret = nbd_receive_structured_reply_chunk(ioc, &reply->structured, errp); |
1482 | if (ret < 0) { |
1483 | break; |
1484 | } |
1485 | type = nbd_reply_type_lookup(reply->structured.type); |
1486 | trace_nbd_receive_structured_reply_chunk(reply->structured.flags, |
1487 | reply->structured.type, type, |
1488 | reply->structured.handle, |
1489 | reply->structured.length); |
1490 | break; |
1491 | default: |
1492 | error_setg(errp, "invalid magic (got 0x%" PRIx32 ")" , reply->magic); |
1493 | return -EINVAL; |
1494 | } |
1495 | if (ret < 0) { |
1496 | return ret; |
1497 | } |
1498 | |
1499 | return 1; |
1500 | } |
1501 | |
1502 | |