1 | /* |
2 | * QEMU network structures definitions and helper functions |
3 | * |
4 | * Copyright (c) 2012 Ravello Systems LTD (http://ravellosystems.com) |
5 | * |
6 | * Developed by Daynix Computing LTD (http://www.daynix.com) |
7 | * |
8 | * Authors: |
9 | * Dmitry Fleytman <dmitry@daynix.com> |
10 | * Tamir Shomer <tamirs@daynix.com> |
11 | * Yan Vugenfirer <yan@daynix.com> |
12 | * |
13 | * This work is licensed under the terms of the GNU GPL, version 2 or later. |
14 | * See the COPYING file in the top-level directory. |
15 | * |
16 | */ |
17 | |
18 | #include "qemu/osdep.h" |
19 | #include "net/eth.h" |
20 | #include "net/checksum.h" |
21 | #include "net/tap.h" |
22 | |
23 | void (struct eth_header *ehdr, uint16_t vlan_tag, |
24 | uint16_t vlan_ethtype, bool *is_new) |
25 | { |
26 | struct vlan_header *vhdr = PKT_GET_VLAN_HDR(ehdr); |
27 | |
28 | switch (be16_to_cpu(ehdr->h_proto)) { |
29 | case ETH_P_VLAN: |
30 | case ETH_P_DVLAN: |
31 | /* vlan hdr exists */ |
32 | *is_new = false; |
33 | break; |
34 | |
35 | default: |
36 | /* No VLAN header, put a new one */ |
37 | vhdr->h_proto = ehdr->h_proto; |
38 | ehdr->h_proto = cpu_to_be16(vlan_ethtype); |
39 | *is_new = true; |
40 | break; |
41 | } |
42 | vhdr->h_tci = cpu_to_be16(vlan_tag); |
43 | } |
44 | |
45 | uint8_t |
46 | eth_get_gso_type(uint16_t l3_proto, uint8_t *l3_hdr, uint8_t l4proto) |
47 | { |
48 | uint8_t ecn_state = 0; |
49 | |
50 | if (l3_proto == ETH_P_IP) { |
51 | struct ip_header *iphdr = (struct ip_header *) l3_hdr; |
52 | |
53 | if (IP_HEADER_VERSION(iphdr) == IP_HEADER_VERSION_4) { |
54 | if (IPTOS_ECN(iphdr->ip_tos) == IPTOS_ECN_CE) { |
55 | ecn_state = VIRTIO_NET_HDR_GSO_ECN; |
56 | } |
57 | if (l4proto == IP_PROTO_TCP) { |
58 | return VIRTIO_NET_HDR_GSO_TCPV4 | ecn_state; |
59 | } else if (l4proto == IP_PROTO_UDP) { |
60 | return VIRTIO_NET_HDR_GSO_UDP | ecn_state; |
61 | } |
62 | } |
63 | } else if (l3_proto == ETH_P_IPV6) { |
64 | struct ip6_header *ip6hdr = (struct ip6_header *) l3_hdr; |
65 | |
66 | if (IP6_ECN(ip6hdr->ip6_ecn_acc) == IP6_ECN_CE) { |
67 | ecn_state = VIRTIO_NET_HDR_GSO_ECN; |
68 | } |
69 | |
70 | if (l4proto == IP_PROTO_TCP) { |
71 | return VIRTIO_NET_HDR_GSO_TCPV6 | ecn_state; |
72 | } |
73 | } |
74 | |
75 | /* Unsupported offload */ |
76 | g_assert_not_reached(); |
77 | |
78 | return VIRTIO_NET_HDR_GSO_NONE | ecn_state; |
79 | } |
80 | |
81 | uint16_t |
82 | eth_get_l3_proto(const struct iovec *l2hdr_iov, int iovcnt, size_t l2hdr_len) |
83 | { |
84 | uint16_t proto; |
85 | size_t copied; |
86 | size_t size = iov_size(l2hdr_iov, iovcnt); |
87 | size_t proto_offset = l2hdr_len - sizeof(proto); |
88 | |
89 | if (size < proto_offset) { |
90 | return ETH_P_UNKNOWN; |
91 | } |
92 | |
93 | copied = iov_to_buf(l2hdr_iov, iovcnt, proto_offset, |
94 | &proto, sizeof(proto)); |
95 | |
96 | return (copied == sizeof(proto)) ? be16_to_cpu(proto) : ETH_P_UNKNOWN; |
97 | } |
98 | |
/*
 * Copy @length bytes located at @offset of the vector into @buffer.
 *
 * @input_size is the total size of the vector, computed by the caller.
 * Returns true only if @offset lies within the input and all @length bytes
 * were copied.
 */
static bool
_eth_copy_chunk(size_t input_size,
                const struct iovec *iov, int iovcnt,
                size_t offset, size_t length,
                void *buffer)
{
    if (offset > input_size) {
        return false;
    }

    return iov_to_buf(iov, iovcnt, offset, buffer, length) >= length;
}
119 | |
120 | static bool |
121 | _eth_tcp_has_data(bool is_ip4, |
122 | const struct ip_header *ip4_hdr, |
123 | const struct ip6_header *ip6_hdr, |
124 | size_t full_ip6hdr_len, |
125 | const struct tcp_header *tcp) |
126 | { |
127 | uint32_t l4len; |
128 | |
129 | if (is_ip4) { |
130 | l4len = be16_to_cpu(ip4_hdr->ip_len) - IP_HDR_GET_LEN(ip4_hdr); |
131 | } else { |
132 | size_t opts_len = full_ip6hdr_len - sizeof(struct ip6_header); |
133 | l4len = be16_to_cpu(ip6_hdr->ip6_ctlun.ip6_un1.ip6_un1_plen) - opts_len; |
134 | } |
135 | |
136 | return l4len > TCP_HEADER_DATA_OFFSET(tcp); |
137 | } |
138 | |
139 | void eth_get_protocols(const struct iovec *iov, int iovcnt, |
140 | bool *isip4, bool *isip6, |
141 | bool *isudp, bool *istcp, |
142 | size_t *l3hdr_off, |
143 | size_t *l4hdr_off, |
144 | size_t *l5hdr_off, |
145 | eth_ip6_hdr_info *ip6hdr_info, |
146 | eth_ip4_hdr_info *ip4hdr_info, |
147 | eth_l4_hdr_info *l4hdr_info) |
148 | { |
149 | int proto; |
150 | bool fragment = false; |
151 | size_t l2hdr_len = eth_get_l2_hdr_length_iov(iov, iovcnt); |
152 | size_t input_size = iov_size(iov, iovcnt); |
153 | size_t copied; |
154 | |
155 | *isip4 = *isip6 = *isudp = *istcp = false; |
156 | |
157 | proto = eth_get_l3_proto(iov, iovcnt, l2hdr_len); |
158 | |
159 | *l3hdr_off = l2hdr_len; |
160 | |
161 | if (proto == ETH_P_IP) { |
162 | struct ip_header *iphdr = &ip4hdr_info->ip4_hdr; |
163 | |
164 | if (input_size < l2hdr_len) { |
165 | return; |
166 | } |
167 | |
168 | copied = iov_to_buf(iov, iovcnt, l2hdr_len, iphdr, sizeof(*iphdr)); |
169 | |
170 | *isip4 = true; |
171 | |
172 | if (copied < sizeof(*iphdr)) { |
173 | return; |
174 | } |
175 | |
176 | if (IP_HEADER_VERSION(iphdr) == IP_HEADER_VERSION_4) { |
177 | if (iphdr->ip_p == IP_PROTO_TCP) { |
178 | *istcp = true; |
179 | } else if (iphdr->ip_p == IP_PROTO_UDP) { |
180 | *isudp = true; |
181 | } |
182 | } |
183 | |
184 | ip4hdr_info->fragment = IP4_IS_FRAGMENT(iphdr); |
185 | *l4hdr_off = l2hdr_len + IP_HDR_GET_LEN(iphdr); |
186 | |
187 | fragment = ip4hdr_info->fragment; |
188 | } else if (proto == ETH_P_IPV6) { |
189 | |
190 | *isip6 = true; |
191 | if (eth_parse_ipv6_hdr(iov, iovcnt, l2hdr_len, |
192 | ip6hdr_info)) { |
193 | if (ip6hdr_info->l4proto == IP_PROTO_TCP) { |
194 | *istcp = true; |
195 | } else if (ip6hdr_info->l4proto == IP_PROTO_UDP) { |
196 | *isudp = true; |
197 | } |
198 | } else { |
199 | return; |
200 | } |
201 | |
202 | *l4hdr_off = l2hdr_len + ip6hdr_info->full_hdr_len; |
203 | fragment = ip6hdr_info->fragment; |
204 | } |
205 | |
206 | if (!fragment) { |
207 | if (*istcp) { |
208 | *istcp = _eth_copy_chunk(input_size, |
209 | iov, iovcnt, |
210 | *l4hdr_off, sizeof(l4hdr_info->hdr.tcp), |
211 | &l4hdr_info->hdr.tcp); |
212 | |
213 | if (*istcp) { |
214 | *l5hdr_off = *l4hdr_off + |
215 | TCP_HEADER_DATA_OFFSET(&l4hdr_info->hdr.tcp); |
216 | |
217 | l4hdr_info->has_tcp_data = |
218 | _eth_tcp_has_data(proto == ETH_P_IP, |
219 | &ip4hdr_info->ip4_hdr, |
220 | &ip6hdr_info->ip6_hdr, |
221 | *l4hdr_off - *l3hdr_off, |
222 | &l4hdr_info->hdr.tcp); |
223 | } |
224 | } else if (*isudp) { |
225 | *isudp = _eth_copy_chunk(input_size, |
226 | iov, iovcnt, |
227 | *l4hdr_off, sizeof(l4hdr_info->hdr.udp), |
228 | &l4hdr_info->hdr.udp); |
229 | *l5hdr_off = *l4hdr_off + sizeof(l4hdr_info->hdr.udp); |
230 | } |
231 | } |
232 | } |
233 | |
234 | size_t |
235 | eth_strip_vlan(const struct iovec *iov, int iovcnt, size_t iovoff, |
236 | uint8_t *new_ehdr_buf, |
237 | uint16_t *payload_offset, uint16_t *tci) |
238 | { |
239 | struct vlan_header vlan_hdr; |
240 | struct eth_header *new_ehdr = (struct eth_header *) new_ehdr_buf; |
241 | |
242 | size_t copied = iov_to_buf(iov, iovcnt, iovoff, |
243 | new_ehdr, sizeof(*new_ehdr)); |
244 | |
245 | if (copied < sizeof(*new_ehdr)) { |
246 | return 0; |
247 | } |
248 | |
249 | switch (be16_to_cpu(new_ehdr->h_proto)) { |
250 | case ETH_P_VLAN: |
251 | case ETH_P_DVLAN: |
252 | copied = iov_to_buf(iov, iovcnt, iovoff + sizeof(*new_ehdr), |
253 | &vlan_hdr, sizeof(vlan_hdr)); |
254 | |
255 | if (copied < sizeof(vlan_hdr)) { |
256 | return 0; |
257 | } |
258 | |
259 | new_ehdr->h_proto = vlan_hdr.h_proto; |
260 | |
261 | *tci = be16_to_cpu(vlan_hdr.h_tci); |
262 | *payload_offset = iovoff + sizeof(*new_ehdr) + sizeof(vlan_hdr); |
263 | |
264 | if (be16_to_cpu(new_ehdr->h_proto) == ETH_P_VLAN) { |
265 | |
266 | copied = iov_to_buf(iov, iovcnt, *payload_offset, |
267 | PKT_GET_VLAN_HDR(new_ehdr), sizeof(vlan_hdr)); |
268 | |
269 | if (copied < sizeof(vlan_hdr)) { |
270 | return 0; |
271 | } |
272 | |
273 | *payload_offset += sizeof(vlan_hdr); |
274 | |
275 | return sizeof(struct eth_header) + sizeof(struct vlan_header); |
276 | } else { |
277 | return sizeof(struct eth_header); |
278 | } |
279 | default: |
280 | return 0; |
281 | } |
282 | } |
283 | |
284 | size_t |
285 | eth_strip_vlan_ex(const struct iovec *iov, int iovcnt, size_t iovoff, |
286 | uint16_t vet, uint8_t *new_ehdr_buf, |
287 | uint16_t *payload_offset, uint16_t *tci) |
288 | { |
289 | struct vlan_header vlan_hdr; |
290 | struct eth_header *new_ehdr = (struct eth_header *) new_ehdr_buf; |
291 | |
292 | size_t copied = iov_to_buf(iov, iovcnt, iovoff, |
293 | new_ehdr, sizeof(*new_ehdr)); |
294 | |
295 | if (copied < sizeof(*new_ehdr)) { |
296 | return 0; |
297 | } |
298 | |
299 | if (be16_to_cpu(new_ehdr->h_proto) == vet) { |
300 | copied = iov_to_buf(iov, iovcnt, iovoff + sizeof(*new_ehdr), |
301 | &vlan_hdr, sizeof(vlan_hdr)); |
302 | |
303 | if (copied < sizeof(vlan_hdr)) { |
304 | return 0; |
305 | } |
306 | |
307 | new_ehdr->h_proto = vlan_hdr.h_proto; |
308 | |
309 | *tci = be16_to_cpu(vlan_hdr.h_tci); |
310 | *payload_offset = iovoff + sizeof(*new_ehdr) + sizeof(vlan_hdr); |
311 | return sizeof(struct eth_header); |
312 | } |
313 | |
314 | return 0; |
315 | } |
316 | |
317 | void |
318 | eth_setup_ip4_fragmentation(const void *l2hdr, size_t l2hdr_len, |
319 | void *l3hdr, size_t l3hdr_len, |
320 | size_t l3payload_len, |
321 | size_t frag_offset, bool more_frags) |
322 | { |
323 | const struct iovec l2vec = { |
324 | .iov_base = (void *) l2hdr, |
325 | .iov_len = l2hdr_len |
326 | }; |
327 | |
328 | if (eth_get_l3_proto(&l2vec, 1, l2hdr_len) == ETH_P_IP) { |
329 | uint16_t orig_flags; |
330 | struct ip_header *iphdr = (struct ip_header *) l3hdr; |
331 | uint16_t frag_off_units = frag_offset / IP_FRAG_UNIT_SIZE; |
332 | uint16_t new_ip_off; |
333 | |
334 | assert(frag_offset % IP_FRAG_UNIT_SIZE == 0); |
335 | assert((frag_off_units & ~IP_OFFMASK) == 0); |
336 | |
337 | orig_flags = be16_to_cpu(iphdr->ip_off) & ~(IP_OFFMASK|IP_MF); |
338 | new_ip_off = frag_off_units | orig_flags | (more_frags ? IP_MF : 0); |
339 | iphdr->ip_off = cpu_to_be16(new_ip_off); |
340 | iphdr->ip_len = cpu_to_be16(l3payload_len + l3hdr_len); |
341 | } |
342 | } |
343 | |
344 | void |
345 | eth_fix_ip4_checksum(void *l3hdr, size_t l3hdr_len) |
346 | { |
347 | struct ip_header *iphdr = (struct ip_header *) l3hdr; |
348 | iphdr->ip_sum = 0; |
349 | iphdr->ip_sum = cpu_to_be16(net_raw_checksum(l3hdr, l3hdr_len)); |
350 | } |
351 | |
352 | uint32_t |
353 | eth_calc_ip4_pseudo_hdr_csum(struct ip_header *iphdr, |
354 | uint16_t csl, |
355 | uint32_t *cso) |
356 | { |
357 | struct ip_pseudo_header ipph; |
358 | ipph.ip_src = iphdr->ip_src; |
359 | ipph.ip_dst = iphdr->ip_dst; |
360 | ipph.ip_payload = cpu_to_be16(csl); |
361 | ipph.ip_proto = iphdr->ip_p; |
362 | ipph.zeros = 0; |
363 | *cso = sizeof(ipph); |
364 | return net_checksum_add(*cso, (uint8_t *) &ipph); |
365 | } |
366 | |
367 | uint32_t |
368 | eth_calc_ip6_pseudo_hdr_csum(struct ip6_header *iphdr, |
369 | uint16_t csl, |
370 | uint8_t l4_proto, |
371 | uint32_t *cso) |
372 | { |
373 | struct ip6_pseudo_header ipph; |
374 | ipph.ip6_src = iphdr->ip6_src; |
375 | ipph.ip6_dst = iphdr->ip6_dst; |
376 | ipph.len = cpu_to_be16(csl); |
377 | ipph.zero[0] = 0; |
378 | ipph.zero[1] = 0; |
379 | ipph.zero[2] = 0; |
380 | ipph.next_hdr = l4_proto; |
381 | *cso = sizeof(ipph); |
382 | return net_checksum_add(*cso, (uint8_t *)&ipph); |
383 | } |
384 | |
385 | static bool |
386 | (uint8_t hdr_type) |
387 | { |
388 | switch (hdr_type) { |
389 | case IP6_HOP_BY_HOP: |
390 | case IP6_ROUTING: |
391 | case IP6_FRAGMENT: |
392 | case IP6_ESP: |
393 | case IP6_AUTHENTICATION: |
394 | case IP6_DESTINATON: |
395 | case IP6_MOBILITY: |
396 | return true; |
397 | default: |
398 | return false; |
399 | } |
400 | } |
401 | |
402 | static bool |
403 | (const struct iovec *pkt, int pkt_frags, |
404 | size_t rthdr_offset, |
405 | struct ip6_ext_hdr *ext_hdr, |
406 | struct in6_address *dst_addr) |
407 | { |
408 | struct ip6_ext_hdr_routing *rthdr = (struct ip6_ext_hdr_routing *) ext_hdr; |
409 | |
410 | if ((rthdr->rtype == 2) && |
411 | (rthdr->len == sizeof(struct in6_address) / 8) && |
412 | (rthdr->segleft == 1)) { |
413 | |
414 | size_t input_size = iov_size(pkt, pkt_frags); |
415 | size_t bytes_read; |
416 | |
417 | if (input_size < rthdr_offset + sizeof(*ext_hdr)) { |
418 | return false; |
419 | } |
420 | |
421 | bytes_read = iov_to_buf(pkt, pkt_frags, |
422 | rthdr_offset + sizeof(*ext_hdr), |
423 | dst_addr, sizeof(*dst_addr)); |
424 | |
425 | return bytes_read == sizeof(*dst_addr); |
426 | } |
427 | |
428 | return false; |
429 | } |
430 | |
431 | static bool |
432 | (const struct iovec *pkt, int pkt_frags, |
433 | size_t dsthdr_offset, |
434 | struct ip6_ext_hdr *ext_hdr, |
435 | struct in6_address *src_addr) |
436 | { |
437 | size_t bytes_left = (ext_hdr->ip6r_len + 1) * 8 - sizeof(*ext_hdr); |
438 | struct ip6_option_hdr opthdr; |
439 | size_t opt_offset = dsthdr_offset + sizeof(*ext_hdr); |
440 | |
441 | while (bytes_left > sizeof(opthdr)) { |
442 | size_t input_size = iov_size(pkt, pkt_frags); |
443 | size_t bytes_read, optlen; |
444 | |
445 | if (input_size < opt_offset) { |
446 | return false; |
447 | } |
448 | |
449 | bytes_read = iov_to_buf(pkt, pkt_frags, opt_offset, |
450 | &opthdr, sizeof(opthdr)); |
451 | |
452 | if (bytes_read != sizeof(opthdr)) { |
453 | return false; |
454 | } |
455 | |
456 | optlen = (opthdr.type == IP6_OPT_PAD1) ? 1 |
457 | : (opthdr.len + sizeof(opthdr)); |
458 | |
459 | if (optlen > bytes_left) { |
460 | return false; |
461 | } |
462 | |
463 | if (opthdr.type == IP6_OPT_HOME) { |
464 | size_t input_size = iov_size(pkt, pkt_frags); |
465 | |
466 | if (input_size < opt_offset + sizeof(opthdr)) { |
467 | return false; |
468 | } |
469 | |
470 | bytes_read = iov_to_buf(pkt, pkt_frags, |
471 | opt_offset + sizeof(opthdr), |
472 | src_addr, sizeof(*src_addr)); |
473 | |
474 | return bytes_read == sizeof(*src_addr); |
475 | } |
476 | |
477 | opt_offset += optlen; |
478 | bytes_left -= optlen; |
479 | } |
480 | |
481 | return false; |
482 | } |
483 | |
484 | bool eth_parse_ipv6_hdr(const struct iovec *pkt, int pkt_frags, |
485 | size_t ip6hdr_off, eth_ip6_hdr_info *info) |
486 | { |
487 | struct ip6_ext_hdr ext_hdr; |
488 | size_t bytes_read; |
489 | uint8_t curr_ext_hdr_type; |
490 | size_t input_size = iov_size(pkt, pkt_frags); |
491 | |
492 | info->rss_ex_dst_valid = false; |
493 | info->rss_ex_src_valid = false; |
494 | info->fragment = false; |
495 | |
496 | if (input_size < ip6hdr_off) { |
497 | return false; |
498 | } |
499 | |
500 | bytes_read = iov_to_buf(pkt, pkt_frags, ip6hdr_off, |
501 | &info->ip6_hdr, sizeof(info->ip6_hdr)); |
502 | if (bytes_read < sizeof(info->ip6_hdr)) { |
503 | return false; |
504 | } |
505 | |
506 | info->full_hdr_len = sizeof(struct ip6_header); |
507 | |
508 | curr_ext_hdr_type = info->ip6_hdr.ip6_nxt; |
509 | |
510 | if (!eth_is_ip6_extension_header_type(curr_ext_hdr_type)) { |
511 | info->l4proto = info->ip6_hdr.ip6_nxt; |
512 | info->has_ext_hdrs = false; |
513 | return true; |
514 | } |
515 | |
516 | info->has_ext_hdrs = true; |
517 | |
518 | do { |
519 | if (input_size < ip6hdr_off + info->full_hdr_len) { |
520 | return false; |
521 | } |
522 | |
523 | bytes_read = iov_to_buf(pkt, pkt_frags, ip6hdr_off + info->full_hdr_len, |
524 | &ext_hdr, sizeof(ext_hdr)); |
525 | |
526 | if (bytes_read < sizeof(ext_hdr)) { |
527 | return false; |
528 | } |
529 | |
530 | if (curr_ext_hdr_type == IP6_ROUTING) { |
531 | info->rss_ex_dst_valid = |
532 | _eth_get_rss_ex_dst_addr(pkt, pkt_frags, |
533 | ip6hdr_off + info->full_hdr_len, |
534 | &ext_hdr, &info->rss_ex_dst); |
535 | } else if (curr_ext_hdr_type == IP6_DESTINATON) { |
536 | info->rss_ex_src_valid = |
537 | _eth_get_rss_ex_src_addr(pkt, pkt_frags, |
538 | ip6hdr_off + info->full_hdr_len, |
539 | &ext_hdr, &info->rss_ex_src); |
540 | } else if (curr_ext_hdr_type == IP6_FRAGMENT) { |
541 | info->fragment = true; |
542 | } |
543 | |
544 | info->full_hdr_len += (ext_hdr.ip6r_len + 1) * IP6_EXT_GRANULARITY; |
545 | curr_ext_hdr_type = ext_hdr.ip6r_nxt; |
546 | } while (eth_is_ip6_extension_header_type(curr_ext_hdr_type)); |
547 | |
548 | info->l4proto = ext_hdr.ip6r_nxt; |
549 | return true; |
550 | } |
551 | |