1 | #pragma once |
2 | |
3 | #include <common/Types.h> |
4 | #include <string.h> |
5 | #include <algorithm> |
6 | #include <utility> |
7 | #include <ext/range.h> |
8 | #include <Common/hex.h> |
9 | #include <Common/StringUtils/StringUtils.h> |
10 | |
/// Size in bytes of the binary form of an IPv4 address.
constexpr size_t IPV4_BINARY_LENGTH = 4;
/// Size in bytes of the binary form of an IPv6 address.
constexpr size_t IPV6_BINARY_LENGTH = 16;
/// Length of the longest dotted-decimal form, "255.255.255.255" (4 * 3 digits + 3 dots).
constexpr size_t IPV4_MAX_TEXT_LENGTH = 15; /// Does not count tail zero byte.
/// Length of eight groups of four hex digits separated by seven colons (8 * 4 + 7).
/// Presumably does not count the tail zero byte either - TODO confirm.
/// NOTE(review): parseIPv6 in this file also accepts a dotted-decimal IPv4 tail,
/// e.g. "ffff:ffff:ffff:ffff:ffff:ffff:255.255.255.255", which is 45 characters long;
/// confirm that buffers sized from this constant are only used for formatting.
constexpr size_t IPV6_MAX_TEXT_LENGTH = 39;
15 | |
16 | namespace DB |
17 | { |
18 | |
19 | |
/** Rewritten inet_ntop6 from http://svn.apache.org/repos/asf/apr/apr/trunk/network_io/unix/inet_pton.c
  * It performs significantly faster than the reference implementation due to the absence of sprintf calls,
  * bounds checking, unnecessary string copying and length calculation.
  *
  * Only the declaration is visible here; the notes below are assumptions to be confirmed against the definition.
  *
  * @param src - binary IPv6 address; presumably IPV6_BINARY_LENGTH bytes long - TODO confirm.
  * @param dst - output text buffer, taken by reference so the callee can advance the caller's pointer;
  *              presumably left pointing past the written text, as formatIPv4 does - TODO confirm.
  * @param zeroed_tail_bytes_count - judging by the name, the number of trailing address bytes
  *              to treat as zero when formatting - TODO confirm. Defaults to 0.
  */
void formatIPv6(const unsigned char * src, char *& dst, UInt8 zeroed_tail_bytes_count = 0);
25 | |
26 | /** Unsafe (no bounds-checking for src nor dst), optimized version of parsing IPv4 string. |
27 | * |
28 | * Parses the input string `src` and stores binary BE value into buffer pointed by `dst`, |
29 | * which should be long enough. |
30 | * That is "127.0.0.1" becomes 0x7f000001. |
31 | * |
32 | * In case of failure returns false and doesn't modify buffer pointed by `dst`. |
33 | * |
34 | * @param src - input string, expected to be non-null and null-terminated right after the IPv4 string value. |
35 | * @param dst - where to put output bytes, expected to be non-null and atleast IPV4_BINARY_LENGTH-long. |
36 | * @return false if parsing failed, true otherwise. |
37 | */ |
38 | inline bool parseIPv4(const char * src, unsigned char * dst) |
39 | { |
40 | UInt32 result = 0; |
41 | for (int offset = 24; offset >= 0; offset -= 8) |
42 | { |
43 | UInt32 value = 0; |
44 | size_t len = 0; |
45 | while (isNumericASCII(*src) && len <= 3) |
46 | { |
47 | value = value * 10 + (*src - '0'); |
48 | ++len; |
49 | ++src; |
50 | } |
51 | if (len == 0 || value > 255 || (offset > 0 && *src != '.')) |
52 | return false; |
53 | result |= value << offset; |
54 | ++src; |
55 | } |
56 | if (*(src - 1) != '\0') |
57 | return false; |
58 | |
59 | memcpy(dst, &result, sizeof(result)); |
60 | return true; |
61 | } |
62 | |
63 | /** Unsafe (no bounds-checking for src nor dst), optimized version of parsing IPv6 string. |
64 | * |
65 | * Slightly altered implementation from http://svn.apache.org/repos/asf/apr/apr/trunk/network_io/unix/inet_pton.c |
66 | * Parses the input string `src` and stores binary LE value into buffer pointed by `dst`, |
67 | * which should be long enough. In case of failure zeroes |
68 | * IPV6_BINARY_LENGTH bytes of buffer pointed by `dst`. |
69 | * |
70 | * @param src - input string, expected to be non-null and null-terminated right after the IPv6 string value. |
71 | * @param dst - where to put output bytes, expected to be non-null and atleast IPV6_BINARY_LENGTH-long. |
72 | * @return false if parsing failed, true otherwise. |
73 | */ |
74 | inline bool parseIPv6(const char * src, unsigned char * dst) |
75 | { |
76 | const auto clear_dst = [dst]() |
77 | { |
78 | memset(dst, '\0', IPV6_BINARY_LENGTH); |
79 | return false; |
80 | }; |
81 | |
82 | /// Leading :: requires some special handling. |
83 | if (*src == ':') |
84 | if (*++src != ':') |
85 | return clear_dst(); |
86 | |
87 | unsigned char tmp[IPV6_BINARY_LENGTH]{}; |
88 | auto tp = tmp; |
89 | auto endp = tp + IPV6_BINARY_LENGTH; |
90 | auto curtok = src; |
91 | auto saw_xdigit = false; |
92 | UInt32 val{}; |
93 | unsigned char * colonp = nullptr; |
94 | |
95 | /// Assuming zero-terminated string. |
96 | while (const auto ch = *src++) |
97 | { |
98 | const auto num = unhex(ch); |
99 | |
100 | if (num != '\xff') |
101 | { |
102 | val <<= 4; |
103 | val |= num; |
104 | if (val > 0xffffu) |
105 | return clear_dst(); |
106 | |
107 | saw_xdigit = 1; |
108 | continue; |
109 | } |
110 | |
111 | if (ch == ':') |
112 | { |
113 | curtok = src; |
114 | if (!saw_xdigit) |
115 | { |
116 | if (colonp) |
117 | return clear_dst(); |
118 | |
119 | colonp = tp; |
120 | continue; |
121 | } |
122 | |
123 | if (tp + sizeof(UInt16) > endp) |
124 | return clear_dst(); |
125 | |
126 | *tp++ = static_cast<unsigned char>((val >> 8) & 0xffu); |
127 | *tp++ = static_cast<unsigned char>(val & 0xffu); |
128 | saw_xdigit = false; |
129 | val = 0; |
130 | continue; |
131 | } |
132 | |
133 | if (ch == '.' && (tp + IPV4_BINARY_LENGTH) <= endp) |
134 | { |
135 | if (!parseIPv4(curtok, tp)) |
136 | return clear_dst(); |
137 | std::reverse(tp, tp + IPV4_BINARY_LENGTH); |
138 | |
139 | tp += IPV4_BINARY_LENGTH; |
140 | saw_xdigit = false; |
141 | break; /* '\0' was seen by ipv4_scan(). */ |
142 | } |
143 | |
144 | return clear_dst(); |
145 | } |
146 | |
147 | if (saw_xdigit) |
148 | { |
149 | if (tp + sizeof(UInt16) > endp) |
150 | return clear_dst(); |
151 | |
152 | *tp++ = static_cast<unsigned char>((val >> 8) & 0xffu); |
153 | *tp++ = static_cast<unsigned char>(val & 0xffu); |
154 | } |
155 | |
156 | if (colonp) |
157 | { |
158 | /* |
159 | * Since some memmove()'s erroneously fail to handle |
160 | * overlapping regions, we'll do the shift by hand. |
161 | */ |
162 | const auto n = tp - colonp; |
163 | |
164 | for (int i = 1; i <= n; ++i) |
165 | { |
166 | endp[- i] = colonp[n - i]; |
167 | colonp[n - i] = 0; |
168 | } |
169 | tp = endp; |
170 | } |
171 | |
172 | if (tp != endp) |
173 | return clear_dst(); |
174 | |
175 | memcpy(dst, tmp, sizeof(tmp)); |
176 | return true; |
177 | } |
178 | |
179 | /** Format 4-byte binary sequesnce as IPv4 text: 'aaa.bbb.ccc.ddd', |
180 | * expects inout to be in BE-format, that is 0x7f000001 => "127.0.0.1". |
181 | * |
182 | * Any number of the tail bytes can be masked with given mask string. |
183 | * |
184 | * Assumptions: |
185 | * src is IPV4_BINARY_LENGTH long, |
186 | * dst is IPV4_MAX_TEXT_LENGTH long, |
187 | * mask_tail_octets <= IPV4_BINARY_LENGTH |
188 | * mask_string is NON-NULL, if mask_tail_octets > 0. |
189 | * |
190 | * Examples: |
191 | * formatIPv4(&0x7f000001, dst, mask_tail_octets = 0, nullptr); |
192 | * > dst == "127.0.0.1" |
193 | * formatIPv4(&0x7f000001, dst, mask_tail_octets = 1, "xxx"); |
194 | * > dst == "127.0.0.xxx" |
195 | * formatIPv4(&0x7f000001, dst, mask_tail_octets = 1, "0"); |
196 | * > dst == "127.0.0.0" |
197 | */ |
198 | inline void formatIPv4(const unsigned char * src, char *& dst, UInt8 mask_tail_octets = 0, const char * mask_string = "xxx" ) |
199 | { |
200 | extern const char one_byte_to_string_lookup_table[256][4]; |
201 | |
202 | const size_t mask_length = mask_string ? strlen(mask_string) : 0; |
203 | const size_t limit = std::min(IPV4_BINARY_LENGTH, IPV4_BINARY_LENGTH - mask_tail_octets); |
204 | for (size_t octet = 0; octet < limit; ++octet) |
205 | { |
206 | const UInt8 value = static_cast<UInt8>(src[IPV4_BINARY_LENGTH - octet - 1]); |
207 | auto rep = one_byte_to_string_lookup_table[value]; |
208 | const UInt8 len = rep[0]; |
209 | const char* str = rep + 1; |
210 | |
211 | memcpy(dst, str, len); |
212 | dst += len; |
213 | *dst++ = '.'; |
214 | } |
215 | |
216 | for (size_t mask = 0; mask < mask_tail_octets; ++mask) |
217 | { |
218 | memcpy(dst, mask_string, mask_length); |
219 | dst += mask_length; |
220 | |
221 | *dst++ = '.'; |
222 | } |
223 | |
224 | dst[-1] = '\0'; |
225 | } |
226 | |
227 | } |
228 | |