| 1 | #pragma once |
| 2 | |
| 3 | #include <common/Types.h> |
| 4 | #include <string.h> |
| 5 | #include <algorithm> |
| 6 | #include <utility> |
| 7 | #include <ext/range.h> |
| 8 | #include <Common/hex.h> |
| 9 | #include <Common/StringUtils/StringUtils.h> |
| 10 | |
/// Sizes of the binary address representations, in bytes.
constexpr size_t IPV4_BINARY_LENGTH = 4;
constexpr size_t IPV6_BINARY_LENGTH = 16;

/// Longest textual representations. Neither value counts the tail zero byte.
/// IPv4: four octets of up to three decimal digits each, separated by three dots.
constexpr size_t IPV4_MAX_TEXT_LENGTH = IPV4_BINARY_LENGTH * 3 + (IPV4_BINARY_LENGTH - 1);
/// IPv6: eight groups of four hex digits each, separated by seven colons.
constexpr size_t IPV6_MAX_TEXT_LENGTH = (IPV6_BINARY_LENGTH / 2) * 4 + (IPV6_BINARY_LENGTH / 2 - 1);
| 15 | |
| 16 | namespace DB |
| 17 | { |
| 18 | |
| 19 | |
/** Rewritten inet_ntop6 from http://svn.apache.org/repos/asf/apr/apr/trunk/network_io/unix/inet_pton.c
  *  performs significantly faster than the reference implementation due to the absence of sprintf calls,
  *  bounds checking, unnecessary string copying and length calculation.
  *
  * Only the declaration is visible here.
  * NOTE(review): `dst` is taken by reference, presumably so that it can be advanced past the written text,
  *  and `zeroed_tail_bytes_count` presumably is the number of trailing bytes of `src` to treat as zero;
  *  confirm both against the definition.
  */
void formatIPv6(const unsigned char * src, char *& dst, UInt8 zeroed_tail_bytes_count = 0);
| 25 | |
| 26 | /** Unsafe (no bounds-checking for src nor dst), optimized version of parsing IPv4 string. |
| 27 | * |
| 28 | * Parses the input string `src` and stores binary BE value into buffer pointed by `dst`, |
| 29 | * which should be long enough. |
| 30 | * That is "127.0.0.1" becomes 0x7f000001. |
| 31 | * |
| 32 | * In case of failure returns false and doesn't modify buffer pointed by `dst`. |
| 33 | * |
| 34 | * @param src - input string, expected to be non-null and null-terminated right after the IPv4 string value. |
| 35 | * @param dst - where to put output bytes, expected to be non-null and atleast IPV4_BINARY_LENGTH-long. |
| 36 | * @return false if parsing failed, true otherwise. |
| 37 | */ |
| 38 | inline bool parseIPv4(const char * src, unsigned char * dst) |
| 39 | { |
| 40 | UInt32 result = 0; |
| 41 | for (int offset = 24; offset >= 0; offset -= 8) |
| 42 | { |
| 43 | UInt32 value = 0; |
| 44 | size_t len = 0; |
| 45 | while (isNumericASCII(*src) && len <= 3) |
| 46 | { |
| 47 | value = value * 10 + (*src - '0'); |
| 48 | ++len; |
| 49 | ++src; |
| 50 | } |
| 51 | if (len == 0 || value > 255 || (offset > 0 && *src != '.')) |
| 52 | return false; |
| 53 | result |= value << offset; |
| 54 | ++src; |
| 55 | } |
| 56 | if (*(src - 1) != '\0') |
| 57 | return false; |
| 58 | |
| 59 | memcpy(dst, &result, sizeof(result)); |
| 60 | return true; |
| 61 | } |
| 62 | |
| 63 | /** Unsafe (no bounds-checking for src nor dst), optimized version of parsing IPv6 string. |
| 64 | * |
| 65 | * Slightly altered implementation from http://svn.apache.org/repos/asf/apr/apr/trunk/network_io/unix/inet_pton.c |
| 66 | * Parses the input string `src` and stores binary LE value into buffer pointed by `dst`, |
| 67 | * which should be long enough. In case of failure zeroes |
| 68 | * IPV6_BINARY_LENGTH bytes of buffer pointed by `dst`. |
| 69 | * |
| 70 | * @param src - input string, expected to be non-null and null-terminated right after the IPv6 string value. |
| 71 | * @param dst - where to put output bytes, expected to be non-null and atleast IPV6_BINARY_LENGTH-long. |
| 72 | * @return false if parsing failed, true otherwise. |
| 73 | */ |
| 74 | inline bool parseIPv6(const char * src, unsigned char * dst) |
| 75 | { |
| 76 | const auto clear_dst = [dst]() |
| 77 | { |
| 78 | memset(dst, '\0', IPV6_BINARY_LENGTH); |
| 79 | return false; |
| 80 | }; |
| 81 | |
| 82 | /// Leading :: requires some special handling. |
| 83 | if (*src == ':') |
| 84 | if (*++src != ':') |
| 85 | return clear_dst(); |
| 86 | |
| 87 | unsigned char tmp[IPV6_BINARY_LENGTH]{}; |
| 88 | auto tp = tmp; |
| 89 | auto endp = tp + IPV6_BINARY_LENGTH; |
| 90 | auto curtok = src; |
| 91 | auto saw_xdigit = false; |
| 92 | UInt32 val{}; |
| 93 | unsigned char * colonp = nullptr; |
| 94 | |
| 95 | /// Assuming zero-terminated string. |
| 96 | while (const auto ch = *src++) |
| 97 | { |
| 98 | const auto num = unhex(ch); |
| 99 | |
| 100 | if (num != '\xff') |
| 101 | { |
| 102 | val <<= 4; |
| 103 | val |= num; |
| 104 | if (val > 0xffffu) |
| 105 | return clear_dst(); |
| 106 | |
| 107 | saw_xdigit = 1; |
| 108 | continue; |
| 109 | } |
| 110 | |
| 111 | if (ch == ':') |
| 112 | { |
| 113 | curtok = src; |
| 114 | if (!saw_xdigit) |
| 115 | { |
| 116 | if (colonp) |
| 117 | return clear_dst(); |
| 118 | |
| 119 | colonp = tp; |
| 120 | continue; |
| 121 | } |
| 122 | |
| 123 | if (tp + sizeof(UInt16) > endp) |
| 124 | return clear_dst(); |
| 125 | |
| 126 | *tp++ = static_cast<unsigned char>((val >> 8) & 0xffu); |
| 127 | *tp++ = static_cast<unsigned char>(val & 0xffu); |
| 128 | saw_xdigit = false; |
| 129 | val = 0; |
| 130 | continue; |
| 131 | } |
| 132 | |
| 133 | if (ch == '.' && (tp + IPV4_BINARY_LENGTH) <= endp) |
| 134 | { |
| 135 | if (!parseIPv4(curtok, tp)) |
| 136 | return clear_dst(); |
| 137 | std::reverse(tp, tp + IPV4_BINARY_LENGTH); |
| 138 | |
| 139 | tp += IPV4_BINARY_LENGTH; |
| 140 | saw_xdigit = false; |
| 141 | break; /* '\0' was seen by ipv4_scan(). */ |
| 142 | } |
| 143 | |
| 144 | return clear_dst(); |
| 145 | } |
| 146 | |
| 147 | if (saw_xdigit) |
| 148 | { |
| 149 | if (tp + sizeof(UInt16) > endp) |
| 150 | return clear_dst(); |
| 151 | |
| 152 | *tp++ = static_cast<unsigned char>((val >> 8) & 0xffu); |
| 153 | *tp++ = static_cast<unsigned char>(val & 0xffu); |
| 154 | } |
| 155 | |
| 156 | if (colonp) |
| 157 | { |
| 158 | /* |
| 159 | * Since some memmove()'s erroneously fail to handle |
| 160 | * overlapping regions, we'll do the shift by hand. |
| 161 | */ |
| 162 | const auto n = tp - colonp; |
| 163 | |
| 164 | for (int i = 1; i <= n; ++i) |
| 165 | { |
| 166 | endp[- i] = colonp[n - i]; |
| 167 | colonp[n - i] = 0; |
| 168 | } |
| 169 | tp = endp; |
| 170 | } |
| 171 | |
| 172 | if (tp != endp) |
| 173 | return clear_dst(); |
| 174 | |
| 175 | memcpy(dst, tmp, sizeof(tmp)); |
| 176 | return true; |
| 177 | } |
| 178 | |
| 179 | /** Format 4-byte binary sequesnce as IPv4 text: 'aaa.bbb.ccc.ddd', |
| 180 | * expects inout to be in BE-format, that is 0x7f000001 => "127.0.0.1". |
| 181 | * |
| 182 | * Any number of the tail bytes can be masked with given mask string. |
| 183 | * |
| 184 | * Assumptions: |
| 185 | * src is IPV4_BINARY_LENGTH long, |
| 186 | * dst is IPV4_MAX_TEXT_LENGTH long, |
| 187 | * mask_tail_octets <= IPV4_BINARY_LENGTH |
| 188 | * mask_string is NON-NULL, if mask_tail_octets > 0. |
| 189 | * |
| 190 | * Examples: |
| 191 | * formatIPv4(&0x7f000001, dst, mask_tail_octets = 0, nullptr); |
| 192 | * > dst == "127.0.0.1" |
| 193 | * formatIPv4(&0x7f000001, dst, mask_tail_octets = 1, "xxx"); |
| 194 | * > dst == "127.0.0.xxx" |
| 195 | * formatIPv4(&0x7f000001, dst, mask_tail_octets = 1, "0"); |
| 196 | * > dst == "127.0.0.0" |
| 197 | */ |
| 198 | inline void formatIPv4(const unsigned char * src, char *& dst, UInt8 mask_tail_octets = 0, const char * mask_string = "xxx" ) |
| 199 | { |
| 200 | extern const char one_byte_to_string_lookup_table[256][4]; |
| 201 | |
| 202 | const size_t mask_length = mask_string ? strlen(mask_string) : 0; |
| 203 | const size_t limit = std::min(IPV4_BINARY_LENGTH, IPV4_BINARY_LENGTH - mask_tail_octets); |
| 204 | for (size_t octet = 0; octet < limit; ++octet) |
| 205 | { |
| 206 | const UInt8 value = static_cast<UInt8>(src[IPV4_BINARY_LENGTH - octet - 1]); |
| 207 | auto rep = one_byte_to_string_lookup_table[value]; |
| 208 | const UInt8 len = rep[0]; |
| 209 | const char* str = rep + 1; |
| 210 | |
| 211 | memcpy(dst, str, len); |
| 212 | dst += len; |
| 213 | *dst++ = '.'; |
| 214 | } |
| 215 | |
| 216 | for (size_t mask = 0; mask < mask_tail_octets; ++mask) |
| 217 | { |
| 218 | memcpy(dst, mask_string, mask_length); |
| 219 | dst += mask_length; |
| 220 | |
| 221 | *dst++ = '.'; |
| 222 | } |
| 223 | |
| 224 | dst[-1] = '\0'; |
| 225 | } |
| 226 | |
| 227 | } |
| 228 | |