| 1 | #pragma once |
| 2 | |
| 3 | #include <common/find_symbols.h> |
| 4 | #include "domain.h" |
| 5 | #include "tldLookup.h" |
| 6 | |
| 7 | namespace DB |
| 8 | { |
| 9 | |
| 10 | struct ExtractFirstSignificantSubdomain |
| 11 | { |
| 12 | static size_t getReserveLengthForElement() { return 10; } |
| 13 | |
| 14 | static void execute(const Pos data, const size_t size, Pos & res_data, size_t & res_size, Pos * out_domain_end = nullptr) |
| 15 | { |
| 16 | res_data = data; |
| 17 | res_size = 0; |
| 18 | |
| 19 | Pos tmp; |
| 20 | size_t domain_length; |
| 21 | ExtractDomain<true>::execute(data, size, tmp, domain_length); |
| 22 | |
| 23 | if (domain_length == 0) |
| 24 | return; |
| 25 | |
| 26 | if (out_domain_end) |
| 27 | *out_domain_end = tmp + domain_length; |
| 28 | |
| 29 | /// cut useless dot |
| 30 | if (tmp[domain_length - 1] == '.') |
| 31 | --domain_length; |
| 32 | |
| 33 | res_data = tmp; |
| 34 | res_size = domain_length; |
| 35 | |
| 36 | auto begin = tmp; |
| 37 | auto end = begin + domain_length; |
| 38 | const char * last_3_periods[3]{}; |
| 39 | |
| 40 | auto pos = find_first_symbols<'.'>(begin, end); |
| 41 | while (pos < end) |
| 42 | { |
| 43 | last_3_periods[2] = last_3_periods[1]; |
| 44 | last_3_periods[1] = last_3_periods[0]; |
| 45 | last_3_periods[0] = pos; |
| 46 | pos = find_first_symbols<'.'>(pos + 1, end); |
| 47 | } |
| 48 | |
| 49 | if (!last_3_periods[0]) |
| 50 | return; |
| 51 | |
| 52 | if (!last_3_periods[1]) |
| 53 | { |
| 54 | res_size = last_3_periods[0] - begin; |
| 55 | return; |
| 56 | } |
| 57 | |
| 58 | if (!last_3_periods[2]) |
| 59 | last_3_periods[2] = begin - 1; |
| 60 | |
| 61 | auto end_of_level_domain = find_first_symbols<'/'>(last_3_periods[0], end); |
| 62 | if (!end_of_level_domain) |
| 63 | { |
| 64 | end_of_level_domain = end; |
| 65 | } |
| 66 | |
| 67 | if (tldLookup::is_valid(last_3_periods[1] + 1, end_of_level_domain - last_3_periods[1] - 1) != nullptr) |
| 68 | { |
| 69 | res_data += last_3_periods[2] + 1 - begin; |
| 70 | res_size = last_3_periods[1] - last_3_periods[2] - 1; |
| 71 | } |
| 72 | else |
| 73 | { |
| 74 | res_data += last_3_periods[1] + 1 - begin; |
| 75 | res_size = last_3_periods[0] - last_3_periods[1] - 1; |
| 76 | } |
| 77 | } |
| 78 | }; |
| 79 | |
| 80 | } |
| 81 | |
| 82 | |