1 | #pragma once |
2 | |
3 | #include <common/find_symbols.h> |
4 | #include "domain.h" |
5 | #include "tldLookup.h" |
6 | |
7 | namespace DB |
8 | { |
9 | |
10 | struct ExtractFirstSignificantSubdomain |
11 | { |
12 | static size_t getReserveLengthForElement() { return 10; } |
13 | |
14 | static void execute(const Pos data, const size_t size, Pos & res_data, size_t & res_size, Pos * out_domain_end = nullptr) |
15 | { |
16 | res_data = data; |
17 | res_size = 0; |
18 | |
19 | Pos tmp; |
20 | size_t domain_length; |
21 | ExtractDomain<true>::execute(data, size, tmp, domain_length); |
22 | |
23 | if (domain_length == 0) |
24 | return; |
25 | |
26 | if (out_domain_end) |
27 | *out_domain_end = tmp + domain_length; |
28 | |
29 | /// cut useless dot |
30 | if (tmp[domain_length - 1] == '.') |
31 | --domain_length; |
32 | |
33 | res_data = tmp; |
34 | res_size = domain_length; |
35 | |
36 | auto begin = tmp; |
37 | auto end = begin + domain_length; |
38 | const char * last_3_periods[3]{}; |
39 | |
40 | auto pos = find_first_symbols<'.'>(begin, end); |
41 | while (pos < end) |
42 | { |
43 | last_3_periods[2] = last_3_periods[1]; |
44 | last_3_periods[1] = last_3_periods[0]; |
45 | last_3_periods[0] = pos; |
46 | pos = find_first_symbols<'.'>(pos + 1, end); |
47 | } |
48 | |
49 | if (!last_3_periods[0]) |
50 | return; |
51 | |
52 | if (!last_3_periods[1]) |
53 | { |
54 | res_size = last_3_periods[0] - begin; |
55 | return; |
56 | } |
57 | |
58 | if (!last_3_periods[2]) |
59 | last_3_periods[2] = begin - 1; |
60 | |
61 | auto end_of_level_domain = find_first_symbols<'/'>(last_3_periods[0], end); |
62 | if (!end_of_level_domain) |
63 | { |
64 | end_of_level_domain = end; |
65 | } |
66 | |
67 | if (tldLookup::is_valid(last_3_periods[1] + 1, end_of_level_domain - last_3_periods[1] - 1) != nullptr) |
68 | { |
69 | res_data += last_3_periods[2] + 1 - begin; |
70 | res_size = last_3_periods[1] - last_3_periods[2] - 1; |
71 | } |
72 | else |
73 | { |
74 | res_data += last_3_periods[1] + 1 - begin; |
75 | res_size = last_3_periods[0] - last_3_periods[1] - 1; |
76 | } |
77 | } |
78 | }; |
79 | |
80 | } |
81 | |
82 | |