1#pragma once
2
3#include <common/find_symbols.h>
4#include "domain.h"
5#include "tldLookup.h"
6
7namespace DB
8{
9
10struct ExtractFirstSignificantSubdomain
11{
12 static size_t getReserveLengthForElement() { return 10; }
13
14 static void execute(const Pos data, const size_t size, Pos & res_data, size_t & res_size, Pos * out_domain_end = nullptr)
15 {
16 res_data = data;
17 res_size = 0;
18
19 Pos tmp;
20 size_t domain_length;
21 ExtractDomain<true>::execute(data, size, tmp, domain_length);
22
23 if (domain_length == 0)
24 return;
25
26 if (out_domain_end)
27 *out_domain_end = tmp + domain_length;
28
29 /// cut useless dot
30 if (tmp[domain_length - 1] == '.')
31 --domain_length;
32
33 res_data = tmp;
34 res_size = domain_length;
35
36 auto begin = tmp;
37 auto end = begin + domain_length;
38 const char * last_3_periods[3]{};
39
40 auto pos = find_first_symbols<'.'>(begin, end);
41 while (pos < end)
42 {
43 last_3_periods[2] = last_3_periods[1];
44 last_3_periods[1] = last_3_periods[0];
45 last_3_periods[0] = pos;
46 pos = find_first_symbols<'.'>(pos + 1, end);
47 }
48
49 if (!last_3_periods[0])
50 return;
51
52 if (!last_3_periods[1])
53 {
54 res_size = last_3_periods[0] - begin;
55 return;
56 }
57
58 if (!last_3_periods[2])
59 last_3_periods[2] = begin - 1;
60
61 auto end_of_level_domain = find_first_symbols<'/'>(last_3_periods[0], end);
62 if (!end_of_level_domain)
63 {
64 end_of_level_domain = end;
65 }
66
67 if (tldLookup::is_valid(last_3_periods[1] + 1, end_of_level_domain - last_3_periods[1] - 1) != nullptr)
68 {
69 res_data += last_3_periods[2] + 1 - begin;
70 res_size = last_3_periods[1] - last_3_periods[2] - 1;
71 }
72 else
73 {
74 res_data += last_3_periods[1] + 1 - begin;
75 res_size = last_3_periods[0] - last_3_periods[1] - 1;
76 }
77 }
78};
79
80}
81
82