1 | #pragma once |
2 | |
3 | #include <Core/Types.h> |
4 | |
5 | #include <algorithm> |
6 | #include <cctype> |
7 | #include <cmath> |
8 | #include <memory> |
9 | #include <queue> |
10 | #include <utility> |
11 | |
12 | namespace DB |
13 | { |
14 | template <size_t MaxNumHints> |
15 | class NamePrompter |
16 | { |
17 | public: |
18 | using DistanceIndex = std::pair<size_t, size_t>; |
19 | using DistanceIndexQueue = std::priority_queue<DistanceIndex>; |
20 | |
21 | static std::vector<String> getHints(const String & name, const std::vector<String> & prompting_strings) |
22 | { |
23 | DistanceIndexQueue queue; |
24 | for (size_t i = 0; i < prompting_strings.size(); ++i) |
25 | appendToQueue(i, name, queue, prompting_strings); |
26 | return release(queue, prompting_strings); |
27 | } |
28 | |
29 | private: |
30 | static size_t levenshteinDistance(const String & lhs, const String & rhs) |
31 | { |
32 | size_t m = lhs.size(); |
33 | size_t n = rhs.size(); |
34 | |
35 | static constexpr size_t small_buffer_size = 64; |
36 | size_t small_buffer[small_buffer_size]; |
37 | std::unique_ptr<size_t[]> alloc_buffer; |
38 | size_t * row = small_buffer; |
39 | if (n + 1 > small_buffer_size) |
40 | { |
41 | row = new size_t[n + 1]; |
42 | alloc_buffer.reset(row); |
43 | } |
44 | |
45 | for (size_t i = 1; i <= n; ++i) |
46 | row[i] = i; |
47 | |
48 | for (size_t j = 1; j <= m; ++j) |
49 | { |
50 | row[0] = j; |
51 | size_t prev = j - 1; |
52 | for (size_t i = 1; i <= n; ++i) |
53 | { |
54 | size_t old = row[i]; |
55 | row[i] = std::min(prev + (std::tolower(lhs[j - 1]) != std::tolower(rhs[i - 1])), |
56 | std::min(row[i - 1], row[i]) + 1); |
57 | prev = old; |
58 | } |
59 | } |
60 | return row[n]; |
61 | } |
62 | |
63 | static void appendToQueue(size_t ind, const String & name, DistanceIndexQueue & queue, const std::vector<String> & prompting_strings) |
64 | { |
65 | const String & prompt = prompting_strings[ind]; |
66 | |
67 | /// Clang SimpleTypoCorrector logic |
68 | const size_t min_possible_edit_distance = std::abs(static_cast<int64_t>(name.size()) - static_cast<int64_t>(prompt.size())); |
69 | const size_t mistake_factor = (name.size() + 2) / 3; |
70 | if (min_possible_edit_distance > 0 && name.size() / min_possible_edit_distance < 3) |
71 | return; |
72 | |
73 | if (prompt.size() <= name.size() + mistake_factor && prompt.size() + mistake_factor >= name.size()) |
74 | { |
75 | size_t distance = levenshteinDistance(prompt, name); |
76 | if (distance <= mistake_factor) |
77 | { |
78 | queue.emplace(distance, ind); |
79 | if (queue.size() > MaxNumHints) |
80 | queue.pop(); |
81 | } |
82 | } |
83 | } |
84 | |
85 | static std::vector<String> release(DistanceIndexQueue & queue, const std::vector<String> & prompting_strings) |
86 | { |
87 | std::vector<String> ans; |
88 | ans.reserve(queue.size()); |
89 | while (!queue.empty()) |
90 | { |
91 | auto top = queue.top(); |
92 | queue.pop(); |
93 | ans.push_back(prompting_strings[top.second]); |
94 | } |
95 | std::reverse(ans.begin(), ans.end()); |
96 | return ans; |
97 | } |
98 | }; |
99 | |
100 | template <size_t MaxNumHints, class Self> |
101 | class IHints |
102 | { |
103 | public: |
104 | |
105 | virtual std::vector<String> getAllRegisteredNames() const = 0; |
106 | |
107 | std::vector<String> getHints(const String & name) const |
108 | { |
109 | static const auto registered_names = getAllRegisteredNames(); |
110 | return prompter.getHints(name, registered_names); |
111 | } |
112 | |
113 | virtual ~IHints() = default; |
114 | |
115 | private: |
116 | NamePrompter<MaxNumHints> prompter; |
117 | }; |
118 | |
119 | } |
120 | |