1#pragma once
2
3#include <Core/Types.h>
4
5#include <algorithm>
6#include <cctype>
7#include <cmath>
8#include <memory>
9#include <queue>
10#include <utility>
11
12namespace DB
13{
14template <size_t MaxNumHints>
15class NamePrompter
16{
17public:
18 using DistanceIndex = std::pair<size_t, size_t>;
19 using DistanceIndexQueue = std::priority_queue<DistanceIndex>;
20
21 static std::vector<String> getHints(const String & name, const std::vector<String> & prompting_strings)
22 {
23 DistanceIndexQueue queue;
24 for (size_t i = 0; i < prompting_strings.size(); ++i)
25 appendToQueue(i, name, queue, prompting_strings);
26 return release(queue, prompting_strings);
27 }
28
29private:
30 static size_t levenshteinDistance(const String & lhs, const String & rhs)
31 {
32 size_t m = lhs.size();
33 size_t n = rhs.size();
34
35 static constexpr size_t small_buffer_size = 64;
36 size_t small_buffer[small_buffer_size];
37 std::unique_ptr<size_t[]> alloc_buffer;
38 size_t * row = small_buffer;
39 if (n + 1 > small_buffer_size)
40 {
41 row = new size_t[n + 1];
42 alloc_buffer.reset(row);
43 }
44
45 for (size_t i = 1; i <= n; ++i)
46 row[i] = i;
47
48 for (size_t j = 1; j <= m; ++j)
49 {
50 row[0] = j;
51 size_t prev = j - 1;
52 for (size_t i = 1; i <= n; ++i)
53 {
54 size_t old = row[i];
55 row[i] = std::min(prev + (std::tolower(lhs[j - 1]) != std::tolower(rhs[i - 1])),
56 std::min(row[i - 1], row[i]) + 1);
57 prev = old;
58 }
59 }
60 return row[n];
61 }
62
63 static void appendToQueue(size_t ind, const String & name, DistanceIndexQueue & queue, const std::vector<String> & prompting_strings)
64 {
65 const String & prompt = prompting_strings[ind];
66
67 /// Clang SimpleTypoCorrector logic
68 const size_t min_possible_edit_distance = std::abs(static_cast<int64_t>(name.size()) - static_cast<int64_t>(prompt.size()));
69 const size_t mistake_factor = (name.size() + 2) / 3;
70 if (min_possible_edit_distance > 0 && name.size() / min_possible_edit_distance < 3)
71 return;
72
73 if (prompt.size() <= name.size() + mistake_factor && prompt.size() + mistake_factor >= name.size())
74 {
75 size_t distance = levenshteinDistance(prompt, name);
76 if (distance <= mistake_factor)
77 {
78 queue.emplace(distance, ind);
79 if (queue.size() > MaxNumHints)
80 queue.pop();
81 }
82 }
83 }
84
85 static std::vector<String> release(DistanceIndexQueue & queue, const std::vector<String> & prompting_strings)
86 {
87 std::vector<String> ans;
88 ans.reserve(queue.size());
89 while (!queue.empty())
90 {
91 auto top = queue.top();
92 queue.pop();
93 ans.push_back(prompting_strings[top.second]);
94 }
95 std::reverse(ans.begin(), ans.end());
96 return ans;
97 }
98};
99
100template <size_t MaxNumHints, class Self>
101class IHints
102{
103public:
104
105 virtual std::vector<String> getAllRegisteredNames() const = 0;
106
107 std::vector<String> getHints(const String & name) const
108 {
109 static const auto registered_names = getAllRegisteredNames();
110 return prompter.getHints(name, registered_names);
111 }
112
113 virtual ~IHints() = default;
114
115private:
116 NamePrompter<MaxNumHints> prompter;
117};
118
119}
120