| 1 | #pragma once |
| 2 | |
| 3 | #include <Core/Types.h> |
| 4 | |
| 5 | #include <algorithm> |
| 6 | #include <cctype> |
| 7 | #include <cmath> |
| 8 | #include <memory> |
| 9 | #include <queue> |
| 10 | #include <utility> |
| 11 | |
| 12 | namespace DB |
| 13 | { |
| 14 | template <size_t MaxNumHints> |
| 15 | class NamePrompter |
| 16 | { |
| 17 | public: |
| 18 | using DistanceIndex = std::pair<size_t, size_t>; |
| 19 | using DistanceIndexQueue = std::priority_queue<DistanceIndex>; |
| 20 | |
| 21 | static std::vector<String> getHints(const String & name, const std::vector<String> & prompting_strings) |
| 22 | { |
| 23 | DistanceIndexQueue queue; |
| 24 | for (size_t i = 0; i < prompting_strings.size(); ++i) |
| 25 | appendToQueue(i, name, queue, prompting_strings); |
| 26 | return release(queue, prompting_strings); |
| 27 | } |
| 28 | |
| 29 | private: |
| 30 | static size_t levenshteinDistance(const String & lhs, const String & rhs) |
| 31 | { |
| 32 | size_t m = lhs.size(); |
| 33 | size_t n = rhs.size(); |
| 34 | |
| 35 | static constexpr size_t small_buffer_size = 64; |
| 36 | size_t small_buffer[small_buffer_size]; |
| 37 | std::unique_ptr<size_t[]> alloc_buffer; |
| 38 | size_t * row = small_buffer; |
| 39 | if (n + 1 > small_buffer_size) |
| 40 | { |
| 41 | row = new size_t[n + 1]; |
| 42 | alloc_buffer.reset(row); |
| 43 | } |
| 44 | |
| 45 | for (size_t i = 1; i <= n; ++i) |
| 46 | row[i] = i; |
| 47 | |
| 48 | for (size_t j = 1; j <= m; ++j) |
| 49 | { |
| 50 | row[0] = j; |
| 51 | size_t prev = j - 1; |
| 52 | for (size_t i = 1; i <= n; ++i) |
| 53 | { |
| 54 | size_t old = row[i]; |
| 55 | row[i] = std::min(prev + (std::tolower(lhs[j - 1]) != std::tolower(rhs[i - 1])), |
| 56 | std::min(row[i - 1], row[i]) + 1); |
| 57 | prev = old; |
| 58 | } |
| 59 | } |
| 60 | return row[n]; |
| 61 | } |
| 62 | |
| 63 | static void appendToQueue(size_t ind, const String & name, DistanceIndexQueue & queue, const std::vector<String> & prompting_strings) |
| 64 | { |
| 65 | const String & prompt = prompting_strings[ind]; |
| 66 | |
| 67 | /// Clang SimpleTypoCorrector logic |
| 68 | const size_t min_possible_edit_distance = std::abs(static_cast<int64_t>(name.size()) - static_cast<int64_t>(prompt.size())); |
| 69 | const size_t mistake_factor = (name.size() + 2) / 3; |
| 70 | if (min_possible_edit_distance > 0 && name.size() / min_possible_edit_distance < 3) |
| 71 | return; |
| 72 | |
| 73 | if (prompt.size() <= name.size() + mistake_factor && prompt.size() + mistake_factor >= name.size()) |
| 74 | { |
| 75 | size_t distance = levenshteinDistance(prompt, name); |
| 76 | if (distance <= mistake_factor) |
| 77 | { |
| 78 | queue.emplace(distance, ind); |
| 79 | if (queue.size() > MaxNumHints) |
| 80 | queue.pop(); |
| 81 | } |
| 82 | } |
| 83 | } |
| 84 | |
| 85 | static std::vector<String> release(DistanceIndexQueue & queue, const std::vector<String> & prompting_strings) |
| 86 | { |
| 87 | std::vector<String> ans; |
| 88 | ans.reserve(queue.size()); |
| 89 | while (!queue.empty()) |
| 90 | { |
| 91 | auto top = queue.top(); |
| 92 | queue.pop(); |
| 93 | ans.push_back(prompting_strings[top.second]); |
| 94 | } |
| 95 | std::reverse(ans.begin(), ans.end()); |
| 96 | return ans; |
| 97 | } |
| 98 | }; |
| 99 | |
| 100 | template <size_t MaxNumHints, class Self> |
| 101 | class IHints |
| 102 | { |
| 103 | public: |
| 104 | |
| 105 | virtual std::vector<String> getAllRegisteredNames() const = 0; |
| 106 | |
| 107 | std::vector<String> getHints(const String & name) const |
| 108 | { |
| 109 | static const auto registered_names = getAllRegisteredNames(); |
| 110 | return prompter.getHints(name, registered_names); |
| 111 | } |
| 112 | |
| 113 | virtual ~IHints() = default; |
| 114 | |
| 115 | private: |
| 116 | NamePrompter<MaxNumHints> prompter; |
| 117 | }; |
| 118 | |
| 119 | } |
| 120 | |