1#include <Columns/Collator.h>
2
3#include "config_core.h"
4
5#if USE_ICU
6 #include <unicode/ucol.h>
7 #include <unicode/unistr.h>
8 #include <unicode/locid.h>
9 #include <unicode/ucnv.h>
10#else
11 #ifdef __clang__
12 #pragma clang diagnostic ignored "-Wunused-private-field"
13 #pragma clang diagnostic ignored "-Wmissing-noreturn"
14 #endif
15#endif
16
17#include <Common/Exception.h>
18#include <IO/WriteHelpers.h>
19#include <Poco/String.h>
20#include <algorithm>
21
22
/// Error codes used by this translation unit; the values are defined elsewhere.
namespace DB::ErrorCodes
{
    extern const int UNSUPPORTED_COLLATION_LOCALE;
    extern const int COLLATION_COMPARISON_FAILED;
    extern const int SUPPORT_IS_DISABLED;
}
32
33
34AvailableCollationLocales::AvailableCollationLocales()
35{
36#if USE_ICU
37 static const size_t MAX_LANG_LENGTH = 128;
38 size_t available_locales_count = ucol_countAvailable();
39 for (size_t i = 0; i < available_locales_count; ++i)
40 {
41 std::string locale_name = ucol_getAvailable(i);
42 UChar lang_buffer[MAX_LANG_LENGTH];
43 char normal_buf[MAX_LANG_LENGTH];
44 UErrorCode status = U_ZERO_ERROR;
45
46 /// All names will be in English language
47 size_t lang_length = uloc_getDisplayLanguage(
48 locale_name.c_str(), "en", lang_buffer, MAX_LANG_LENGTH, &status);
49 std::optional<std::string> lang;
50
51 if (!U_FAILURE(status))
52 {
53 /// Convert language name from UChar array to normal char array.
54 /// We use English language for name, so all UChar's length is equal to sizeof(char)
55 u_UCharsToChars(lang_buffer, normal_buf, lang_length);
56 lang.emplace(std::string(normal_buf, lang_length));
57 }
58
59 locales_map.emplace(Poco::toLower(locale_name), LocaleAndLanguage{locale_name, lang});
60 }
61
62#endif
63}
64
65const AvailableCollationLocales & AvailableCollationLocales::instance()
66{
67 static AvailableCollationLocales instance;
68 return instance;
69}
70
71AvailableCollationLocales::LocalesVector AvailableCollationLocales::getAvailableCollations() const
72{
73 LocalesVector result;
74 for (const auto & name_and_locale : locales_map)
75 result.push_back(name_and_locale.second);
76
77 auto comparator = [] (const LocaleAndLanguage & f, const LocaleAndLanguage & s)
78 {
79 return f.locale_name < s.locale_name;
80 };
81 std::sort(result.begin(), result.end(), comparator);
82
83 return result;
84}
85
86bool AvailableCollationLocales::isCollationSupported(const std::string & locale_name) const
87{
88 /// We support locale names in any case, so we have to convert all to lower case
89 return locales_map.count(Poco::toLower(locale_name));
90}
91
92Collator::Collator(const std::string & locale_)
93 : locale(Poco::toLower(locale_))
94{
95#if USE_ICU
96 /// We check it here, because ucol_open will fallback to default locale for
97 /// almost all random names.
98 if (!AvailableCollationLocales::instance().isCollationSupported(locale))
99 throw DB::Exception("Unsupported collation locale: " + locale, DB::ErrorCodes::UNSUPPORTED_COLLATION_LOCALE);
100
101 UErrorCode status = U_ZERO_ERROR;
102
103 collator = ucol_open(locale.c_str(), &status);
104 if (U_FAILURE(status))
105 {
106 ucol_close(collator);
107 throw DB::Exception("Failed to open locale: " + locale + " with error: " + u_errorName(status), DB::ErrorCodes::UNSUPPORTED_COLLATION_LOCALE);
108 }
109#else
110 throw DB::Exception("Collations support is disabled, because ClickHouse was built without ICU library", DB::ErrorCodes::SUPPORT_IS_DISABLED);
111#endif
112}
113
114
115Collator::~Collator()
116{
117#if USE_ICU
118 ucol_close(collator);
119#endif
120}
121
122int Collator::compare(const char * str1, size_t length1, const char * str2, size_t length2) const
123{
124#if USE_ICU
125 UCharIterator iter1, iter2;
126 uiter_setUTF8(&iter1, str1, length1);
127 uiter_setUTF8(&iter2, str2, length2);
128
129 UErrorCode status = U_ZERO_ERROR;
130 UCollationResult compare_result = ucol_strcollIter(collator, &iter1, &iter2, &status);
131
132 if (U_FAILURE(status))
133 throw DB::Exception("ICU collation comparison failed with error code: " + std::string(u_errorName(status)),
134 DB::ErrorCodes::COLLATION_COMPARISON_FAILED);
135
136 /** Values of enum UCollationResult are equals to what exactly we need:
137 * UCOL_EQUAL = 0
138 * UCOL_GREATER = 1
139 * UCOL_LESS = -1
140 */
141 return compare_result;
142#else
143 (void)str1;
144 (void)length1;
145 (void)str2;
146 (void)length2;
147 return 0;
148#endif
149}
150
151const std::string & Collator::getLocale() const
152{
153 return locale;
154}
155