1 | #include "FunctionsStringSearch.h" |
2 | |
3 | #include <algorithm> |
4 | #include <memory> |
5 | #include <string> |
6 | #include <vector> |
7 | #include <Columns/ColumnFixedString.h> |
8 | #include <DataTypes/DataTypeFixedString.h> |
9 | #include <Functions/FunctionFactory.h> |
10 | #include <Functions/Regexps.h> |
11 | #include <IO/WriteHelpers.h> |
12 | #include <Poco/UTF8String.h> |
13 | #include <Common/Volnitsky.h> |
14 | |
15 | namespace DB |
16 | { |
17 | /** Implementation details for functions of 'position' family depending on ASCII/UTF8 and case sensitiveness. |
18 | */ |
19 | struct PositionCaseSensitiveASCII |
20 | { |
21 | /// For searching single substring inside big-enough contiguous chunk of data. Coluld have slightly expensive initialization. |
22 | using SearcherInBigHaystack = Volnitsky; |
23 | |
24 | /// For search many substrings in one string |
25 | using MultiSearcherInBigHaystack = MultiVolnitsky; |
26 | |
27 | /// For searching single substring, that is different each time. This object is created for each row of data. It must have cheap initialization. |
28 | using SearcherInSmallHaystack = LibCASCIICaseSensitiveStringSearcher; |
29 | |
30 | static SearcherInBigHaystack createSearcherInBigHaystack(const char * needle_data, size_t needle_size, size_t haystack_size_hint) |
31 | { |
32 | return SearcherInBigHaystack(needle_data, needle_size, haystack_size_hint); |
33 | } |
34 | |
35 | static SearcherInSmallHaystack createSearcherInSmallHaystack(const char * needle_data, size_t needle_size) |
36 | { |
37 | return SearcherInSmallHaystack(needle_data, needle_size); |
38 | } |
39 | |
40 | static MultiSearcherInBigHaystack createMultiSearcherInBigHaystack(const std::vector<StringRef> & needles) |
41 | { |
42 | return MultiSearcherInBigHaystack(needles); |
43 | } |
44 | |
45 | /// Number of code points between 'begin' and 'end' (this has different behaviour for ASCII and UTF-8). |
46 | static size_t countChars(const char * begin, const char * end) { return end - begin; } |
47 | |
48 | /// Convert string to lowercase. Only for case-insensitive search. |
49 | /// Implementation is permitted to be inefficient because it is called for single string. |
50 | static void toLowerIfNeed(std::string &) { } |
51 | }; |
52 | |
53 | struct PositionCaseInsensitiveASCII |
54 | { |
55 | /// `Volnitsky` is not used here, because one person has measured that this is better. It will be good if you question it. |
56 | using SearcherInBigHaystack = ASCIICaseInsensitiveStringSearcher; |
57 | using MultiSearcherInBigHaystack = MultiVolnitskyCaseInsensitive; |
58 | using SearcherInSmallHaystack = LibCASCIICaseInsensitiveStringSearcher; |
59 | |
60 | static SearcherInBigHaystack createSearcherInBigHaystack(const char * needle_data, size_t needle_size, size_t /*haystack_size_hint*/) |
61 | { |
62 | return SearcherInBigHaystack(needle_data, needle_size); |
63 | } |
64 | |
65 | static SearcherInSmallHaystack createSearcherInSmallHaystack(const char * needle_data, size_t needle_size) |
66 | { |
67 | return SearcherInSmallHaystack(needle_data, needle_size); |
68 | } |
69 | |
70 | static MultiSearcherInBigHaystack createMultiSearcherInBigHaystack(const std::vector<StringRef> & needles) |
71 | { |
72 | return MultiSearcherInBigHaystack(needles); |
73 | } |
74 | |
75 | static size_t countChars(const char * begin, const char * end) { return end - begin; } |
76 | |
77 | static void toLowerIfNeed(std::string & s) { std::transform(std::begin(s), std::end(s), std::begin(s), tolower); } |
78 | }; |
79 | |
80 | struct PositionCaseSensitiveUTF8 |
81 | { |
82 | using SearcherInBigHaystack = VolnitskyUTF8; |
83 | using MultiSearcherInBigHaystack = MultiVolnitskyUTF8; |
84 | using SearcherInSmallHaystack = LibCASCIICaseSensitiveStringSearcher; |
85 | |
86 | static SearcherInBigHaystack createSearcherInBigHaystack(const char * needle_data, size_t needle_size, size_t haystack_size_hint) |
87 | { |
88 | return SearcherInBigHaystack(needle_data, needle_size, haystack_size_hint); |
89 | } |
90 | |
91 | static SearcherInSmallHaystack createSearcherInSmallHaystack(const char * needle_data, size_t needle_size) |
92 | { |
93 | return SearcherInSmallHaystack(needle_data, needle_size); |
94 | } |
95 | |
96 | static MultiSearcherInBigHaystack createMultiSearcherInBigHaystack(const std::vector<StringRef> & needles) |
97 | { |
98 | return MultiSearcherInBigHaystack(needles); |
99 | } |
100 | |
101 | static size_t countChars(const char * begin, const char * end) |
102 | { |
103 | size_t res = 0; |
104 | for (auto it = begin; it != end; ++it) |
105 | if (!UTF8::isContinuationOctet(static_cast<UInt8>(*it))) |
106 | ++res; |
107 | return res; |
108 | } |
109 | |
110 | static void toLowerIfNeed(std::string &) { } |
111 | }; |
112 | |
113 | struct PositionCaseInsensitiveUTF8 |
114 | { |
115 | using SearcherInBigHaystack = VolnitskyCaseInsensitiveUTF8; |
116 | using MultiSearcherInBigHaystack = MultiVolnitskyCaseInsensitiveUTF8; |
117 | using SearcherInSmallHaystack = UTF8CaseInsensitiveStringSearcher; /// TODO Very suboptimal. |
118 | |
119 | static SearcherInBigHaystack createSearcherInBigHaystack(const char * needle_data, size_t needle_size, size_t haystack_size_hint) |
120 | { |
121 | return SearcherInBigHaystack(needle_data, needle_size, haystack_size_hint); |
122 | } |
123 | |
124 | static SearcherInSmallHaystack createSearcherInSmallHaystack(const char * needle_data, size_t needle_size) |
125 | { |
126 | return SearcherInSmallHaystack(needle_data, needle_size); |
127 | } |
128 | |
129 | static MultiSearcherInBigHaystack createMultiSearcherInBigHaystack(const std::vector<StringRef> & needles) |
130 | { |
131 | return MultiSearcherInBigHaystack(needles); |
132 | } |
133 | |
134 | static size_t countChars(const char * begin, const char * end) |
135 | { |
136 | size_t res = 0; |
137 | for (auto it = begin; it != end; ++it) |
138 | if (!UTF8::isContinuationOctet(static_cast<UInt8>(*it))) |
139 | ++res; |
140 | return res; |
141 | } |
142 | |
143 | static void toLowerIfNeed(std::string & s) { Poco::UTF8::toLowerInPlace(s); } |
144 | }; |
145 | |
146 | template <typename Impl> |
147 | struct PositionImpl |
148 | { |
149 | using ResultType = UInt64; |
150 | |
151 | /// Find one substring in many strings. |
152 | static void vector_constant( |
153 | const ColumnString::Chars & data, const ColumnString::Offsets & offsets, const std::string & needle, PaddedPODArray<UInt64> & res) |
154 | { |
155 | const UInt8 * begin = data.data(); |
156 | const UInt8 * pos = begin; |
157 | const UInt8 * end = pos + data.size(); |
158 | |
159 | /// Current index in the array of strings. |
160 | size_t i = 0; |
161 | |
162 | typename Impl::SearcherInBigHaystack searcher = Impl::createSearcherInBigHaystack(needle.data(), needle.size(), end - pos); |
163 | |
164 | /// We will search for the next occurrence in all strings at once. |
165 | while (pos < end && end != (pos = searcher.search(pos, end - pos))) |
166 | { |
167 | /// Determine which index it refers to. |
168 | while (begin + offsets[i] <= pos) |
169 | { |
170 | res[i] = 0; |
171 | ++i; |
172 | } |
173 | |
174 | /// We check that the entry does not pass through the boundaries of strings. |
175 | if (pos + needle.size() < begin + offsets[i]) |
176 | res[i] = 1 + Impl::countChars(reinterpret_cast<const char *>(begin + offsets[i - 1]), reinterpret_cast<const char *>(pos)); |
177 | else |
178 | res[i] = 0; |
179 | |
180 | pos = begin + offsets[i]; |
181 | ++i; |
182 | } |
183 | |
184 | if (i < res.size()) |
185 | memset(&res[i], 0, (res.size() - i) * sizeof(res[0])); |
186 | } |
187 | |
188 | /// Search for substring in string. |
189 | static void constant_constant(std::string data, std::string needle, UInt64 & res) |
190 | { |
191 | Impl::toLowerIfNeed(data); |
192 | Impl::toLowerIfNeed(needle); |
193 | |
194 | res = data.find(needle); |
195 | if (res == std::string::npos) |
196 | res = 0; |
197 | else |
198 | res = 1 + Impl::countChars(data.data(), data.data() + res); |
199 | } |
200 | |
201 | /// Search each time for a different single substring inside each time different string. |
202 | static void vector_vector( |
203 | const ColumnString::Chars & haystack_data, |
204 | const ColumnString::Offsets & haystack_offsets, |
205 | const ColumnString::Chars & needle_data, |
206 | const ColumnString::Offsets & needle_offsets, |
207 | PaddedPODArray<UInt64> & res) |
208 | { |
209 | ColumnString::Offset prev_haystack_offset = 0; |
210 | ColumnString::Offset prev_needle_offset = 0; |
211 | |
212 | size_t size = haystack_offsets.size(); |
213 | |
214 | for (size_t i = 0; i < size; ++i) |
215 | { |
216 | size_t needle_size = needle_offsets[i] - prev_needle_offset - 1; |
217 | size_t haystack_size = haystack_offsets[i] - prev_haystack_offset - 1; |
218 | |
219 | if (0 == needle_size) |
220 | { |
221 | /// An empty string is always at the very beginning of `haystack`. |
222 | res[i] = 1; |
223 | } |
224 | else |
225 | { |
226 | /// It is assumed that the StringSearcher is not very difficult to initialize. |
227 | typename Impl::SearcherInSmallHaystack searcher = Impl::createSearcherInSmallHaystack( |
228 | reinterpret_cast<const char *>(&needle_data[prev_needle_offset]), |
229 | needle_offsets[i] - prev_needle_offset - 1); /// zero byte at the end |
230 | |
231 | /// searcher returns a pointer to the found substring or to the end of `haystack`. |
232 | size_t pos = searcher.search(&haystack_data[prev_haystack_offset], &haystack_data[haystack_offsets[i] - 1]) |
233 | - &haystack_data[prev_haystack_offset]; |
234 | |
235 | if (pos != haystack_size) |
236 | { |
237 | res[i] = 1 |
238 | + Impl::countChars( |
239 | reinterpret_cast<const char *>(&haystack_data[prev_haystack_offset]), |
240 | reinterpret_cast<const char *>(&haystack_data[prev_haystack_offset + pos])); |
241 | } |
242 | else |
243 | res[i] = 0; |
244 | } |
245 | |
246 | prev_haystack_offset = haystack_offsets[i]; |
247 | prev_needle_offset = needle_offsets[i]; |
248 | } |
249 | } |
250 | |
251 | /// Find many substrings in single string. |
252 | static void constant_vector( |
253 | const String & haystack, |
254 | const ColumnString::Chars & needle_data, |
255 | const ColumnString::Offsets & needle_offsets, |
256 | PaddedPODArray<UInt64> & res) |
257 | { |
258 | // NOTE You could use haystack indexing. But this is a rare case. |
259 | |
260 | ColumnString::Offset prev_needle_offset = 0; |
261 | |
262 | size_t size = needle_offsets.size(); |
263 | |
264 | for (size_t i = 0; i < size; ++i) |
265 | { |
266 | size_t needle_size = needle_offsets[i] - prev_needle_offset - 1; |
267 | |
268 | if (0 == needle_size) |
269 | { |
270 | res[i] = 1; |
271 | } |
272 | else |
273 | { |
274 | typename Impl::SearcherInSmallHaystack searcher = Impl::createSearcherInSmallHaystack( |
275 | reinterpret_cast<const char *>(&needle_data[prev_needle_offset]), needle_offsets[i] - prev_needle_offset - 1); |
276 | |
277 | size_t pos = searcher.search( |
278 | reinterpret_cast<const UInt8 *>(haystack.data()), |
279 | reinterpret_cast<const UInt8 *>(haystack.data()) + haystack.size()) |
280 | - reinterpret_cast<const UInt8 *>(haystack.data()); |
281 | |
282 | if (pos != haystack.size()) |
283 | { |
284 | res[i] = 1 + Impl::countChars(haystack.data(), haystack.data() + pos); |
285 | } |
286 | else |
287 | res[i] = 0; |
288 | } |
289 | |
290 | prev_needle_offset = needle_offsets[i]; |
291 | } |
292 | } |
293 | }; |
294 | |
295 | template <typename Impl> |
296 | struct MultiSearchAllPositionsImpl |
297 | { |
298 | using ResultType = UInt64; |
299 | |
300 | static void vector_constant( |
301 | const ColumnString::Chars & haystack_data, |
302 | const ColumnString::Offsets & haystack_offsets, |
303 | const std::vector<StringRef> & needles, |
304 | PaddedPODArray<UInt64> & res) |
305 | { |
306 | auto res_callback = [](const UInt8 * start, const UInt8 * end) -> UInt64 |
307 | { |
308 | return 1 + Impl::countChars(reinterpret_cast<const char *>(start), reinterpret_cast<const char *>(end)); |
309 | }; |
310 | |
311 | auto searcher = Impl::createMultiSearcherInBigHaystack(needles); |
312 | |
313 | const size_t haystack_string_size = haystack_offsets.size(); |
314 | const size_t needles_size = needles.size(); |
315 | |
316 | /// Something can be uninitialized after the search itself |
317 | std::fill(res.begin(), res.end(), 0); |
318 | |
319 | while (searcher.hasMoreToSearch()) |
320 | { |
321 | size_t prev_offset = 0; |
322 | for (size_t j = 0, from = 0; j < haystack_string_size; ++j, from += needles_size) |
323 | { |
324 | const auto * haystack = &haystack_data[prev_offset]; |
325 | const auto * haystack_end = haystack + haystack_offsets[j] - prev_offset - 1; |
326 | searcher.searchOneAll(haystack, haystack_end, res.data() + from, res_callback); |
327 | prev_offset = haystack_offsets[j]; |
328 | } |
329 | } |
330 | } |
331 | }; |
332 | |
333 | template <typename Impl> |
334 | struct MultiSearchImpl |
335 | { |
336 | using ResultType = UInt8; |
337 | static constexpr bool is_using_hyperscan = false; |
338 | /// Variable for understanding, if we used offsets for the output, most |
339 | /// likely to determine whether the function returns ColumnVector of ColumnArray. |
340 | static constexpr bool is_column_array = false; |
341 | static auto ReturnType() { return std::make_shared<DataTypeNumber<ResultType>>(); } |
342 | |
343 | static void vector_constant( |
344 | const ColumnString::Chars & haystack_data, |
345 | const ColumnString::Offsets & haystack_offsets, |
346 | const std::vector<StringRef> & needles, |
347 | PaddedPODArray<UInt8> & res, |
348 | [[maybe_unused]] PaddedPODArray<UInt64> & offsets) |
349 | { |
350 | auto searcher = Impl::createMultiSearcherInBigHaystack(needles); |
351 | const size_t haystack_string_size = haystack_offsets.size(); |
352 | res.resize(haystack_string_size); |
353 | size_t iteration = 0; |
354 | while (searcher.hasMoreToSearch()) |
355 | { |
356 | size_t prev_offset = 0; |
357 | for (size_t j = 0; j < haystack_string_size; ++j) |
358 | { |
359 | const auto * haystack = &haystack_data[prev_offset]; |
360 | const auto * haystack_end = haystack + haystack_offsets[j] - prev_offset - 1; |
361 | if (iteration == 0 || !res[j]) |
362 | res[j] = searcher.searchOne(haystack, haystack_end); |
363 | prev_offset = haystack_offsets[j]; |
364 | } |
365 | ++iteration; |
366 | } |
367 | } |
368 | }; |
369 | |
370 | template <typename Impl> |
371 | struct MultiSearchFirstPositionImpl |
372 | { |
373 | using ResultType = UInt64; |
374 | static constexpr bool is_using_hyperscan = false; |
375 | /// Variable for understanding, if we used offsets for the output, most |
376 | /// likely to determine whether the function returns ColumnVector of ColumnArray. |
377 | static constexpr bool is_column_array = false; |
378 | static auto ReturnType() { return std::make_shared<DataTypeNumber<ResultType>>(); } |
379 | |
380 | static void vector_constant( |
381 | const ColumnString::Chars & haystack_data, |
382 | const ColumnString::Offsets & haystack_offsets, |
383 | const std::vector<StringRef> & needles, |
384 | PaddedPODArray<UInt64> & res, |
385 | [[maybe_unused]] PaddedPODArray<UInt64> & offsets) |
386 | { |
387 | auto res_callback = [](const UInt8 * start, const UInt8 * end) -> UInt64 |
388 | { |
389 | return 1 + Impl::countChars(reinterpret_cast<const char *>(start), reinterpret_cast<const char *>(end)); |
390 | }; |
391 | auto searcher = Impl::createMultiSearcherInBigHaystack(needles); |
392 | const size_t haystack_string_size = haystack_offsets.size(); |
393 | res.resize(haystack_string_size); |
394 | size_t iteration = 0; |
395 | while (searcher.hasMoreToSearch()) |
396 | { |
397 | size_t prev_offset = 0; |
398 | for (size_t j = 0; j < haystack_string_size; ++j) |
399 | { |
400 | const auto * haystack = &haystack_data[prev_offset]; |
401 | const auto * haystack_end = haystack + haystack_offsets[j] - prev_offset - 1; |
402 | if (iteration == 0 || res[j] == 0) |
403 | res[j] = searcher.searchOneFirstPosition(haystack, haystack_end, res_callback); |
404 | else |
405 | { |
406 | UInt64 result = searcher.searchOneFirstPosition(haystack, haystack_end, res_callback); |
407 | if (result != 0) |
408 | res[j] = std::min(result, res[j]); |
409 | } |
410 | prev_offset = haystack_offsets[j]; |
411 | } |
412 | ++iteration; |
413 | } |
414 | } |
415 | }; |
416 | |
417 | template <typename Impl> |
418 | struct MultiSearchFirstIndexImpl |
419 | { |
420 | using ResultType = UInt64; |
421 | static constexpr bool is_using_hyperscan = false; |
422 | /// Variable for understanding, if we used offsets for the output, most |
423 | /// likely to determine whether the function returns ColumnVector of ColumnArray. |
424 | static constexpr bool is_column_array = false; |
425 | static auto ReturnType() { return std::make_shared<DataTypeNumber<ResultType>>(); } |
426 | |
427 | static void vector_constant( |
428 | const ColumnString::Chars & haystack_data, |
429 | const ColumnString::Offsets & haystack_offsets, |
430 | const std::vector<StringRef> & needles, |
431 | PaddedPODArray<UInt64> & res, |
432 | [[maybe_unused]] PaddedPODArray<UInt64> & offsets) |
433 | { |
434 | auto searcher = Impl::createMultiSearcherInBigHaystack(needles); |
435 | const size_t haystack_string_size = haystack_offsets.size(); |
436 | res.resize(haystack_string_size); |
437 | size_t iteration = 0; |
438 | while (searcher.hasMoreToSearch()) |
439 | { |
440 | size_t prev_offset = 0; |
441 | for (size_t j = 0; j < haystack_string_size; ++j) |
442 | { |
443 | const auto * haystack = &haystack_data[prev_offset]; |
444 | const auto * haystack_end = haystack + haystack_offsets[j] - prev_offset - 1; |
445 | /// hasMoreToSearch traverse needles in increasing order |
446 | if (iteration == 0 || res[j] == 0) |
447 | res[j] = searcher.searchOneFirstIndex(haystack, haystack_end); |
448 | prev_offset = haystack_offsets[j]; |
449 | } |
450 | ++iteration; |
451 | } |
452 | } |
453 | }; |
454 | |
455 | /** Token search the string, means that needle must be surrounded by some separator chars, like whitespace or puctuation. |
456 | */ |
457 | template <typename TokenSearcher, bool negate_result = false> |
458 | struct HasTokenImpl |
459 | { |
460 | using ResultType = UInt8; |
461 | |
462 | static void vector_constant( |
463 | const ColumnString::Chars & data, const ColumnString::Offsets & offsets, const std::string & pattern, PaddedPODArray<UInt8> & res) |
464 | { |
465 | if (offsets.empty()) |
466 | return; |
467 | |
468 | const UInt8 * begin = data.data(); |
469 | const UInt8 * pos = begin; |
470 | const UInt8 * end = pos + data.size(); |
471 | |
472 | /// The current index in the array of strings. |
473 | size_t i = 0; |
474 | |
475 | TokenSearcher searcher(pattern.data(), pattern.size(), end - pos); |
476 | |
477 | /// We will search for the next occurrence in all rows at once. |
478 | while (pos < end && end != (pos = searcher.search(pos, end - pos))) |
479 | { |
480 | /// Let's determine which index it refers to. |
481 | while (begin + offsets[i] <= pos) |
482 | { |
483 | res[i] = negate_result; |
484 | ++i; |
485 | } |
486 | |
487 | /// We check that the entry does not pass through the boundaries of strings. |
488 | if (pos + pattern.size() < begin + offsets[i]) |
489 | res[i] = !negate_result; |
490 | else |
491 | res[i] = negate_result; |
492 | |
493 | pos = begin + offsets[i]; |
494 | ++i; |
495 | } |
496 | |
497 | /// Tail, in which there can be no substring. |
498 | if (i < res.size()) |
499 | memset(&res[i], negate_result, (res.size() - i) * sizeof(res[0])); |
500 | } |
501 | |
502 | static void constant_constant(const std::string & data, const std::string & pattern, UInt8 & res) |
503 | { |
504 | TokenSearcher searcher(pattern.data(), pattern.size(), data.size()); |
505 | const auto found = searcher.search(data.c_str(), data.size()) != data.end().base(); |
506 | res = negate_result ^ found; |
507 | } |
508 | |
509 | template <typename... Args> |
510 | static void vector_vector(Args &&...) |
511 | { |
512 | throw Exception("Function 'hasToken' does not support non-constant needle argument" , ErrorCodes::ILLEGAL_COLUMN); |
513 | } |
514 | |
515 | /// Search different needles in single haystack. |
516 | template <typename... Args> |
517 | static void constant_vector(Args &&...) |
518 | { |
519 | throw Exception("Function 'hasToken' does not support non-constant needle argument" , ErrorCodes::ILLEGAL_COLUMN); |
520 | } |
521 | }; |
522 | |
523 | |
/// Name holders for the functions of the 'position' family.
struct NamePosition
{
    static constexpr const char * name = "position";
};

struct NamePositionUTF8
{
    static constexpr const char * name = "positionUTF8";
};

struct NamePositionCaseInsensitive
{
    static constexpr const char * name = "positionCaseInsensitive";
};

struct NamePositionCaseInsensitiveUTF8
{
    static constexpr const char * name = "positionCaseInsensitiveUTF8";
};
/// Name holders for the functions of the 'multiSearchAllPositions' family.
struct NameMultiSearchAllPositions
{
    static constexpr const char * name = "multiSearchAllPositions";
};

struct NameMultiSearchAllPositionsUTF8
{
    static constexpr const char * name = "multiSearchAllPositionsUTF8";
};

struct NameMultiSearchAllPositionsCaseInsensitive
{
    static constexpr const char * name = "multiSearchAllPositionsCaseInsensitive";
};

struct NameMultiSearchAllPositionsCaseInsensitiveUTF8
{
    static constexpr const char * name = "multiSearchAllPositionsCaseInsensitiveUTF8";
};
/// Name holders for the functions of the 'multiSearchAny' family.
struct NameMultiSearchAny
{
    static constexpr const char * name = "multiSearchAny";
};

struct NameMultiSearchAnyUTF8
{
    static constexpr const char * name = "multiSearchAnyUTF8";
};

struct NameMultiSearchAnyCaseInsensitive
{
    static constexpr const char * name = "multiSearchAnyCaseInsensitive";
};

struct NameMultiSearchAnyCaseInsensitiveUTF8
{
    static constexpr const char * name = "multiSearchAnyCaseInsensitiveUTF8";
};
/// Name holders for the functions of the 'multiSearchFirstIndex' family.
struct NameMultiSearchFirstIndex
{
    static constexpr const char * name = "multiSearchFirstIndex";
};

struct NameMultiSearchFirstIndexUTF8
{
    static constexpr const char * name = "multiSearchFirstIndexUTF8";
};

struct NameMultiSearchFirstIndexCaseInsensitive
{
    static constexpr const char * name = "multiSearchFirstIndexCaseInsensitive";
};

struct NameMultiSearchFirstIndexCaseInsensitiveUTF8
{
    static constexpr const char * name = "multiSearchFirstIndexCaseInsensitiveUTF8";
};
/// Name holders for the functions of the 'multiSearchFirstPosition' family.
struct NameMultiSearchFirstPosition
{
    static constexpr const char * name = "multiSearchFirstPosition";
};

struct NameMultiSearchFirstPositionUTF8
{
    static constexpr const char * name = "multiSearchFirstPositionUTF8";
};

struct NameMultiSearchFirstPositionCaseInsensitive
{
    static constexpr const char * name = "multiSearchFirstPositionCaseInsensitive";
};

struct NameMultiSearchFirstPositionCaseInsensitiveUTF8
{
    static constexpr const char * name = "multiSearchFirstPositionCaseInsensitiveUTF8";
};
604 | |
/// Name holders for the functions of the 'hasToken' family.
struct NameHasToken
{
    static constexpr const char * name = "hasToken";
};

struct NameHasTokenCaseInsensitive
{
    static constexpr const char * name = "hasTokenCaseInsensitive";
};
614 | |
615 | |
616 | using FunctionPosition = FunctionsStringSearch<PositionImpl<PositionCaseSensitiveASCII>, NamePosition>; |
617 | using FunctionPositionUTF8 = FunctionsStringSearch<PositionImpl<PositionCaseSensitiveUTF8>, NamePositionUTF8>; |
618 | using FunctionPositionCaseInsensitive = FunctionsStringSearch<PositionImpl<PositionCaseInsensitiveASCII>, NamePositionCaseInsensitive>; |
619 | using FunctionPositionCaseInsensitiveUTF8 |
620 | = FunctionsStringSearch<PositionImpl<PositionCaseInsensitiveUTF8>, NamePositionCaseInsensitiveUTF8>; |
621 | |
622 | using FunctionMultiSearchAllPositions |
623 | = FunctionsMultiStringPosition<MultiSearchAllPositionsImpl<PositionCaseSensitiveASCII>, NameMultiSearchAllPositions>; |
624 | using FunctionMultiSearchAllPositionsUTF8 |
625 | = FunctionsMultiStringPosition<MultiSearchAllPositionsImpl<PositionCaseSensitiveUTF8>, NameMultiSearchAllPositionsUTF8>; |
626 | using FunctionMultiSearchAllPositionsCaseInsensitive |
627 | = FunctionsMultiStringPosition<MultiSearchAllPositionsImpl<PositionCaseInsensitiveASCII>, NameMultiSearchAllPositionsCaseInsensitive>; |
628 | using FunctionMultiSearchAllPositionsCaseInsensitiveUTF8 = FunctionsMultiStringPosition< |
629 | MultiSearchAllPositionsImpl<PositionCaseInsensitiveUTF8>, |
630 | NameMultiSearchAllPositionsCaseInsensitiveUTF8>; |
631 | |
632 | using FunctionMultiSearch = FunctionsMultiStringSearch<MultiSearchImpl<PositionCaseSensitiveASCII>, NameMultiSearchAny>; |
633 | using FunctionMultiSearchUTF8 = FunctionsMultiStringSearch<MultiSearchImpl<PositionCaseSensitiveUTF8>, NameMultiSearchAnyUTF8>; |
634 | using FunctionMultiSearchCaseInsensitive |
635 | = FunctionsMultiStringSearch<MultiSearchImpl<PositionCaseInsensitiveASCII>, NameMultiSearchAnyCaseInsensitive>; |
636 | using FunctionMultiSearchCaseInsensitiveUTF8 |
637 | = FunctionsMultiStringSearch<MultiSearchImpl<PositionCaseInsensitiveUTF8>, NameMultiSearchAnyCaseInsensitiveUTF8>; |
638 | |
639 | using FunctionMultiSearchFirstIndex |
640 | = FunctionsMultiStringSearch<MultiSearchFirstIndexImpl<PositionCaseSensitiveASCII>, NameMultiSearchFirstIndex>; |
641 | using FunctionMultiSearchFirstIndexUTF8 |
642 | = FunctionsMultiStringSearch<MultiSearchFirstIndexImpl<PositionCaseSensitiveUTF8>, NameMultiSearchFirstIndexUTF8>; |
643 | using FunctionMultiSearchFirstIndexCaseInsensitive |
644 | = FunctionsMultiStringSearch<MultiSearchFirstIndexImpl<PositionCaseInsensitiveASCII>, NameMultiSearchFirstIndexCaseInsensitive>; |
645 | using FunctionMultiSearchFirstIndexCaseInsensitiveUTF8 |
646 | = FunctionsMultiStringSearch<MultiSearchFirstIndexImpl<PositionCaseInsensitiveUTF8>, NameMultiSearchFirstIndexCaseInsensitiveUTF8>; |
647 | |
648 | using FunctionMultiSearchFirstPosition |
649 | = FunctionsMultiStringSearch<MultiSearchFirstPositionImpl<PositionCaseSensitiveASCII>, NameMultiSearchFirstPosition>; |
650 | using FunctionMultiSearchFirstPositionUTF8 |
651 | = FunctionsMultiStringSearch<MultiSearchFirstPositionImpl<PositionCaseSensitiveUTF8>, NameMultiSearchFirstPositionUTF8>; |
652 | using FunctionMultiSearchFirstPositionCaseInsensitive |
653 | = FunctionsMultiStringSearch<MultiSearchFirstPositionImpl<PositionCaseInsensitiveASCII>, NameMultiSearchFirstPositionCaseInsensitive>; |
654 | using FunctionMultiSearchFirstPositionCaseInsensitiveUTF8 = FunctionsMultiStringSearch< |
655 | MultiSearchFirstPositionImpl<PositionCaseInsensitiveUTF8>, |
656 | NameMultiSearchFirstPositionCaseInsensitiveUTF8>; |
657 | |
658 | using FunctionHasToken = FunctionsStringSearch<HasTokenImpl<VolnitskyCaseSensitiveToken, false>, NameHasToken>; |
659 | using FunctionHasTokenCaseInsensitive |
660 | = FunctionsStringSearch<HasTokenImpl<VolnitskyCaseInsensitiveToken, false>, NameHasTokenCaseInsensitive>; |
661 | |
662 | void registerFunctionsStringSearch(FunctionFactory & factory) |
663 | { |
664 | factory.registerFunction<FunctionPosition>(FunctionFactory::CaseInsensitive); |
665 | factory.registerFunction<FunctionPositionUTF8>(); |
666 | factory.registerFunction<FunctionPositionCaseInsensitive>(); |
667 | factory.registerFunction<FunctionPositionCaseInsensitiveUTF8>(); |
668 | |
669 | factory.registerFunction<FunctionMultiSearchAllPositions>(); |
670 | factory.registerFunction<FunctionMultiSearchAllPositionsUTF8>(); |
671 | factory.registerFunction<FunctionMultiSearchAllPositionsCaseInsensitive>(); |
672 | factory.registerFunction<FunctionMultiSearchAllPositionsCaseInsensitiveUTF8>(); |
673 | |
674 | factory.registerFunction<FunctionMultiSearch>(); |
675 | factory.registerFunction<FunctionMultiSearchUTF8>(); |
676 | factory.registerFunction<FunctionMultiSearchCaseInsensitive>(); |
677 | factory.registerFunction<FunctionMultiSearchCaseInsensitiveUTF8>(); |
678 | |
679 | factory.registerFunction<FunctionMultiSearchFirstIndex>(); |
680 | factory.registerFunction<FunctionMultiSearchFirstIndexUTF8>(); |
681 | factory.registerFunction<FunctionMultiSearchFirstIndexCaseInsensitive>(); |
682 | factory.registerFunction<FunctionMultiSearchFirstIndexCaseInsensitiveUTF8>(); |
683 | |
684 | factory.registerFunction<FunctionMultiSearchFirstPosition>(); |
685 | factory.registerFunction<FunctionMultiSearchFirstPositionUTF8>(); |
686 | factory.registerFunction<FunctionMultiSearchFirstPositionCaseInsensitive>(); |
687 | factory.registerFunction<FunctionMultiSearchFirstPositionCaseInsensitiveUTF8>(); |
688 | |
689 | factory.registerFunction<FunctionHasToken>(); |
690 | factory.registerFunction<FunctionHasTokenCaseInsensitive>(); |
691 | |
692 | factory.registerAlias("locate" , NamePosition::name, FunctionFactory::CaseInsensitive); |
693 | } |
694 | } |
695 | |