1#include "FunctionsStringSearch.h"
2
3#include <algorithm>
4#include <memory>
5#include <string>
6#include <vector>
7#include <Columns/ColumnFixedString.h>
8#include <DataTypes/DataTypeFixedString.h>
9#include <Functions/FunctionFactory.h>
10#include <Functions/Regexps.h>
11#include <IO/WriteHelpers.h>
12#include <Poco/UTF8String.h>
13#include <Common/Volnitsky.h>
14
15namespace DB
16{
/** Implementation details for functions of the 'position' family, depending on ASCII/UTF-8 encoding and case sensitivity.
  */
19struct PositionCaseSensitiveASCII
20{
21 /// For searching single substring inside big-enough contiguous chunk of data. Coluld have slightly expensive initialization.
22 using SearcherInBigHaystack = Volnitsky;
23
24 /// For search many substrings in one string
25 using MultiSearcherInBigHaystack = MultiVolnitsky;
26
27 /// For searching single substring, that is different each time. This object is created for each row of data. It must have cheap initialization.
28 using SearcherInSmallHaystack = LibCASCIICaseSensitiveStringSearcher;
29
30 static SearcherInBigHaystack createSearcherInBigHaystack(const char * needle_data, size_t needle_size, size_t haystack_size_hint)
31 {
32 return SearcherInBigHaystack(needle_data, needle_size, haystack_size_hint);
33 }
34
35 static SearcherInSmallHaystack createSearcherInSmallHaystack(const char * needle_data, size_t needle_size)
36 {
37 return SearcherInSmallHaystack(needle_data, needle_size);
38 }
39
40 static MultiSearcherInBigHaystack createMultiSearcherInBigHaystack(const std::vector<StringRef> & needles)
41 {
42 return MultiSearcherInBigHaystack(needles);
43 }
44
45 /// Number of code points between 'begin' and 'end' (this has different behaviour for ASCII and UTF-8).
46 static size_t countChars(const char * begin, const char * end) { return end - begin; }
47
48 /// Convert string to lowercase. Only for case-insensitive search.
49 /// Implementation is permitted to be inefficient because it is called for single string.
50 static void toLowerIfNeed(std::string &) { }
51};
52
53struct PositionCaseInsensitiveASCII
54{
55 /// `Volnitsky` is not used here, because one person has measured that this is better. It will be good if you question it.
56 using SearcherInBigHaystack = ASCIICaseInsensitiveStringSearcher;
57 using MultiSearcherInBigHaystack = MultiVolnitskyCaseInsensitive;
58 using SearcherInSmallHaystack = LibCASCIICaseInsensitiveStringSearcher;
59
60 static SearcherInBigHaystack createSearcherInBigHaystack(const char * needle_data, size_t needle_size, size_t /*haystack_size_hint*/)
61 {
62 return SearcherInBigHaystack(needle_data, needle_size);
63 }
64
65 static SearcherInSmallHaystack createSearcherInSmallHaystack(const char * needle_data, size_t needle_size)
66 {
67 return SearcherInSmallHaystack(needle_data, needle_size);
68 }
69
70 static MultiSearcherInBigHaystack createMultiSearcherInBigHaystack(const std::vector<StringRef> & needles)
71 {
72 return MultiSearcherInBigHaystack(needles);
73 }
74
75 static size_t countChars(const char * begin, const char * end) { return end - begin; }
76
77 static void toLowerIfNeed(std::string & s) { std::transform(std::begin(s), std::end(s), std::begin(s), tolower); }
78};
79
80struct PositionCaseSensitiveUTF8
81{
82 using SearcherInBigHaystack = VolnitskyUTF8;
83 using MultiSearcherInBigHaystack = MultiVolnitskyUTF8;
84 using SearcherInSmallHaystack = LibCASCIICaseSensitiveStringSearcher;
85
86 static SearcherInBigHaystack createSearcherInBigHaystack(const char * needle_data, size_t needle_size, size_t haystack_size_hint)
87 {
88 return SearcherInBigHaystack(needle_data, needle_size, haystack_size_hint);
89 }
90
91 static SearcherInSmallHaystack createSearcherInSmallHaystack(const char * needle_data, size_t needle_size)
92 {
93 return SearcherInSmallHaystack(needle_data, needle_size);
94 }
95
96 static MultiSearcherInBigHaystack createMultiSearcherInBigHaystack(const std::vector<StringRef> & needles)
97 {
98 return MultiSearcherInBigHaystack(needles);
99 }
100
101 static size_t countChars(const char * begin, const char * end)
102 {
103 size_t res = 0;
104 for (auto it = begin; it != end; ++it)
105 if (!UTF8::isContinuationOctet(static_cast<UInt8>(*it)))
106 ++res;
107 return res;
108 }
109
110 static void toLowerIfNeed(std::string &) { }
111};
112
113struct PositionCaseInsensitiveUTF8
114{
115 using SearcherInBigHaystack = VolnitskyCaseInsensitiveUTF8;
116 using MultiSearcherInBigHaystack = MultiVolnitskyCaseInsensitiveUTF8;
117 using SearcherInSmallHaystack = UTF8CaseInsensitiveStringSearcher; /// TODO Very suboptimal.
118
119 static SearcherInBigHaystack createSearcherInBigHaystack(const char * needle_data, size_t needle_size, size_t haystack_size_hint)
120 {
121 return SearcherInBigHaystack(needle_data, needle_size, haystack_size_hint);
122 }
123
124 static SearcherInSmallHaystack createSearcherInSmallHaystack(const char * needle_data, size_t needle_size)
125 {
126 return SearcherInSmallHaystack(needle_data, needle_size);
127 }
128
129 static MultiSearcherInBigHaystack createMultiSearcherInBigHaystack(const std::vector<StringRef> & needles)
130 {
131 return MultiSearcherInBigHaystack(needles);
132 }
133
134 static size_t countChars(const char * begin, const char * end)
135 {
136 size_t res = 0;
137 for (auto it = begin; it != end; ++it)
138 if (!UTF8::isContinuationOctet(static_cast<UInt8>(*it)))
139 ++res;
140 return res;
141 }
142
143 static void toLowerIfNeed(std::string & s) { Poco::UTF8::toLowerInPlace(s); }
144};
145
146template <typename Impl>
147struct PositionImpl
148{
149 using ResultType = UInt64;
150
151 /// Find one substring in many strings.
152 static void vector_constant(
153 const ColumnString::Chars & data, const ColumnString::Offsets & offsets, const std::string & needle, PaddedPODArray<UInt64> & res)
154 {
155 const UInt8 * begin = data.data();
156 const UInt8 * pos = begin;
157 const UInt8 * end = pos + data.size();
158
159 /// Current index in the array of strings.
160 size_t i = 0;
161
162 typename Impl::SearcherInBigHaystack searcher = Impl::createSearcherInBigHaystack(needle.data(), needle.size(), end - pos);
163
164 /// We will search for the next occurrence in all strings at once.
165 while (pos < end && end != (pos = searcher.search(pos, end - pos)))
166 {
167 /// Determine which index it refers to.
168 while (begin + offsets[i] <= pos)
169 {
170 res[i] = 0;
171 ++i;
172 }
173
174 /// We check that the entry does not pass through the boundaries of strings.
175 if (pos + needle.size() < begin + offsets[i])
176 res[i] = 1 + Impl::countChars(reinterpret_cast<const char *>(begin + offsets[i - 1]), reinterpret_cast<const char *>(pos));
177 else
178 res[i] = 0;
179
180 pos = begin + offsets[i];
181 ++i;
182 }
183
184 if (i < res.size())
185 memset(&res[i], 0, (res.size() - i) * sizeof(res[0]));
186 }
187
188 /// Search for substring in string.
189 static void constant_constant(std::string data, std::string needle, UInt64 & res)
190 {
191 Impl::toLowerIfNeed(data);
192 Impl::toLowerIfNeed(needle);
193
194 res = data.find(needle);
195 if (res == std::string::npos)
196 res = 0;
197 else
198 res = 1 + Impl::countChars(data.data(), data.data() + res);
199 }
200
201 /// Search each time for a different single substring inside each time different string.
202 static void vector_vector(
203 const ColumnString::Chars & haystack_data,
204 const ColumnString::Offsets & haystack_offsets,
205 const ColumnString::Chars & needle_data,
206 const ColumnString::Offsets & needle_offsets,
207 PaddedPODArray<UInt64> & res)
208 {
209 ColumnString::Offset prev_haystack_offset = 0;
210 ColumnString::Offset prev_needle_offset = 0;
211
212 size_t size = haystack_offsets.size();
213
214 for (size_t i = 0; i < size; ++i)
215 {
216 size_t needle_size = needle_offsets[i] - prev_needle_offset - 1;
217 size_t haystack_size = haystack_offsets[i] - prev_haystack_offset - 1;
218
219 if (0 == needle_size)
220 {
221 /// An empty string is always at the very beginning of `haystack`.
222 res[i] = 1;
223 }
224 else
225 {
226 /// It is assumed that the StringSearcher is not very difficult to initialize.
227 typename Impl::SearcherInSmallHaystack searcher = Impl::createSearcherInSmallHaystack(
228 reinterpret_cast<const char *>(&needle_data[prev_needle_offset]),
229 needle_offsets[i] - prev_needle_offset - 1); /// zero byte at the end
230
231 /// searcher returns a pointer to the found substring or to the end of `haystack`.
232 size_t pos = searcher.search(&haystack_data[prev_haystack_offset], &haystack_data[haystack_offsets[i] - 1])
233 - &haystack_data[prev_haystack_offset];
234
235 if (pos != haystack_size)
236 {
237 res[i] = 1
238 + Impl::countChars(
239 reinterpret_cast<const char *>(&haystack_data[prev_haystack_offset]),
240 reinterpret_cast<const char *>(&haystack_data[prev_haystack_offset + pos]));
241 }
242 else
243 res[i] = 0;
244 }
245
246 prev_haystack_offset = haystack_offsets[i];
247 prev_needle_offset = needle_offsets[i];
248 }
249 }
250
251 /// Find many substrings in single string.
252 static void constant_vector(
253 const String & haystack,
254 const ColumnString::Chars & needle_data,
255 const ColumnString::Offsets & needle_offsets,
256 PaddedPODArray<UInt64> & res)
257 {
258 // NOTE You could use haystack indexing. But this is a rare case.
259
260 ColumnString::Offset prev_needle_offset = 0;
261
262 size_t size = needle_offsets.size();
263
264 for (size_t i = 0; i < size; ++i)
265 {
266 size_t needle_size = needle_offsets[i] - prev_needle_offset - 1;
267
268 if (0 == needle_size)
269 {
270 res[i] = 1;
271 }
272 else
273 {
274 typename Impl::SearcherInSmallHaystack searcher = Impl::createSearcherInSmallHaystack(
275 reinterpret_cast<const char *>(&needle_data[prev_needle_offset]), needle_offsets[i] - prev_needle_offset - 1);
276
277 size_t pos = searcher.search(
278 reinterpret_cast<const UInt8 *>(haystack.data()),
279 reinterpret_cast<const UInt8 *>(haystack.data()) + haystack.size())
280 - reinterpret_cast<const UInt8 *>(haystack.data());
281
282 if (pos != haystack.size())
283 {
284 res[i] = 1 + Impl::countChars(haystack.data(), haystack.data() + pos);
285 }
286 else
287 res[i] = 0;
288 }
289
290 prev_needle_offset = needle_offsets[i];
291 }
292 }
293};
294
295template <typename Impl>
296struct MultiSearchAllPositionsImpl
297{
298 using ResultType = UInt64;
299
300 static void vector_constant(
301 const ColumnString::Chars & haystack_data,
302 const ColumnString::Offsets & haystack_offsets,
303 const std::vector<StringRef> & needles,
304 PaddedPODArray<UInt64> & res)
305 {
306 auto res_callback = [](const UInt8 * start, const UInt8 * end) -> UInt64
307 {
308 return 1 + Impl::countChars(reinterpret_cast<const char *>(start), reinterpret_cast<const char *>(end));
309 };
310
311 auto searcher = Impl::createMultiSearcherInBigHaystack(needles);
312
313 const size_t haystack_string_size = haystack_offsets.size();
314 const size_t needles_size = needles.size();
315
316 /// Something can be uninitialized after the search itself
317 std::fill(res.begin(), res.end(), 0);
318
319 while (searcher.hasMoreToSearch())
320 {
321 size_t prev_offset = 0;
322 for (size_t j = 0, from = 0; j < haystack_string_size; ++j, from += needles_size)
323 {
324 const auto * haystack = &haystack_data[prev_offset];
325 const auto * haystack_end = haystack + haystack_offsets[j] - prev_offset - 1;
326 searcher.searchOneAll(haystack, haystack_end, res.data() + from, res_callback);
327 prev_offset = haystack_offsets[j];
328 }
329 }
330 }
331};
332
333template <typename Impl>
334struct MultiSearchImpl
335{
336 using ResultType = UInt8;
337 static constexpr bool is_using_hyperscan = false;
338 /// Variable for understanding, if we used offsets for the output, most
339 /// likely to determine whether the function returns ColumnVector of ColumnArray.
340 static constexpr bool is_column_array = false;
341 static auto ReturnType() { return std::make_shared<DataTypeNumber<ResultType>>(); }
342
343 static void vector_constant(
344 const ColumnString::Chars & haystack_data,
345 const ColumnString::Offsets & haystack_offsets,
346 const std::vector<StringRef> & needles,
347 PaddedPODArray<UInt8> & res,
348 [[maybe_unused]] PaddedPODArray<UInt64> & offsets)
349 {
350 auto searcher = Impl::createMultiSearcherInBigHaystack(needles);
351 const size_t haystack_string_size = haystack_offsets.size();
352 res.resize(haystack_string_size);
353 size_t iteration = 0;
354 while (searcher.hasMoreToSearch())
355 {
356 size_t prev_offset = 0;
357 for (size_t j = 0; j < haystack_string_size; ++j)
358 {
359 const auto * haystack = &haystack_data[prev_offset];
360 const auto * haystack_end = haystack + haystack_offsets[j] - prev_offset - 1;
361 if (iteration == 0 || !res[j])
362 res[j] = searcher.searchOne(haystack, haystack_end);
363 prev_offset = haystack_offsets[j];
364 }
365 ++iteration;
366 }
367 }
368};
369
370template <typename Impl>
371struct MultiSearchFirstPositionImpl
372{
373 using ResultType = UInt64;
374 static constexpr bool is_using_hyperscan = false;
375 /// Variable for understanding, if we used offsets for the output, most
376 /// likely to determine whether the function returns ColumnVector of ColumnArray.
377 static constexpr bool is_column_array = false;
378 static auto ReturnType() { return std::make_shared<DataTypeNumber<ResultType>>(); }
379
380 static void vector_constant(
381 const ColumnString::Chars & haystack_data,
382 const ColumnString::Offsets & haystack_offsets,
383 const std::vector<StringRef> & needles,
384 PaddedPODArray<UInt64> & res,
385 [[maybe_unused]] PaddedPODArray<UInt64> & offsets)
386 {
387 auto res_callback = [](const UInt8 * start, const UInt8 * end) -> UInt64
388 {
389 return 1 + Impl::countChars(reinterpret_cast<const char *>(start), reinterpret_cast<const char *>(end));
390 };
391 auto searcher = Impl::createMultiSearcherInBigHaystack(needles);
392 const size_t haystack_string_size = haystack_offsets.size();
393 res.resize(haystack_string_size);
394 size_t iteration = 0;
395 while (searcher.hasMoreToSearch())
396 {
397 size_t prev_offset = 0;
398 for (size_t j = 0; j < haystack_string_size; ++j)
399 {
400 const auto * haystack = &haystack_data[prev_offset];
401 const auto * haystack_end = haystack + haystack_offsets[j] - prev_offset - 1;
402 if (iteration == 0 || res[j] == 0)
403 res[j] = searcher.searchOneFirstPosition(haystack, haystack_end, res_callback);
404 else
405 {
406 UInt64 result = searcher.searchOneFirstPosition(haystack, haystack_end, res_callback);
407 if (result != 0)
408 res[j] = std::min(result, res[j]);
409 }
410 prev_offset = haystack_offsets[j];
411 }
412 ++iteration;
413 }
414 }
415};
416
417template <typename Impl>
418struct MultiSearchFirstIndexImpl
419{
420 using ResultType = UInt64;
421 static constexpr bool is_using_hyperscan = false;
422 /// Variable for understanding, if we used offsets for the output, most
423 /// likely to determine whether the function returns ColumnVector of ColumnArray.
424 static constexpr bool is_column_array = false;
425 static auto ReturnType() { return std::make_shared<DataTypeNumber<ResultType>>(); }
426
427 static void vector_constant(
428 const ColumnString::Chars & haystack_data,
429 const ColumnString::Offsets & haystack_offsets,
430 const std::vector<StringRef> & needles,
431 PaddedPODArray<UInt64> & res,
432 [[maybe_unused]] PaddedPODArray<UInt64> & offsets)
433 {
434 auto searcher = Impl::createMultiSearcherInBigHaystack(needles);
435 const size_t haystack_string_size = haystack_offsets.size();
436 res.resize(haystack_string_size);
437 size_t iteration = 0;
438 while (searcher.hasMoreToSearch())
439 {
440 size_t prev_offset = 0;
441 for (size_t j = 0; j < haystack_string_size; ++j)
442 {
443 const auto * haystack = &haystack_data[prev_offset];
444 const auto * haystack_end = haystack + haystack_offsets[j] - prev_offset - 1;
445 /// hasMoreToSearch traverse needles in increasing order
446 if (iteration == 0 || res[j] == 0)
447 res[j] = searcher.searchOneFirstIndex(haystack, haystack_end);
448 prev_offset = haystack_offsets[j];
449 }
450 ++iteration;
451 }
452 }
453};
454
/** Token search in a string: the needle must be surrounded by separator characters, such as whitespace or punctuation.
  */
457template <typename TokenSearcher, bool negate_result = false>
458struct HasTokenImpl
459{
460 using ResultType = UInt8;
461
462 static void vector_constant(
463 const ColumnString::Chars & data, const ColumnString::Offsets & offsets, const std::string & pattern, PaddedPODArray<UInt8> & res)
464 {
465 if (offsets.empty())
466 return;
467
468 const UInt8 * begin = data.data();
469 const UInt8 * pos = begin;
470 const UInt8 * end = pos + data.size();
471
472 /// The current index in the array of strings.
473 size_t i = 0;
474
475 TokenSearcher searcher(pattern.data(), pattern.size(), end - pos);
476
477 /// We will search for the next occurrence in all rows at once.
478 while (pos < end && end != (pos = searcher.search(pos, end - pos)))
479 {
480 /// Let's determine which index it refers to.
481 while (begin + offsets[i] <= pos)
482 {
483 res[i] = negate_result;
484 ++i;
485 }
486
487 /// We check that the entry does not pass through the boundaries of strings.
488 if (pos + pattern.size() < begin + offsets[i])
489 res[i] = !negate_result;
490 else
491 res[i] = negate_result;
492
493 pos = begin + offsets[i];
494 ++i;
495 }
496
497 /// Tail, in which there can be no substring.
498 if (i < res.size())
499 memset(&res[i], negate_result, (res.size() - i) * sizeof(res[0]));
500 }
501
502 static void constant_constant(const std::string & data, const std::string & pattern, UInt8 & res)
503 {
504 TokenSearcher searcher(pattern.data(), pattern.size(), data.size());
505 const auto found = searcher.search(data.c_str(), data.size()) != data.end().base();
506 res = negate_result ^ found;
507 }
508
509 template <typename... Args>
510 static void vector_vector(Args &&...)
511 {
512 throw Exception("Function 'hasToken' does not support non-constant needle argument", ErrorCodes::ILLEGAL_COLUMN);
513 }
514
515 /// Search different needles in single haystack.
516 template <typename... Args>
517 static void constant_vector(Args &&...)
518 {
519 throw Exception("Function 'hasToken' does not support non-constant needle argument", ErrorCodes::ILLEGAL_COLUMN);
520 }
521};
522
523
/// Name tags for the single-needle 'position' functions.
struct NamePosition { static constexpr auto name = "position"; };
struct NamePositionUTF8 { static constexpr auto name = "positionUTF8"; };
struct NamePositionCaseInsensitive { static constexpr auto name = "positionCaseInsensitive"; };
struct NamePositionCaseInsensitiveUTF8 { static constexpr auto name = "positionCaseInsensitiveUTF8"; };
/// Name tags for the 'multiSearchAllPositions' functions.
struct NameMultiSearchAllPositions { static constexpr auto name = "multiSearchAllPositions"; };
struct NameMultiSearchAllPositionsUTF8 { static constexpr auto name = "multiSearchAllPositionsUTF8"; };
struct NameMultiSearchAllPositionsCaseInsensitive { static constexpr auto name = "multiSearchAllPositionsCaseInsensitive"; };
struct NameMultiSearchAllPositionsCaseInsensitiveUTF8 { static constexpr auto name = "multiSearchAllPositionsCaseInsensitiveUTF8"; };
/// Name tags for the 'multiSearchAny' functions.
struct NameMultiSearchAny { static constexpr auto name = "multiSearchAny"; };
struct NameMultiSearchAnyUTF8 { static constexpr auto name = "multiSearchAnyUTF8"; };
struct NameMultiSearchAnyCaseInsensitive { static constexpr auto name = "multiSearchAnyCaseInsensitive"; };
struct NameMultiSearchAnyCaseInsensitiveUTF8 { static constexpr auto name = "multiSearchAnyCaseInsensitiveUTF8"; };
/// Name tags for the 'multiSearchFirstIndex' functions.
struct NameMultiSearchFirstIndex { static constexpr auto name = "multiSearchFirstIndex"; };
struct NameMultiSearchFirstIndexUTF8 { static constexpr auto name = "multiSearchFirstIndexUTF8"; };
struct NameMultiSearchFirstIndexCaseInsensitive { static constexpr auto name = "multiSearchFirstIndexCaseInsensitive"; };
struct NameMultiSearchFirstIndexCaseInsensitiveUTF8 { static constexpr auto name = "multiSearchFirstIndexCaseInsensitiveUTF8"; };
/// Name tags for the 'multiSearchFirstPosition' functions.
struct NameMultiSearchFirstPosition { static constexpr auto name = "multiSearchFirstPosition"; };
struct NameMultiSearchFirstPositionUTF8 { static constexpr auto name = "multiSearchFirstPositionUTF8"; };
struct NameMultiSearchFirstPositionCaseInsensitive { static constexpr auto name = "multiSearchFirstPositionCaseInsensitive"; };
struct NameMultiSearchFirstPositionCaseInsensitiveUTF8 { static constexpr auto name = "multiSearchFirstPositionCaseInsensitiveUTF8"; };
604
/// Name tags for the 'hasToken' functions.
struct NameHasToken { static constexpr auto name = "hasToken"; };

struct NameHasTokenCaseInsensitive { static constexpr auto name = "hasTokenCaseInsensitive"; };
614
615
616using FunctionPosition = FunctionsStringSearch<PositionImpl<PositionCaseSensitiveASCII>, NamePosition>;
617using FunctionPositionUTF8 = FunctionsStringSearch<PositionImpl<PositionCaseSensitiveUTF8>, NamePositionUTF8>;
618using FunctionPositionCaseInsensitive = FunctionsStringSearch<PositionImpl<PositionCaseInsensitiveASCII>, NamePositionCaseInsensitive>;
619using FunctionPositionCaseInsensitiveUTF8
620 = FunctionsStringSearch<PositionImpl<PositionCaseInsensitiveUTF8>, NamePositionCaseInsensitiveUTF8>;
621
622using FunctionMultiSearchAllPositions
623 = FunctionsMultiStringPosition<MultiSearchAllPositionsImpl<PositionCaseSensitiveASCII>, NameMultiSearchAllPositions>;
624using FunctionMultiSearchAllPositionsUTF8
625 = FunctionsMultiStringPosition<MultiSearchAllPositionsImpl<PositionCaseSensitiveUTF8>, NameMultiSearchAllPositionsUTF8>;
626using FunctionMultiSearchAllPositionsCaseInsensitive
627 = FunctionsMultiStringPosition<MultiSearchAllPositionsImpl<PositionCaseInsensitiveASCII>, NameMultiSearchAllPositionsCaseInsensitive>;
628using FunctionMultiSearchAllPositionsCaseInsensitiveUTF8 = FunctionsMultiStringPosition<
629 MultiSearchAllPositionsImpl<PositionCaseInsensitiveUTF8>,
630 NameMultiSearchAllPositionsCaseInsensitiveUTF8>;
631
632using FunctionMultiSearch = FunctionsMultiStringSearch<MultiSearchImpl<PositionCaseSensitiveASCII>, NameMultiSearchAny>;
633using FunctionMultiSearchUTF8 = FunctionsMultiStringSearch<MultiSearchImpl<PositionCaseSensitiveUTF8>, NameMultiSearchAnyUTF8>;
634using FunctionMultiSearchCaseInsensitive
635 = FunctionsMultiStringSearch<MultiSearchImpl<PositionCaseInsensitiveASCII>, NameMultiSearchAnyCaseInsensitive>;
636using FunctionMultiSearchCaseInsensitiveUTF8
637 = FunctionsMultiStringSearch<MultiSearchImpl<PositionCaseInsensitiveUTF8>, NameMultiSearchAnyCaseInsensitiveUTF8>;
638
639using FunctionMultiSearchFirstIndex
640 = FunctionsMultiStringSearch<MultiSearchFirstIndexImpl<PositionCaseSensitiveASCII>, NameMultiSearchFirstIndex>;
641using FunctionMultiSearchFirstIndexUTF8
642 = FunctionsMultiStringSearch<MultiSearchFirstIndexImpl<PositionCaseSensitiveUTF8>, NameMultiSearchFirstIndexUTF8>;
643using FunctionMultiSearchFirstIndexCaseInsensitive
644 = FunctionsMultiStringSearch<MultiSearchFirstIndexImpl<PositionCaseInsensitiveASCII>, NameMultiSearchFirstIndexCaseInsensitive>;
645using FunctionMultiSearchFirstIndexCaseInsensitiveUTF8
646 = FunctionsMultiStringSearch<MultiSearchFirstIndexImpl<PositionCaseInsensitiveUTF8>, NameMultiSearchFirstIndexCaseInsensitiveUTF8>;
647
648using FunctionMultiSearchFirstPosition
649 = FunctionsMultiStringSearch<MultiSearchFirstPositionImpl<PositionCaseSensitiveASCII>, NameMultiSearchFirstPosition>;
650using FunctionMultiSearchFirstPositionUTF8
651 = FunctionsMultiStringSearch<MultiSearchFirstPositionImpl<PositionCaseSensitiveUTF8>, NameMultiSearchFirstPositionUTF8>;
652using FunctionMultiSearchFirstPositionCaseInsensitive
653 = FunctionsMultiStringSearch<MultiSearchFirstPositionImpl<PositionCaseInsensitiveASCII>, NameMultiSearchFirstPositionCaseInsensitive>;
654using FunctionMultiSearchFirstPositionCaseInsensitiveUTF8 = FunctionsMultiStringSearch<
655 MultiSearchFirstPositionImpl<PositionCaseInsensitiveUTF8>,
656 NameMultiSearchFirstPositionCaseInsensitiveUTF8>;
657
658using FunctionHasToken = FunctionsStringSearch<HasTokenImpl<VolnitskyCaseSensitiveToken, false>, NameHasToken>;
659using FunctionHasTokenCaseInsensitive
660 = FunctionsStringSearch<HasTokenImpl<VolnitskyCaseInsensitiveToken, false>, NameHasTokenCaseInsensitive>;
661
662void registerFunctionsStringSearch(FunctionFactory & factory)
663{
664 factory.registerFunction<FunctionPosition>(FunctionFactory::CaseInsensitive);
665 factory.registerFunction<FunctionPositionUTF8>();
666 factory.registerFunction<FunctionPositionCaseInsensitive>();
667 factory.registerFunction<FunctionPositionCaseInsensitiveUTF8>();
668
669 factory.registerFunction<FunctionMultiSearchAllPositions>();
670 factory.registerFunction<FunctionMultiSearchAllPositionsUTF8>();
671 factory.registerFunction<FunctionMultiSearchAllPositionsCaseInsensitive>();
672 factory.registerFunction<FunctionMultiSearchAllPositionsCaseInsensitiveUTF8>();
673
674 factory.registerFunction<FunctionMultiSearch>();
675 factory.registerFunction<FunctionMultiSearchUTF8>();
676 factory.registerFunction<FunctionMultiSearchCaseInsensitive>();
677 factory.registerFunction<FunctionMultiSearchCaseInsensitiveUTF8>();
678
679 factory.registerFunction<FunctionMultiSearchFirstIndex>();
680 factory.registerFunction<FunctionMultiSearchFirstIndexUTF8>();
681 factory.registerFunction<FunctionMultiSearchFirstIndexCaseInsensitive>();
682 factory.registerFunction<FunctionMultiSearchFirstIndexCaseInsensitiveUTF8>();
683
684 factory.registerFunction<FunctionMultiSearchFirstPosition>();
685 factory.registerFunction<FunctionMultiSearchFirstPositionUTF8>();
686 factory.registerFunction<FunctionMultiSearchFirstPositionCaseInsensitive>();
687 factory.registerFunction<FunctionMultiSearchFirstPositionCaseInsensitiveUTF8>();
688
689 factory.registerFunction<FunctionHasToken>();
690 factory.registerFunction<FunctionHasTokenCaseInsensitive>();
691
692 factory.registerAlias("locate", NamePosition::name, FunctionFactory::CaseInsensitive);
693}
694}
695