1#pragma once
2
3#include <algorithm>
4#include <vector>
5#include <stdint.h>
6#include <string.h>
7#include <Core/Types.h>
8#include <Poco/UTF8Encoding.h>
9#include <Poco/Unicode.h>
10#include <Common/StringSearcher.h>
11#include <Common/StringUtils/StringUtils.h>
12#include <common/StringRef.h>
13#include <common/unaligned.h>
14
15/** Search for a substring in a string by Volnitsky's algorithm
16 * http://volnitsky.com/project/str_search/
17 *
18 * `haystack` and `needle` can contain zero bytes.
19 *
20 * Algorithm:
21 * - if the `needle` is too small or too large, or too small `haystack`, use std::search or memchr;
22 * - when initializing, fill in an open-addressing linear probing hash table of the form
23 * hash from the bigram of needle -> the position of this bigram in needle + 1.
24 * (one is added only to distinguish zero offset from an empty cell)
25 * - the keys are not stored in the hash table, only the values are stored;
26 * - bigrams can be inserted several times if they occur in the needle several times;
27 * - when searching, take from haystack bigram, which should correspond to the last bigram of needle (comparing from the end);
28 * - look for it in the hash table, if found - get the offset from the hash table and compare the string bytewise;
29 * - if it did not match, we check the next cell of the hash table from the collision resolution chain;
30 * - if not found, skip to haystack almost the size of the needle bytes;
31 *
32 * MultiVolnitsky - search for multiple substrings in a string:
33 * - Add bigrams to hash table with string index. Then the usual Volnitsky search is used.
34 * - We are adding while searching, limiting the number of fallback searchers and the total number of added bigrams
35 */
36
37
38namespace DB
39{
40namespace VolnitskyTraits
41{
42 using Offset = UInt8; /// Offset in the needle. For the basic algorithm, the length of the needle must not be greater than 255.
43 using Id = UInt8; /// Index of the string (within the array of multiple needles), must not be greater than 255.
44 using Ngram = UInt16; /// n-gram (2 bytes).
45
46 /** Fits into the L2 cache (of common Intel CPUs).
47 * This number is extremely good for compilers as it is numeric_limits<Uint16>::max() and there are optimizations with movzwl and other instructions with 2 bytes
48 */
49 static constexpr size_t hash_size = 64 * 1024;
50
51 /// min haystack size to use main algorithm instead of fallback
52 static constexpr size_t min_haystack_size_for_algorithm = 20000;
53
54 static inline bool isFallbackNeedle(const size_t needle_size, size_t haystack_size_hint = 0)
55 {
56 return needle_size < 2 * sizeof(Ngram) || needle_size >= std::numeric_limits<Offset>::max()
57 || (haystack_size_hint && haystack_size_hint < min_haystack_size_for_algorithm);
58 }
59
60 static inline Ngram toNGram(const UInt8 * const pos) { return unalignedLoad<Ngram>(pos); }
61
62 template <typename Callback>
63 static inline void putNGramASCIICaseInsensitive(const UInt8 * const pos, const int offset, const Callback & putNGramBase)
64 {
65 struct Chars
66 {
67 UInt8 c0;
68 UInt8 c1;
69 };
70
71 union
72 {
73 Ngram n;
74 Chars chars;
75 };
76
77 n = toNGram(pos);
78
79 const auto c0_al = isAlphaASCII(chars.c0);
80 const auto c1_al = isAlphaASCII(chars.c1);
81
82 if (c0_al && c1_al)
83 {
84 /// 4 combinations: AB, aB, Ab, ab
85 putNGramBase(n, offset);
86 chars.c0 = alternateCaseIfAlphaASCII(chars.c0);
87 putNGramBase(n, offset);
88 chars.c1 = alternateCaseIfAlphaASCII(chars.c1);
89 putNGramBase(n, offset);
90 chars.c0 = alternateCaseIfAlphaASCII(chars.c0);
91 putNGramBase(n, offset);
92 }
93 else if (c0_al)
94 {
95 /// 2 combinations: A1, a1
96 putNGramBase(n, offset);
97 chars.c0 = alternateCaseIfAlphaASCII(chars.c0);
98 putNGramBase(n, offset);
99 }
100 else if (c1_al)
101 {
102 /// 2 combinations: 0B, 0b
103 putNGramBase(n, offset);
104 chars.c1 = alternateCaseIfAlphaASCII(chars.c1);
105 putNGramBase(n, offset);
106 }
107 else
108 /// 1 combination: 01
109 putNGramBase(n, offset);
110 }
111
112 template <bool CaseSensitive, bool ASCII, typename Callback>
113 static inline void putNGram(const UInt8 * const pos, const int offset, [[maybe_unused]] const UInt8 * const begin, const Callback & putNGramBase)
114 {
115 if constexpr (CaseSensitive)
116 {
117 putNGramBase(toNGram(pos), offset);
118 }
119 else
120 {
121 if constexpr (ASCII)
122 {
123 putNGramASCIICaseInsensitive(pos, offset, putNGramBase);
124 }
125 else
126 {
127 struct Chars
128 {
129 UInt8 c0;
130 UInt8 c1;
131 };
132
133 union
134 {
135 VolnitskyTraits::Ngram n;
136 Chars chars;
137 };
138
139 n = toNGram(pos);
140
141 if (isascii(chars.c0) && isascii(chars.c1))
142 putNGramASCIICaseInsensitive(pos, offset, putNGramBase);
143 else
144 {
145 /** n-gram (in the case of n = 2)
146 * can be entirely located within one code point,
147 * or intersect with two code points.
148 *
149 * In the first case, you need to consider up to two alternatives - this code point in upper and lower case,
150 * and in the second case - up to four alternatives - fragments of two code points in all combinations of cases.
151 *
152 * It does not take into account the dependence of the case-transformation from the locale (for example - Turkish `Ii`)
153 * as well as composition / decomposition and other features.
154 *
155 * It also does not work if characters with lower and upper cases are represented by different number of bytes or code points.
156 */
157
158 using Seq = UInt8[6];
159
160 static const Poco::UTF8Encoding utf8;
161
162 if (UTF8::isContinuationOctet(chars.c1))
163 {
164 /// ngram is inside a sequence
165 auto seq_pos = pos;
166 UTF8::syncBackward(seq_pos, begin);
167
168 const auto u32 = utf8.convert(seq_pos);
169 const auto l_u32 = Poco::Unicode::toLower(u32);
170 const auto u_u32 = Poco::Unicode::toUpper(u32);
171
172 /// symbol is case-independent
173 if (l_u32 == u_u32)
174 putNGramBase(n, offset);
175 else
176 {
177 /// where is the given ngram in respect to the start of UTF-8 sequence?
178 const auto seq_ngram_offset = pos - seq_pos;
179
180 Seq seq;
181
182 /// put ngram for lowercase
183 utf8.convert(l_u32, seq, sizeof(seq));
184 chars.c0 = seq[seq_ngram_offset];
185 chars.c1 = seq[seq_ngram_offset + 1];
186 putNGramBase(n, offset);
187
188 /// put ngram for uppercase
189 utf8.convert(u_u32, seq, sizeof(seq));
190 chars.c0 = seq[seq_ngram_offset]; //-V519
191 chars.c1 = seq[seq_ngram_offset + 1]; //-V519
192 putNGramBase(n, offset);
193 }
194 }
195 else
196 {
197 /// ngram is on the boundary of two sequences
198 /// first sequence may start before u_pos if it is not ASCII
199 auto first_seq_pos = pos;
200 UTF8::syncBackward(first_seq_pos, begin);
201 /// where is the given ngram in respect to the start of first UTF-8 sequence?
202 const auto seq_ngram_offset = pos - first_seq_pos;
203
204 const auto first_u32 = utf8.convert(first_seq_pos);
205 const auto first_l_u32 = Poco::Unicode::toLower(first_u32);
206 const auto first_u_u32 = Poco::Unicode::toUpper(first_u32);
207
208 /// second sequence always start immediately after u_pos
209 auto second_seq_pos = pos + 1;
210
211 const auto second_u32 = utf8.convert(second_seq_pos); /// TODO This assumes valid UTF-8 or zero byte after needle.
212 const auto second_l_u32 = Poco::Unicode::toLower(second_u32);
213 const auto second_u_u32 = Poco::Unicode::toUpper(second_u32);
214
215 /// both symbols are case-independent
216 if (first_l_u32 == first_u_u32 && second_l_u32 == second_u_u32)
217 {
218 putNGramBase(n, offset);
219 }
220 else if (first_l_u32 == first_u_u32)
221 {
222 /// first symbol is case-independent
223 Seq seq;
224
225 /// put ngram for lowercase
226 utf8.convert(second_l_u32, seq, sizeof(seq));
227 chars.c1 = seq[0];
228 putNGramBase(n, offset);
229
230 /// put ngram from uppercase, if it is different
231 utf8.convert(second_u_u32, seq, sizeof(seq));
232 if (chars.c1 != seq[0])
233 {
234 chars.c1 = seq[0];
235 putNGramBase(n, offset);
236 }
237 }
238 else if (second_l_u32 == second_u_u32)
239 {
240 /// second symbol is case-independent
241 Seq seq;
242
243 /// put ngram for lowercase
244 utf8.convert(first_l_u32, seq, sizeof(seq));
245 chars.c0 = seq[seq_ngram_offset];
246 putNGramBase(n, offset);
247
248 /// put ngram for uppercase, if it is different
249 utf8.convert(first_u_u32, seq, sizeof(seq));
250 if (chars.c0 != seq[seq_ngram_offset])
251 {
252 chars.c0 = seq[seq_ngram_offset];
253 putNGramBase(n, offset);
254 }
255 }
256 else
257 {
258 Seq first_l_seq;
259 Seq first_u_seq;
260 Seq second_l_seq;
261 Seq second_u_seq;
262
263 utf8.convert(first_l_u32, first_l_seq, sizeof(first_l_seq));
264 utf8.convert(first_u_u32, first_u_seq, sizeof(first_u_seq));
265 utf8.convert(second_l_u32, second_l_seq, sizeof(second_l_seq));
266 utf8.convert(second_u_u32, second_u_seq, sizeof(second_u_seq));
267
268 auto c0l = first_l_seq[seq_ngram_offset];
269 auto c0u = first_u_seq[seq_ngram_offset];
270 auto c1l = second_l_seq[0];
271 auto c1u = second_u_seq[0];
272
273 /// ngram for ll
274 chars.c0 = c0l;
275 chars.c1 = c1l;
276 putNGramBase(n, offset);
277
278 if (c0l != c0u)
279 {
280 /// ngram for Ul
281 chars.c0 = c0u;
282 chars.c1 = c1l;
283 putNGramBase(n, offset);
284 }
285
286 if (c1l != c1u)
287 {
288 /// ngram for lU
289 chars.c0 = c0l;
290 chars.c1 = c1u;
291 putNGramBase(n, offset);
292 }
293
294 if (c0l != c0u && c1l != c1u)
295 {
296 /// ngram for UU
297 chars.c0 = c0u;
298 chars.c1 = c1u;
299 putNGramBase(n, offset);
300 }
301 }
302 }
303 }
304 }
305 }
306 }
307}
308
309
310/// @todo store lowercase needle to speed up in case there are numerous occurrences of bigrams from needle in haystack
311template <bool CaseSensitive, bool ASCII, typename FallbackSearcher>
312class VolnitskyBase
313{
314protected:
315 const UInt8 * const needle;
316 const size_t needle_size;
317 const UInt8 * const needle_end = needle + needle_size;
318 /// For how long we move, if the n-gram from haystack is not found in the hash table.
319 const size_t step = needle_size - sizeof(VolnitskyTraits::Ngram) + 1;
320
321 /** max needle length is 255, max distinct ngrams for case-sensitive is (255 - 1), case-insensitive is 4 * (255 - 1)
322 * storage of 64K ngrams (n = 2, 128 KB) should be large enough for both cases */
323 VolnitskyTraits::Offset hash[VolnitskyTraits::hash_size]; /// Hash table.
324
325 const bool fallback; /// Do we need to use the fallback algorithm.
326
327 FallbackSearcher fallback_searcher;
328
329public:
330 using Searcher = FallbackSearcher;
331
332 /** haystack_size_hint - the expected total size of the haystack for `search` calls. Optional (zero means unspecified).
333 * If you specify it small enough, the fallback algorithm will be used,
334 * since it is considered that it's useless to waste time initializing the hash table.
335 */
336 VolnitskyBase(const char * const needle_, const size_t needle_size_, size_t haystack_size_hint = 0)
337 : needle{reinterpret_cast<const UInt8 *>(needle_)}
338 , needle_size{needle_size_}
339 , fallback{VolnitskyTraits::isFallbackNeedle(needle_size, haystack_size_hint)}
340 , fallback_searcher{needle_, needle_size}
341 {
342 if (fallback)
343 return;
344
345 memset(hash, 0, sizeof(hash));
346
347 auto callback = [this](const VolnitskyTraits::Ngram ngram, const int offset) { return this->putNGramBase(ngram, offset); };
348 /// ssize_t is used here because unsigned can't be used with condition like `i >= 0`, unsigned always >= 0
349 /// And also adding from the end guarantees that we will find first occurence because we will lookup bigger offsets first.
350 for (auto i = static_cast<ssize_t>(needle_size - sizeof(VolnitskyTraits::Ngram)); i >= 0; --i)
351 VolnitskyTraits::putNGram<CaseSensitive, ASCII>(this->needle + i, i + 1, this->needle, callback);
352 }
353
354
355 /// If not found, the end of the haystack is returned.
356 const UInt8 * search(const UInt8 * const haystack, const size_t haystack_size) const
357 {
358 if (needle_size == 0)
359 return haystack;
360
361 const auto haystack_end = haystack + haystack_size;
362
363 if (fallback || haystack_size <= needle_size)
364 return fallback_searcher.search(haystack, haystack_end);
365
366 /// Let's "apply" the needle to the haystack and compare the n-gram from the end of the needle.
367 const auto * pos = haystack + needle_size - sizeof(VolnitskyTraits::Ngram);
368 for (; pos <= haystack_end - needle_size; pos += step)
369 {
370 /// We look at all the cells of the hash table that can correspond to the n-gram from haystack.
371 for (size_t cell_num = VolnitskyTraits::toNGram(pos) % VolnitskyTraits::hash_size; hash[cell_num];
372 cell_num = (cell_num + 1) % VolnitskyTraits::hash_size)
373 {
374 /// When found - compare bytewise, using the offset from the hash table.
375 const auto res = pos - (hash[cell_num] - 1);
376
377 /// pointer in the code is always padded array so we can use pagesafe semantics
378 if (fallback_searcher.compare(haystack, haystack_end, res))
379 return res;
380 }
381 }
382
383 return fallback_searcher.search(pos - step + 1, haystack_end);
384 }
385
386 const char * search(const char * haystack, size_t haystack_size) const
387 {
388 return reinterpret_cast<const char *>(search(reinterpret_cast<const UInt8 *>(haystack), haystack_size));
389 }
390
391protected:
392 void putNGramBase(const VolnitskyTraits::Ngram ngram, const int offset)
393 {
394 /// Put the offset for the n-gram in the corresponding cell or the nearest free cell.
395 size_t cell_num = ngram % VolnitskyTraits::hash_size;
396
397 while (hash[cell_num])
398 cell_num = (cell_num + 1) % VolnitskyTraits::hash_size; /// Search for the next free cell.
399
400 hash[cell_num] = offset;
401 }
402};
403
404
405template <bool CaseSensitive, bool ASCII, typename FallbackSearcher>
406class MultiVolnitskyBase
407{
408private:
409 /// needles and their offsets
410 const std::vector<StringRef> & needles;
411
412
413 /// fallback searchers
414 std::vector<size_t> fallback_needles;
415 std::vector<FallbackSearcher> fallback_searchers;
416
417 /// because std::pair<> is not POD
418 struct OffsetId
419 {
420 VolnitskyTraits::Id id;
421 VolnitskyTraits::Offset off;
422 };
423
424 OffsetId hash[VolnitskyTraits::hash_size];
425
426 /// step for each bunch of strings
427 size_t step;
428
429 /// last index of offsets that was not processed
430 size_t last;
431
432 /// limit for adding to hashtable. In worst case with case insentive search, the table will be filled at most as half
433 static constexpr size_t small_limit = VolnitskyTraits::hash_size / 8;
434
435public:
436 MultiVolnitskyBase(const std::vector<StringRef> & needles_) : needles{needles_}, step{0}, last{0}
437 {
438 fallback_searchers.reserve(needles.size());
439 }
440
441 /**
442 * This function is needed to initialize hash table
443 * Returns `true` if there is nothing to initialize
444 * and `false` if we have something to initialize and initializes it.
445 * This function is a kind of fallback if there are many needles.
446 * We actually destroy the hash table and initialize it with uninitialized needles
447 * and search through the haystack again.
448 * The actual usage of this function is like this:
449 * while (hasMoreToSearch())
450 * {
451 * search inside the haystack with the known needles
452 * }
453 */
454 bool hasMoreToSearch()
455 {
456 if (last == needles.size())
457 return false;
458
459 memset(hash, 0, sizeof(hash));
460 fallback_needles.clear();
461 step = std::numeric_limits<size_t>::max();
462
463 size_t buf = 0;
464 size_t size = needles.size();
465
466 for (; last < size; ++last)
467 {
468 const char * cur_needle_data = needles[last].data;
469 const size_t cur_needle_size = needles[last].size;
470
471 /// save the indices of fallback searchers
472 if (VolnitskyTraits::isFallbackNeedle(cur_needle_size))
473 {
474 fallback_needles.push_back(last);
475 }
476 else
477 {
478 /// put all bigrams
479 auto callback = [this](const VolnitskyTraits::Ngram ngram, const int offset)
480 {
481 return this->putNGramBase(ngram, offset, this->last);
482 };
483
484 buf += cur_needle_size - sizeof(VolnitskyTraits::Ngram) + 1;
485
486 /// this is the condition when we actually need to stop and start searching with known needles
487 if (buf > small_limit)
488 break;
489
490 step = std::min(step, cur_needle_size - sizeof(VolnitskyTraits::Ngram) + 1);
491 for (auto i = static_cast<int>(cur_needle_size - sizeof(VolnitskyTraits::Ngram)); i >= 0; --i)
492 {
493 VolnitskyTraits::putNGram<CaseSensitive, ASCII>(
494 reinterpret_cast<const UInt8 *>(cur_needle_data) + i,
495 i + 1,
496 reinterpret_cast<const UInt8 *>(cur_needle_data),
497 callback);
498 }
499 }
500 fallback_searchers.emplace_back(cur_needle_data, cur_needle_size);
501 }
502 return true;
503 }
504
505 inline bool searchOne(const UInt8 * haystack, const UInt8 * haystack_end) const
506 {
507 const size_t fallback_size = fallback_needles.size();
508 for (size_t i = 0; i < fallback_size; ++i)
509 if (fallback_searchers[fallback_needles[i]].search(haystack, haystack_end) != haystack_end)
510 return true;
511
512 /// check if we have one non empty volnitsky searcher
513 if (step != std::numeric_limits<size_t>::max())
514 {
515 const auto * pos = haystack + step - sizeof(VolnitskyTraits::Ngram);
516 for (; pos <= haystack_end - sizeof(VolnitskyTraits::Ngram); pos += step)
517 {
518 for (size_t cell_num = VolnitskyTraits::toNGram(pos) % VolnitskyTraits::hash_size; hash[cell_num].off;
519 cell_num = (cell_num + 1) % VolnitskyTraits::hash_size)
520 {
521 if (pos >= haystack + hash[cell_num].off - 1)
522 {
523 const auto res = pos - (hash[cell_num].off - 1);
524 const size_t ind = hash[cell_num].id;
525 if (res + needles[ind].size <= haystack_end && fallback_searchers[ind].compare(haystack, haystack_end, res))
526 return true;
527 }
528 }
529 }
530 }
531 return false;
532 }
533
534 inline size_t searchOneFirstIndex(const UInt8 * haystack, const UInt8 * haystack_end) const
535 {
536 const size_t fallback_size = fallback_needles.size();
537
538 size_t ans = std::numeric_limits<size_t>::max();
539
540 for (size_t i = 0; i < fallback_size; ++i)
541 if (fallback_searchers[fallback_needles[i]].search(haystack, haystack_end) != haystack_end)
542 ans = std::min(ans, fallback_needles[i]);
543
544 /// check if we have one non empty volnitsky searcher
545 if (step != std::numeric_limits<size_t>::max())
546 {
547 const auto * pos = haystack + step - sizeof(VolnitskyTraits::Ngram);
548 for (; pos <= haystack_end - sizeof(VolnitskyTraits::Ngram); pos += step)
549 {
550 for (size_t cell_num = VolnitskyTraits::toNGram(pos) % VolnitskyTraits::hash_size; hash[cell_num].off;
551 cell_num = (cell_num + 1) % VolnitskyTraits::hash_size)
552 {
553 if (pos >= haystack + hash[cell_num].off - 1)
554 {
555 const auto res = pos - (hash[cell_num].off - 1);
556 const size_t ind = hash[cell_num].id;
557 if (res + needles[ind].size <= haystack_end && fallback_searchers[ind].compare(haystack, haystack_end, res))
558 ans = std::min(ans, ind);
559 }
560 }
561 }
562 }
563
564 /*
565 * if nothing was found, ans + 1 will be equal to zero and we can
566 * assign it into the result because we need to return the position starting with one
567 */
568 return ans + 1;
569 }
570
571 template <typename CountCharsCallback>
572 inline UInt64 searchOneFirstPosition(const UInt8 * haystack, const UInt8 * haystack_end, const CountCharsCallback & count_chars) const
573 {
574 const size_t fallback_size = fallback_needles.size();
575
576 UInt64 ans = std::numeric_limits<UInt64>::max();
577
578 for (size_t i = 0; i < fallback_size; ++i)
579 if (auto pos = fallback_searchers[fallback_needles[i]].search(haystack, haystack_end); pos != haystack_end)
580 ans = std::min<UInt64>(ans, pos - haystack);
581
582 /// check if we have one non empty volnitsky searcher
583 if (step != std::numeric_limits<size_t>::max())
584 {
585 const auto * pos = haystack + step - sizeof(VolnitskyTraits::Ngram);
586 for (; pos <= haystack_end - sizeof(VolnitskyTraits::Ngram); pos += step)
587 {
588 for (size_t cell_num = VolnitskyTraits::toNGram(pos) % VolnitskyTraits::hash_size; hash[cell_num].off;
589 cell_num = (cell_num + 1) % VolnitskyTraits::hash_size)
590 {
591 if (pos >= haystack + hash[cell_num].off - 1)
592 {
593 const auto res = pos - (hash[cell_num].off - 1);
594 const size_t ind = hash[cell_num].id;
595 if (res + needles[ind].size <= haystack_end && fallback_searchers[ind].compare(haystack, haystack_end, res))
596 ans = std::min<UInt64>(ans, res - haystack);
597 }
598 }
599 }
600 }
601 if (ans == std::numeric_limits<UInt64>::max())
602 return 0;
603 return count_chars(haystack, haystack + ans);
604 }
605
606 template <typename CountCharsCallback, typename AnsType>
607 inline void searchOneAll(const UInt8 * haystack, const UInt8 * haystack_end, AnsType * ans, const CountCharsCallback & count_chars) const
608 {
609 const size_t fallback_size = fallback_needles.size();
610 for (size_t i = 0; i < fallback_size; ++i)
611 {
612 const UInt8 * ptr = fallback_searchers[fallback_needles[i]].search(haystack, haystack_end);
613 if (ptr != haystack_end)
614 ans[fallback_needles[i]] = count_chars(haystack, ptr);
615 }
616
617 /// check if we have one non empty volnitsky searcher
618 if (step != std::numeric_limits<size_t>::max())
619 {
620 const auto * pos = haystack + step - sizeof(VolnitskyTraits::Ngram);
621 for (; pos <= haystack_end - sizeof(VolnitskyTraits::Ngram); pos += step)
622 {
623 for (size_t cell_num = VolnitskyTraits::toNGram(pos) % VolnitskyTraits::hash_size; hash[cell_num].off;
624 cell_num = (cell_num + 1) % VolnitskyTraits::hash_size)
625 {
626 if (pos >= haystack + hash[cell_num].off - 1)
627 {
628 const auto * res = pos - (hash[cell_num].off - 1);
629 const size_t ind = hash[cell_num].id;
630 if (ans[ind] == 0 && res + needles[ind].size <= haystack_end && fallback_searchers[ind].compare(haystack, haystack_end, res))
631 ans[ind] = count_chars(haystack, res);
632 }
633 }
634 }
635 }
636 }
637
638 void putNGramBase(const VolnitskyTraits::Ngram ngram, const int offset, const size_t num)
639 {
640 size_t cell_num = ngram % VolnitskyTraits::hash_size;
641
642 while (hash[cell_num].off)
643 cell_num = (cell_num + 1) % VolnitskyTraits::hash_size;
644
645 hash[cell_num] = {static_cast<VolnitskyTraits::Id>(num), static_cast<VolnitskyTraits::Offset>(offset)};
646 }
647};
648
649
650using Volnitsky = VolnitskyBase<true, true, ASCIICaseSensitiveStringSearcher>;
651using VolnitskyUTF8 = VolnitskyBase<true, false, ASCIICaseSensitiveStringSearcher>; /// exactly same as Volnitsky
652using VolnitskyCaseInsensitive = VolnitskyBase<false, true, ASCIICaseInsensitiveStringSearcher>; /// ignores non-ASCII bytes
653using VolnitskyCaseInsensitiveUTF8 = VolnitskyBase<false, false, UTF8CaseInsensitiveStringSearcher>;
654
655using VolnitskyCaseSensitiveToken = VolnitskyBase<true, true, ASCIICaseSensitiveTokenSearcher>;
656using VolnitskyCaseInsensitiveToken = VolnitskyBase<false, true, ASCIICaseInsensitiveTokenSearcher>;
657
658using MultiVolnitsky = MultiVolnitskyBase<true, true, ASCIICaseSensitiveStringSearcher>;
659using MultiVolnitskyUTF8 = MultiVolnitskyBase<true, false, ASCIICaseSensitiveStringSearcher>;
660using MultiVolnitskyCaseInsensitive = MultiVolnitskyBase<false, true, ASCIICaseInsensitiveStringSearcher>;
661using MultiVolnitskyCaseInsensitiveUTF8 = MultiVolnitskyBase<false, false, UTF8CaseInsensitiveStringSearcher>;
662
663
664}
665