1/*****************************************************************************
2
3Copyright (c) 2007, 2015, Oracle and/or its affiliates. All Rights Reserved.
4Copyright (c) 2017, 2018, MariaDB Corporation.
5
6This program is free software; you can redistribute it and/or modify it under
7the terms of the GNU General Public License as published by the Free Software
8Foundation; version 2 of the License.
9
10This program is distributed in the hope that it will be useful, but WITHOUT
11ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
13
14You should have received a copy of the GNU General Public License along with
15this program; if not, write to the Free Software Foundation, Inc.,
1651 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
17
18*****************************************************************************/
19
20/******************************************************************//**
21@file include/fts0types.ic
22Full text search types.
23
24Created 2007-03-27 Sunny Bains
25*******************************************************/
26
27#ifndef INNOBASE_FTS0TYPES_IC
28#define INNOBASE_FTS0TYPES_IC
29
30#include "rem0cmp.h"
31#include "ha_prototypes.h"
32
33/******************************************************************//**
34Duplicate a string.
35@return < 0 if n1 < n2, 0 if n1 == n2, > 0 if n1 > n2 */
36UNIV_INLINE
37void
38fts_string_dup(
39/*===========*/
40 fts_string_t* dst, /*!< in: dup to here */
41 const fts_string_t* src, /*!< in: src string */
42 mem_heap_t* heap) /*!< in: heap to use */
43{
44 dst->f_str = (byte*)mem_heap_alloc(heap, src->f_len + 1);
45 memcpy(dst->f_str, src->f_str, src->f_len);
46
47 dst->f_len = src->f_len;
48 dst->f_str[src->f_len] = 0;
49 dst->f_n_char = src->f_n_char;
50}
51
52/******************************************************************//**
53Compare two fts_trx_row_t doc_ids.
54@return < 0 if n1 < n2, 0 if n1 == n2, > 0 if n1 > n2 */
55UNIV_INLINE
56int
57fts_trx_row_doc_id_cmp(
58/*===================*/
59 const void* p1, /*!< in: id1 */
60 const void* p2) /*!< in: id2 */
61{
62 const fts_trx_row_t* tr1 = (const fts_trx_row_t*) p1;
63 const fts_trx_row_t* tr2 = (const fts_trx_row_t*) p2;
64
65 return((int)(tr1->doc_id - tr2->doc_id));
66}
67
68/******************************************************************//**
69Compare two fts_ranking_t doc_ids.
70@return < 0 if n1 < n2, 0 if n1 == n2, > 0 if n1 > n2 */
71UNIV_INLINE
72int
73fts_ranking_doc_id_cmp(
74/*===================*/
75 const void* p1, /*!< in: id1 */
76 const void* p2) /*!< in: id2 */
77{
78 const fts_ranking_t* rk1 = (const fts_ranking_t*) p1;
79 const fts_ranking_t* rk2 = (const fts_ranking_t*) p2;
80
81 return((int)(rk1->doc_id - rk2->doc_id));
82}
83
84/******************************************************************//**
85Compare two fts_update_t doc_ids.
86@return < 0 if n1 < n2, 0 if n1 == n2, > 0 if n1 > n2 */
87UNIV_INLINE
88int
89fts_update_doc_id_cmp(
90/*==================*/
91 const void* p1, /*!< in: id1 */
92 const void* p2) /*!< in: id2 */
93{
94 const fts_update_t* up1 = (const fts_update_t*) p1;
95 const fts_update_t* up2 = (const fts_update_t*) p2;
96
97 return((int)(up1->doc_id - up2->doc_id));
98}
99
100/******************************************************************//**
101Get the first character's code position for FTS index partition */
102extern
103ulint
104innobase_strnxfrm(
105/*==============*/
106 const CHARSET_INFO* cs, /*!< in: Character set */
107 const uchar* p2, /*!< in: string */
108 const ulint len2); /*!< in: string length */
109
110/** Check if fts index charset is cjk
111@param[in] cs charset
112@retval true if the charset is cjk
113@retval false if not. */
114UNIV_INLINE
115bool
116fts_is_charset_cjk(
117 const CHARSET_INFO* cs)
118{
119 return cs == &my_charset_gb2312_chinese_ci
120 || cs == &my_charset_gbk_chinese_ci
121 || cs == &my_charset_big5_chinese_ci
122 || cs == &my_charset_ujis_japanese_ci
123 || cs == &my_charset_sjis_japanese_ci
124 || cs == &my_charset_cp932_japanese_ci
125 || cs == &my_charset_eucjpms_japanese_ci
126 || cs == &my_charset_euckr_korean_ci;
127}
128
129/** Select the FTS auxiliary index for the given character by range.
130@param[in] cs charset
131@param[in] str string
132@param[in] len string length
133@retval the index to use for the string */
134UNIV_INLINE
135ulint
136fts_select_index_by_range(
137 const CHARSET_INFO* cs,
138 const byte* str,
139 ulint len)
140{
141 ulint selected = 0;
142 ulint value = innobase_strnxfrm(cs, str, len);
143
144 while (fts_index_selector[selected].value != 0) {
145
146 if (fts_index_selector[selected].value == value) {
147
148 return(selected);
149
150 } else if (fts_index_selector[selected].value > value) {
151
152 return(selected > 0 ? selected - 1 : 0);
153 }
154
155 ++selected;
156 }
157
158 ut_ad(selected > 1);
159
160 return(selected - 1);
161}
162
163/** Select the FTS auxiliary index for the given character by hash.
164@param[in] cs charset
165@param[in] str string
166@param[in] len string length
167@retval the index to use for the string */
168UNIV_INLINE
169ulint
170fts_select_index_by_hash(
171 const CHARSET_INFO* cs,
172 const byte* str,
173 ulint len)
174{
175 ulong nr1 = 1;
176 ulong nr2 = 4;
177
178 ut_ad(!(str == NULL && len > 0));
179
180 if (str == NULL || len == 0) {
181 return 0;
182 }
183
184 /* Get the first char */
185 /* JAN: TODO: MySQL 5.7 had
186 char_len = my_mbcharlen_ptr(cs, reinterpret_cast<const char*>(str),
187 reinterpret_cast<const char*>(str + len));
188 */
189 size_t char_len = size_t(cs->cset->charlen(cs, str, str + len));
190
191 ut_ad(char_len <= len);
192
193 /* Get collation hash code */
194 cs->coll->hash_sort(cs, str, char_len, &nr1, &nr2);
195
196 return(nr1 % FTS_NUM_AUX_INDEX);
197}
198
199/** Select the FTS auxiliary index for the given character.
200@param[in] cs charset
201@param[in] str string
202@param[in] len string length in bytes
203@retval the index to use for the string */
204UNIV_INLINE
205ulint
206fts_select_index(
207 const CHARSET_INFO* cs,
208 const byte* str,
209 ulint len)
210{
211 ulint selected;
212
213 if (fts_is_charset_cjk(cs)) {
214 selected = fts_select_index_by_hash(cs, str, len);
215 } else {
216 selected = fts_select_index_by_range(cs, str, len);
217 }
218
219 return(selected);
220}
221
222/******************************************************************//**
223Return the selected FTS aux index suffix. */
224UNIV_INLINE
225const char*
226fts_get_suffix(
227/*===========*/
228 ulint selected) /*!< in: selected index */
229{
230 return(fts_index_selector[selected].suffix);
231}
232
233#endif /* INNOBASE_FTS0TYPES_IC */
234