1 | /***************************************************************************** |
2 | |
3 | Copyright (c) 2007, 2015, Oracle and/or its affiliates. All Rights Reserved. |
4 | Copyright (c) 2017, 2018, MariaDB Corporation. |
5 | |
6 | This program is free software; you can redistribute it and/or modify it under |
7 | the terms of the GNU General Public License as published by the Free Software |
8 | Foundation; version 2 of the License. |
9 | |
10 | This program is distributed in the hope that it will be useful, but WITHOUT |
11 | ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
12 | FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. |
13 | |
14 | You should have received a copy of the GNU General Public License along with |
15 | this program; if not, write to the Free Software Foundation, Inc., |
16 | 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA |
17 | |
18 | *****************************************************************************/ |
19 | |
20 | /******************************************************************//** |
21 | @file include/fts0types.ic |
22 | Full text search types. |
23 | |
24 | Created 2007-03-27 Sunny Bains |
25 | *******************************************************/ |
26 | |
27 | #ifndef INNOBASE_FTS0TYPES_IC |
28 | #define INNOBASE_FTS0TYPES_IC |
29 | |
30 | #include "rem0cmp.h" |
31 | #include "ha_prototypes.h" |
32 | |
33 | /******************************************************************//** |
34 | Duplicate a string. |
35 | @return < 0 if n1 < n2, 0 if n1 == n2, > 0 if n1 > n2 */ |
36 | UNIV_INLINE |
37 | void |
38 | fts_string_dup( |
39 | /*===========*/ |
40 | fts_string_t* dst, /*!< in: dup to here */ |
41 | const fts_string_t* src, /*!< in: src string */ |
42 | mem_heap_t* heap) /*!< in: heap to use */ |
43 | { |
44 | dst->f_str = (byte*)mem_heap_alloc(heap, src->f_len + 1); |
45 | memcpy(dst->f_str, src->f_str, src->f_len); |
46 | |
47 | dst->f_len = src->f_len; |
48 | dst->f_str[src->f_len] = 0; |
49 | dst->f_n_char = src->f_n_char; |
50 | } |
51 | |
52 | /******************************************************************//** |
53 | Compare two fts_trx_row_t doc_ids. |
54 | @return < 0 if n1 < n2, 0 if n1 == n2, > 0 if n1 > n2 */ |
55 | UNIV_INLINE |
56 | int |
57 | fts_trx_row_doc_id_cmp( |
58 | /*===================*/ |
59 | const void* p1, /*!< in: id1 */ |
60 | const void* p2) /*!< in: id2 */ |
61 | { |
62 | const fts_trx_row_t* tr1 = (const fts_trx_row_t*) p1; |
63 | const fts_trx_row_t* tr2 = (const fts_trx_row_t*) p2; |
64 | |
65 | return((int)(tr1->doc_id - tr2->doc_id)); |
66 | } |
67 | |
68 | /******************************************************************//** |
69 | Compare two fts_ranking_t doc_ids. |
70 | @return < 0 if n1 < n2, 0 if n1 == n2, > 0 if n1 > n2 */ |
71 | UNIV_INLINE |
72 | int |
73 | fts_ranking_doc_id_cmp( |
74 | /*===================*/ |
75 | const void* p1, /*!< in: id1 */ |
76 | const void* p2) /*!< in: id2 */ |
77 | { |
78 | const fts_ranking_t* rk1 = (const fts_ranking_t*) p1; |
79 | const fts_ranking_t* rk2 = (const fts_ranking_t*) p2; |
80 | |
81 | return((int)(rk1->doc_id - rk2->doc_id)); |
82 | } |
83 | |
84 | /******************************************************************//** |
85 | Compare two fts_update_t doc_ids. |
86 | @return < 0 if n1 < n2, 0 if n1 == n2, > 0 if n1 > n2 */ |
87 | UNIV_INLINE |
88 | int |
89 | fts_update_doc_id_cmp( |
90 | /*==================*/ |
91 | const void* p1, /*!< in: id1 */ |
92 | const void* p2) /*!< in: id2 */ |
93 | { |
94 | const fts_update_t* up1 = (const fts_update_t*) p1; |
95 | const fts_update_t* up2 = (const fts_update_t*) p2; |
96 | |
97 | return((int)(up1->doc_id - up2->doc_id)); |
98 | } |
99 | |
/******************************************************************//**
Get the first character's code position for FTS index partition.
Only declared here; the definition lives outside this file.
fts_select_index_by_range() compares the returned value against the
boundary values stored in fts_index_selector[].
@return value used to choose the auxiliary index partition */
extern
ulint
innobase_strnxfrm(
/*==============*/
	const CHARSET_INFO*	cs,	/*!< in: Character set */
	const uchar*		p2,	/*!< in: string */
	const ulint		len2);	/*!< in: string length */
109 | |
110 | /** Check if fts index charset is cjk |
111 | @param[in] cs charset |
112 | @retval true if the charset is cjk |
113 | @retval false if not. */ |
114 | UNIV_INLINE |
115 | bool |
116 | fts_is_charset_cjk( |
117 | const CHARSET_INFO* cs) |
118 | { |
119 | return cs == &my_charset_gb2312_chinese_ci |
120 | || cs == &my_charset_gbk_chinese_ci |
121 | || cs == &my_charset_big5_chinese_ci |
122 | || cs == &my_charset_ujis_japanese_ci |
123 | || cs == &my_charset_sjis_japanese_ci |
124 | || cs == &my_charset_cp932_japanese_ci |
125 | || cs == &my_charset_eucjpms_japanese_ci |
126 | || cs == &my_charset_euckr_korean_ci; |
127 | } |
128 | |
129 | /** Select the FTS auxiliary index for the given character by range. |
130 | @param[in] cs charset |
131 | @param[in] str string |
132 | @param[in] len string length |
133 | @retval the index to use for the string */ |
134 | UNIV_INLINE |
135 | ulint |
136 | fts_select_index_by_range( |
137 | const CHARSET_INFO* cs, |
138 | const byte* str, |
139 | ulint len) |
140 | { |
141 | ulint selected = 0; |
142 | ulint value = innobase_strnxfrm(cs, str, len); |
143 | |
144 | while (fts_index_selector[selected].value != 0) { |
145 | |
146 | if (fts_index_selector[selected].value == value) { |
147 | |
148 | return(selected); |
149 | |
150 | } else if (fts_index_selector[selected].value > value) { |
151 | |
152 | return(selected > 0 ? selected - 1 : 0); |
153 | } |
154 | |
155 | ++selected; |
156 | } |
157 | |
158 | ut_ad(selected > 1); |
159 | |
160 | return(selected - 1); |
161 | } |
162 | |
163 | /** Select the FTS auxiliary index for the given character by hash. |
164 | @param[in] cs charset |
165 | @param[in] str string |
166 | @param[in] len string length |
167 | @retval the index to use for the string */ |
168 | UNIV_INLINE |
169 | ulint |
170 | fts_select_index_by_hash( |
171 | const CHARSET_INFO* cs, |
172 | const byte* str, |
173 | ulint len) |
174 | { |
175 | ulong nr1 = 1; |
176 | ulong nr2 = 4; |
177 | |
178 | ut_ad(!(str == NULL && len > 0)); |
179 | |
180 | if (str == NULL || len == 0) { |
181 | return 0; |
182 | } |
183 | |
184 | /* Get the first char */ |
185 | /* JAN: TODO: MySQL 5.7 had |
186 | char_len = my_mbcharlen_ptr(cs, reinterpret_cast<const char*>(str), |
187 | reinterpret_cast<const char*>(str + len)); |
188 | */ |
189 | size_t char_len = size_t(cs->cset->charlen(cs, str, str + len)); |
190 | |
191 | ut_ad(char_len <= len); |
192 | |
193 | /* Get collation hash code */ |
194 | cs->coll->hash_sort(cs, str, char_len, &nr1, &nr2); |
195 | |
196 | return(nr1 % FTS_NUM_AUX_INDEX); |
197 | } |
198 | |
199 | /** Select the FTS auxiliary index for the given character. |
200 | @param[in] cs charset |
201 | @param[in] str string |
202 | @param[in] len string length in bytes |
203 | @retval the index to use for the string */ |
204 | UNIV_INLINE |
205 | ulint |
206 | fts_select_index( |
207 | const CHARSET_INFO* cs, |
208 | const byte* str, |
209 | ulint len) |
210 | { |
211 | ulint selected; |
212 | |
213 | if (fts_is_charset_cjk(cs)) { |
214 | selected = fts_select_index_by_hash(cs, str, len); |
215 | } else { |
216 | selected = fts_select_index_by_range(cs, str, len); |
217 | } |
218 | |
219 | return(selected); |
220 | } |
221 | |
222 | /******************************************************************//** |
223 | Return the selected FTS aux index suffix. */ |
224 | UNIV_INLINE |
225 | const char* |
226 | fts_get_suffix( |
227 | /*===========*/ |
228 | ulint selected) /*!< in: selected index */ |
229 | { |
230 | return(fts_index_selector[selected].suffix); |
231 | } |
232 | |
233 | #endif /* INNOBASE_FTS0TYPES_IC */ |
234 | |