1/* Copyright (c) 2001, 2016, Oracle and/or its affiliates. All rights reserved.
2
3 This program is free software; you can redistribute it and/or modify
4 it under the terms of the GNU General Public License as published by
5 the Free Software Foundation; version 2 of the License.
6
7 This program is distributed in the hope that it will be useful,
8 but WITHOUT ANY WARRANTY; without even the implied warranty of
9 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 GNU General Public License for more details.
11
12 You should have received a copy of the GNU General Public License
13 along with this program; if not, write to the Free Software
14 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
15
16/* Written by Sergei A. Golubchik, who has a shared copyright to this code */
17
18/* TODO: add caching - pre-read several index entries at once */
19
20/*
21 Added optimization for full-text queries with plus-words. It was
22 implemented by sharing maximal document id (max_docid) variable
23 inside plus subtree. max_docid could be used by any word in plus
24 subtree, but it could be updated by plus-word only.
25
26 Fulltext "smarter index merge" optimization assumes that rows
27 it gets are ordered by doc_id. That is not the case when we
28 search for a word with truncation operator. It may return
29 rows in random order. Thus we may not use "smarter index merge"
30 optimization with "trunc-words".
31
32 The idea is: there is no need to search for docid smaller than
33 biggest docid inside current plus subtree or any upper plus subtree.
34
35 Examples:
36 +word1 word2
37 share same max_docid
38 max_docid updated by word1
39 +word1 +(word2 word3)
40 share same max_docid
41 max_docid updated by word1
42 +(word1 -word2) +(+word3 word4)
43 share same max_docid
44 max_docid updated by word3
45 +word1 word2 (+word3 word4 (+word5 word6))
46 three subexpressions (including the top-level one),
47 every one has its own max_docid, updated by its plus word.
48 but for the search word6 uses
49 max(word1.max_docid, word3.max_docid, word5.max_docid),
50 while word4 uses, accordingly,
51 max(word1.max_docid, word3.max_docid).
52*/
53
54#define FT_CORE
55#include "ftdefs.h"
56
57/* search with boolean queries */
58
59static double _wghts[11]=
60{
61 0.131687242798354,
62 0.197530864197531,
63 0.296296296296296,
64 0.444444444444444,
65 0.666666666666667,
66 1.000000000000000,
67 1.500000000000000,
68 2.250000000000000,
69 3.375000000000000,
70 5.062500000000000,
71 7.593750000000000};
72static double *wghts=_wghts+5; /* wghts[i] = 1.5**i */
73
74static double _nwghts[11]=
75{
76 -0.065843621399177,
77 -0.098765432098766,
78 -0.148148148148148,
79 -0.222222222222222,
80 -0.333333333333334,
81 -0.500000000000000,
82 -0.750000000000000,
83 -1.125000000000000,
84 -1.687500000000000,
85 -2.531250000000000,
86 -3.796875000000000};
87static double *nwghts=_nwghts+5; /* nwghts[i] = -0.5*1.5**i */
88
89#define FTB_FLAG_TRUNC 1
90/* At most one of the following flags can be set */
91#define FTB_FLAG_YES 2
92#define FTB_FLAG_NO 4
93#define FTB_FLAG_WONLY 8
94
95typedef struct st_ftb_expr FTB_EXPR;
96struct st_ftb_expr
97{
98 FTB_EXPR *up;
99 uint flags;
100/* ^^^^^^^^^^^^^^^^^^ FTB_{EXPR,WORD} common section */
101 my_off_t docid[2];
102 my_off_t max_docid;
103 float weight;
104 float cur_weight;
105 LIST *phrase; /* phrase words */
106 LIST *document; /* for phrase search */
107 uint yesses; /* number of "yes" words matched */
108 uint nos; /* number of "no" words matched */
109 uint ythresh; /* number of "yes" words in expr */
110 uint yweaks; /* number of "yes" words for scan only */
111};
112
113typedef struct st_ftb_word
114{
115 FTB_EXPR *up;
116 uint flags;
117/* ^^^^^^^^^^^^^^^^^^ FTB_{EXPR,WORD} common section */
118 my_off_t docid[2]; /* for index search and for scan */
119 my_off_t key_root;
120 FTB_EXPR *max_docid_expr;
121 MI_KEYDEF *keyinfo;
122 struct st_ftb_word *prev;
123 float weight;
124 uint ndepth;
125 uint len;
126 uchar off;
127 uchar word[1];
128} FTB_WORD;
129
130typedef struct st_ft_info
131{
132 struct _ft_vft *please;
133 MI_INFO *info;
134 CHARSET_INFO *charset;
135 FTB_EXPR *root;
136 FTB_WORD **list;
137 FTB_WORD *last_word;
138 MEM_ROOT mem_root;
139 QUEUE queue;
140 TREE no_dupes;
141 my_off_t lastpos;
142 uint keynr;
143 uchar with_scan;
144 enum { UNINITIALIZED, READY, INDEX_SEARCH, INDEX_DONE } state;
145} FTB;
146
147static int FTB_WORD_cmp(my_off_t *v, FTB_WORD *a, FTB_WORD *b)
148{
149 int i;
150
151 /* if a==curdoc, take it as a < b */
152 if (v && a->docid[0] == *v)
153 return -1;
154
155 /* ORDER BY docid, ndepth DESC */
156 i=CMP_NUM(a->docid[0], b->docid[0]);
157 if (!i)
158 i=CMP_NUM(b->ndepth,a->ndepth);
159 return i;
160}
161
162static int FTB_WORD_cmp_list(CHARSET_INFO *cs, FTB_WORD **a, FTB_WORD **b)
163{
164 /* ORDER BY word, ndepth */
165 int i= ha_compare_text(cs, (uchar*) (*a)->word + 1, (*a)->len - 1,
166 (uchar*) (*b)->word + 1, (*b)->len - 1, 0);
167 if (!i)
168 i= CMP_NUM((*a)->ndepth, (*b)->ndepth);
169 return i;
170}
171
172
173typedef struct st_my_ftb_param
174{
175 FTB *ftb;
176 FTB_EXPR *ftbe;
177 uchar *up_quot;
178 uint depth;
179} MY_FTB_PARAM;
180
181
182static int ftb_query_add_word(MYSQL_FTPARSER_PARAM *param,
183 const char *word, int word_len,
184 MYSQL_FTPARSER_BOOLEAN_INFO *info)
185{
186 MY_FTB_PARAM *ftb_param= param->mysql_ftparam;
187 FTB_WORD *ftbw;
188 FTB_EXPR *ftbe, *tmp_expr;
189 FT_WORD *phrase_word;
190 LIST *tmp_element;
191 int r= info->weight_adjust;
192 float weight= (float)
193 (info->wasign ? nwghts : wghts)[(r>5)?5:((r<-5)?-5:r)];
194
195 switch (info->type) {
196 case FT_TOKEN_WORD:
197 ftbw= (FTB_WORD *)alloc_root(&ftb_param->ftb->mem_root,
198 sizeof(FTB_WORD) + HA_MAX_KEY_BUFF);
199 ftbw->len= word_len + 1;
200 ftbw->flags= 0;
201 ftbw->off= 0;
202 if (info->yesno > 0) ftbw->flags|= FTB_FLAG_YES;
203 if (info->yesno < 0) ftbw->flags|= FTB_FLAG_NO;
204 if (info->trunc) ftbw->flags|= FTB_FLAG_TRUNC;
205 ftbw->weight= weight;
206 ftbw->up= ftb_param->ftbe;
207 ftbw->docid[0]= ftbw->docid[1]= HA_OFFSET_ERROR;
208 ftbw->ndepth= (info->yesno < 0) + ftb_param->depth;
209 ftbw->key_root= HA_OFFSET_ERROR;
210 memcpy(ftbw->word + 1, word, word_len);
211 ftbw->word[0]= word_len;
212 if (info->yesno > 0) ftbw->up->ythresh++;
213 ftb_param->ftb->queue.max_elements++;
214 ftbw->prev= ftb_param->ftb->last_word;
215 ftb_param->ftb->last_word= ftbw;
216 ftb_param->ftb->with_scan|= (info->trunc & FTB_FLAG_TRUNC);
217 for (tmp_expr= ftb_param->ftbe; tmp_expr->up; tmp_expr= tmp_expr->up)
218 if (! (tmp_expr->flags & FTB_FLAG_YES))
219 break;
220 ftbw->max_docid_expr= tmp_expr;
221 /* fall through */
222 case FT_TOKEN_STOPWORD:
223 if (! ftb_param->up_quot) break;
224 phrase_word= (FT_WORD *)alloc_root(&ftb_param->ftb->mem_root, sizeof(FT_WORD));
225 tmp_element= (LIST *)alloc_root(&ftb_param->ftb->mem_root, sizeof(LIST));
226 phrase_word->pos= (uchar*) word;
227 phrase_word->len= word_len;
228 tmp_element->data= (void *)phrase_word;
229 ftb_param->ftbe->phrase= list_add(ftb_param->ftbe->phrase, tmp_element);
230 /* Allocate document list at this point.
231 It allows to avoid huge amount of allocs/frees for each row.*/
232 tmp_element= (LIST *)alloc_root(&ftb_param->ftb->mem_root, sizeof(LIST));
233 tmp_element->data= alloc_root(&ftb_param->ftb->mem_root, sizeof(FT_WORD));
234 ftb_param->ftbe->document=
235 list_add(ftb_param->ftbe->document, tmp_element);
236 break;
237 case FT_TOKEN_LEFT_PAREN:
238 ftbe=(FTB_EXPR *)alloc_root(&ftb_param->ftb->mem_root, sizeof(FTB_EXPR));
239 ftbe->flags= 0;
240 if (info->yesno > 0) ftbe->flags|= FTB_FLAG_YES;
241 if (info->yesno < 0) ftbe->flags|= FTB_FLAG_NO;
242 ftbe->weight= weight;
243 ftbe->up= ftb_param->ftbe;
244 ftbe->max_docid= ftbe->ythresh= ftbe->yweaks= 0;
245 ftbe->docid[0]= ftbe->docid[1]= HA_OFFSET_ERROR;
246 ftbe->phrase= NULL;
247 ftbe->document= 0;
248 if (info->quot) ftb_param->ftb->with_scan|= 2;
249 if (info->yesno > 0) ftbe->up->ythresh++;
250 ftb_param->ftbe= ftbe;
251 ftb_param->depth++;
252 ftb_param->up_quot= (uchar*) info->quot;
253 break;
254 case FT_TOKEN_RIGHT_PAREN:
255 if (ftb_param->ftbe->document)
256 {
257 /* Circuit document list */
258 for (tmp_element= ftb_param->ftbe->document;
259 tmp_element->next; tmp_element= tmp_element->next) /* no-op */;
260 tmp_element->next= ftb_param->ftbe->document;
261 ftb_param->ftbe->document->prev= tmp_element;
262 }
263 info->quot= 0;
264 if (ftb_param->ftbe->up)
265 {
266 DBUG_ASSERT(ftb_param->depth);
267 ftb_param->ftbe= ftb_param->ftbe->up;
268 ftb_param->depth--;
269 ftb_param->up_quot= 0;
270 }
271 break;
272 case FT_TOKEN_EOF:
273 default:
274 break;
275 }
276 return(0);
277}
278
279
280static int ftb_parse_query_internal(MYSQL_FTPARSER_PARAM *param,
281 const char *query, int len)
282{
283 MY_FTB_PARAM *ftb_param= param->mysql_ftparam;
284 MYSQL_FTPARSER_BOOLEAN_INFO info;
285 CHARSET_INFO *cs= ftb_param->ftb->charset;
286 const uchar **start= (const uchar**) &query;
287 uchar *end= (uchar*) query + len;
288 FT_WORD w;
289
290 info.prev= ' ';
291 info.quot= 0;
292 while (ft_get_word(cs, start, end, &w, &info))
293 param->mysql_add_word(param, (char*) w.pos, (int)w.len, &info);
294 return(0);
295}
296
297
298static int _ftb_parse_query(FTB *ftb, uchar *query, uint len,
299 struct st_mysql_ftparser *parser)
300{
301 MYSQL_FTPARSER_PARAM *param;
302 MY_FTB_PARAM ftb_param;
303 DBUG_ENTER("_ftb_parse_query");
304 DBUG_ASSERT(parser);
305
306 if (ftb->state != UNINITIALIZED)
307 DBUG_RETURN(0);
308 if (! (param= ftparser_call_initializer(ftb->info, ftb->keynr, 0)))
309 DBUG_RETURN(1);
310
311 ftb_param.ftb= ftb;
312 ftb_param.depth= 0;
313 ftb_param.ftbe= ftb->root;
314 ftb_param.up_quot= 0;
315
316 param->mysql_parse= ftb_parse_query_internal;
317 param->mysql_add_word= ftb_query_add_word;
318 param->mysql_ftparam= (void *)&ftb_param;
319 param->cs= ftb->charset;
320 param->doc= (char*) query;
321 param->length= len;
322 param->flags= 0;
323 param->mode= MYSQL_FTPARSER_FULL_BOOLEAN_INFO;
324 DBUG_RETURN(parser->parse(param));
325}
326
327
328static int _ftb_no_dupes_cmp(void* not_used __attribute__((unused)),
329 const void *a,const void *b)
330{
331 return CMP_NUM((*((my_off_t*)a)), (*((my_off_t*)b)));
332}
333
334/*
335 When performing prefix search (a word with truncation operator), we
336 must preserve original prefix to ensure that characters which may be
337 expanded/contracted do not break the prefix. This is done by storing
338 newly found key immediatly after the original word in ftbw->word
339 buffer.
340
341 ftbw->word= LENGTH WORD [ LENGTH1 WORD1 ] WEIGHT REFERENCE
342 LENGTH - 1 byte, length of the WORD
343 WORD - LENGTH bytes, the word itself
344 LENGTH1 - 1 byte, length of the WORD1, present in case of prefix search
345 WORD1 - LENGTH bytes, the word itself, present in case of prefix search
346 WEIGHT - 4 bytes (HA_FT_WLEN), either weight or number of subkeys
347 REFERENCE - rec_reflength bytes, pointer to the record
348
349 returns 1 if the search was finished (must-word wasn't found)
350*/
351static int _ft2_search_no_lock(FTB *ftb, FTB_WORD *ftbw, my_bool init_search)
352{
353 int r;
354 int subkeys=1;
355 my_bool can_go_down;
356 MI_INFO *info=ftb->info;
357 uint UNINIT_VAR(off), extra= HA_FT_WLEN + info->s->rec_reflength;
358 uchar *lastkey_buf=ftbw->word+ftbw->off;
359
360 if (ftbw->flags & FTB_FLAG_TRUNC)
361 lastkey_buf+=ftbw->len;
362
363 if (init_search)
364 {
365 ftbw->key_root=info->s->state.key_root[ftb->keynr];
366 ftbw->keyinfo=info->s->keyinfo+ftb->keynr;
367
368 r=_mi_search(info, ftbw->keyinfo, (uchar*) ftbw->word, ftbw->len,
369 SEARCH_FIND | SEARCH_BIGGER, ftbw->key_root);
370 }
371 else
372 {
373 uint sflag= SEARCH_BIGGER;
374 my_off_t max_docid=0;
375 FTB_EXPR *tmp;
376
377 for (tmp= ftbw->max_docid_expr; tmp; tmp= tmp->up)
378 set_if_bigger(max_docid, tmp->max_docid);
379
380 if (ftbw->docid[0] < max_docid)
381 {
382 sflag|= SEARCH_SAME;
383 _mi_dpointer(info, (uchar*) (lastkey_buf + HA_FT_WLEN +
384 (ftbw->off ? 0 : lastkey_buf[0] + 1)),
385 max_docid);
386 }
387 r=_mi_search(info, ftbw->keyinfo, (uchar*) lastkey_buf,
388 USE_WHOLE_KEY, sflag, ftbw->key_root);
389 }
390
391 can_go_down=(!ftbw->off && (init_search || (ftbw->flags & FTB_FLAG_TRUNC)));
392 /* Skip rows inserted by concurrent insert */
393 while (!r)
394 {
395 if (can_go_down)
396 {
397 /* going down ? */
398 off=info->lastkey_length-extra;
399 subkeys=ft_sintXkorr(info->lastkey+off);
400 }
401 if (subkeys<0 || info->lastpos < info->state->data_file_length)
402 break;
403 r= _mi_search_next(info, ftbw->keyinfo, info->lastkey,
404 info->lastkey_length,
405 SEARCH_BIGGER, ftbw->key_root);
406 }
407
408 if (!r && !ftbw->off)
409 {
410 r= ha_compare_text(ftb->charset,
411 info->lastkey+1,
412 info->lastkey_length-extra-1,
413 (uchar*) ftbw->word+1,
414 ftbw->len-1,
415 (my_bool) (ftbw->flags & FTB_FLAG_TRUNC));
416 }
417
418 if (r) /* not found */
419 {
420 if (!ftbw->off || !(ftbw->flags & FTB_FLAG_TRUNC))
421 {
422 ftbw->docid[0]=HA_OFFSET_ERROR;
423 if ((ftbw->flags & FTB_FLAG_YES) && ftbw->up->up==0)
424 {
425 /*
426 This word MUST BE present in every document returned,
427 so we can stop the search right now
428 */
429 ftb->state=INDEX_DONE;
430 return 1; /* search is done */
431 }
432 else
433 return 0;
434 }
435
436 /*
437 Going up to the first-level tree to continue search there.
438 Only done when performing prefix search.
439
440 Key buffer data pointer as well as docid[0] may be smaller
441 than values we got while searching first-level tree. Thus
442 they must be restored to original values to avoid dead-loop,
443 when subsequent search for a bigger value eventually ends up
444 in this same second-level tree.
445 */
446 _mi_dpointer(info, (uchar*) (lastkey_buf+HA_FT_WLEN), ftbw->key_root);
447 ftbw->docid[0]= ftbw->key_root;
448 ftbw->key_root=info->s->state.key_root[ftb->keynr];
449 ftbw->keyinfo=info->s->keyinfo+ftb->keynr;
450 ftbw->off=0;
451 return _ft2_search_no_lock(ftb, ftbw, 0);
452 }
453
454 /* matching key found */
455 memcpy(lastkey_buf, info->lastkey, info->lastkey_length);
456 if (lastkey_buf == ftbw->word)
457 ftbw->len=info->lastkey_length-extra;
458
459 /* going down ? */
460 if (subkeys<0)
461 {
462 /*
463 yep, going down, to the second-level tree
464 TODO here: subkey-based optimization
465 */
466 ftbw->off=off;
467 ftbw->key_root=info->lastpos;
468 ftbw->keyinfo=& info->s->ft2_keyinfo;
469 r=_mi_search_first(info, ftbw->keyinfo, ftbw->key_root);
470 DBUG_ASSERT(r==0); /* found something */
471 memcpy(lastkey_buf+off, info->lastkey, info->lastkey_length);
472 }
473 ftbw->docid[0]=info->lastpos;
474 if (ftbw->flags & FTB_FLAG_YES && !(ftbw->flags & FTB_FLAG_TRUNC))
475 ftbw->max_docid_expr->max_docid= info->lastpos;
476 return 0;
477}
478
479static int _ft2_search(FTB *ftb, FTB_WORD *ftbw, my_bool init_search)
480{
481 int r;
482 MYISAM_SHARE *share= ftb->info->s;
483 if (share->concurrent_insert)
484 mysql_rwlock_rdlock(&share->key_root_lock[ftb->keynr]);
485 r= _ft2_search_no_lock(ftb, ftbw, init_search);
486 if (share->concurrent_insert)
487 mysql_rwlock_unlock(&share->key_root_lock[ftb->keynr]);
488 return r;
489}
490
491static void _ftb_init_index_search(FT_INFO *ftb)
492{
493 int i;
494 FTB_WORD *ftbw;
495
496 if (ftb->state == UNINITIALIZED || ftb->keynr == NO_SUCH_KEY)
497 return;
498 ftb->state=INDEX_SEARCH;
499
500 for (i= queue_last_element(&ftb->queue);
501 i >= (int) queue_first_element(&ftb->queue);
502 i--)
503 {
504 ftbw=(FTB_WORD *)(queue_element(&ftb->queue, i));
505
506 if (ftbw->flags & FTB_FLAG_TRUNC)
507 {
508 /*
509 special treatment for truncation operator
510 1. there are some (besides this) +words
511 | no need to search in the index, it can never ADD new rows
512 | to the result, and to remove half-matched rows we do scan anyway
513 2. -trunc*
514 | same as 1.
515 3. in 1 and 2, +/- need not be on the same expr. level,
516 but can be on any upper level, as in +word +(trunc1* trunc2*)
517 4. otherwise
518 | We have to index-search for this prefix.
519 | It may cause duplicates, as in the index (sorted by <word,docid>)
520 | <aaaa,row1>
521 | <aabb,row2>
522 | <aacc,row1>
523 | Searching for "aa*" will find row1 twice...
524 */
525 FTB_EXPR *ftbe;
526 for (ftbe=(FTB_EXPR*)ftbw;
527 ftbe->up && !(ftbe->up->flags & FTB_FLAG_TRUNC);
528 ftbe->up->flags|= FTB_FLAG_TRUNC, ftbe=ftbe->up)
529 {
530 if (ftbe->flags & FTB_FLAG_NO || /* 2 */
531 ftbe->up->ythresh - ftbe->up->yweaks >
532 (uint) MY_TEST(ftbe->flags & FTB_FLAG_YES)) /* 1 */
533 {
534 FTB_EXPR *top_ftbe=ftbe->up;
535 ftbw->docid[0]=HA_OFFSET_ERROR;
536 for (ftbe=(FTB_EXPR *)ftbw;
537 ftbe != top_ftbe && !(ftbe->flags & FTB_FLAG_NO);
538 ftbe=ftbe->up)
539 ftbe->up->yweaks++;
540 ftbe=0;
541 break;
542 }
543 }
544 if (!ftbe)
545 continue;
546 /* 4 */
547 if (!is_tree_inited(& ftb->no_dupes))
548 init_tree(& ftb->no_dupes,0,0,sizeof(my_off_t),
549 _ftb_no_dupes_cmp,0,0,MYF(0));
550 else
551 reset_tree(& ftb->no_dupes);
552 }
553
554 ftbw->off=0; /* in case of reinit */
555 if (_ft2_search(ftb, ftbw, 1))
556 return;
557 }
558 queue_fix(& ftb->queue);
559}
560
561
562FT_INFO * ft_init_boolean_search(MI_INFO *info, uint keynr, uchar *query,
563 uint query_len, CHARSET_INFO *cs)
564{
565 FTB *ftb;
566 FTB_EXPR *ftbe;
567 FTB_WORD *ftbw;
568
569 if (!(ftb=(FTB *)my_malloc(sizeof(FTB), MYF(MY_WME))))
570 return 0;
571 ftb->please= (struct _ft_vft *) & _ft_vft_boolean;
572 ftb->state=UNINITIALIZED;
573 ftb->info=info;
574 ftb->keynr=keynr;
575 ftb->charset=cs;
576 DBUG_ASSERT(keynr==NO_SUCH_KEY || cs == info->s->keyinfo[keynr].seg->charset);
577 ftb->with_scan=0;
578 ftb->lastpos=HA_OFFSET_ERROR;
579 bzero(& ftb->no_dupes, sizeof(TREE));
580 ftb->last_word= 0;
581
582 init_alloc_root(&ftb->mem_root, "fulltext", 1024, 1024, MYF(0));
583 ftb->queue.max_elements= 0;
584 if (!(ftbe=(FTB_EXPR *)alloc_root(&ftb->mem_root, sizeof(FTB_EXPR))))
585 goto err;
586 ftbe->weight=1;
587 ftbe->flags=FTB_FLAG_YES;
588 ftbe->nos=1;
589 ftbe->up=0;
590 ftbe->max_docid= ftbe->ythresh= ftbe->yweaks= 0;
591 ftbe->docid[0]=ftbe->docid[1]=HA_OFFSET_ERROR;
592 ftbe->phrase= NULL;
593 ftbe->document= 0;
594 ftb->root=ftbe;
595 if (unlikely(_ftb_parse_query(ftb, query, query_len,
596 keynr == NO_SUCH_KEY ? &ft_default_parser :
597 info->s->keyinfo[keynr].parser)))
598 goto err;
599 /*
600 Hack: instead of init_queue, we'll use reinit queue to be able
601 to alloc queue with alloc_root()
602 */
603 if (! (ftb->queue.root= (uchar **)alloc_root(&ftb->mem_root,
604 (ftb->queue.max_elements + 1) *
605 sizeof(void *))))
606 goto err;
607 reinit_queue(&ftb->queue, ftb->queue.max_elements, 0, 0,
608 (int (*)(void*, uchar*, uchar*))FTB_WORD_cmp, 0, 0, 0);
609 for (ftbw= ftb->last_word; ftbw; ftbw= ftbw->prev)
610 queue_insert(&ftb->queue, (uchar *)ftbw);
611 ftb->list=(FTB_WORD **)alloc_root(&ftb->mem_root,
612 sizeof(FTB_WORD *)*ftb->queue.elements);
613 memcpy(ftb->list, &queue_top(&ftb->queue), sizeof(FTB_WORD *)*ftb->queue.elements);
614 my_qsort2(ftb->list, ftb->queue.elements, sizeof(FTB_WORD *),
615 (qsort2_cmp)FTB_WORD_cmp_list, (void*)ftb->charset);
616 if (ftb->queue.elements<2) ftb->with_scan &= ~FTB_FLAG_TRUNC;
617 ftb->state=READY;
618 return ftb;
619err:
620 free_root(& ftb->mem_root, MYF(0));
621 my_free(ftb);
622 return 0;
623}
624
625
626typedef struct st_my_ftb_phrase_param
627{
628 LIST *phrase;
629 LIST *document;
630 CHARSET_INFO *cs;
631 uint phrase_length;
632 uint document_length;
633 uint match;
634} MY_FTB_PHRASE_PARAM;
635
636
637static int ftb_phrase_add_word(MYSQL_FTPARSER_PARAM *param,
638 const char *word, int word_len,
639 MYSQL_FTPARSER_BOOLEAN_INFO *boolean_info __attribute__((unused)))
640{
641 MY_FTB_PHRASE_PARAM *phrase_param= param->mysql_ftparam;
642 FT_WORD *w= (FT_WORD *)phrase_param->document->data;
643 LIST *phrase, *document;
644 w->pos= (uchar*) word;
645 w->len= word_len;
646 phrase_param->document= phrase_param->document->prev;
647 if (phrase_param->phrase_length > phrase_param->document_length)
648 {
649 phrase_param->document_length++;
650 return 0;
651 }
652 /* TODO: rewrite phrase search to avoid
653 comparing the same word twice. */
654 for (phrase= phrase_param->phrase, document= phrase_param->document->next;
655 phrase; phrase= phrase->next, document= document->next)
656 {
657 FT_WORD *phrase_word= (FT_WORD *)phrase->data;
658 FT_WORD *document_word= (FT_WORD *)document->data;
659 if (my_strnncoll(phrase_param->cs, (uchar*) phrase_word->pos,
660 phrase_word->len,
661 (uchar*) document_word->pos, document_word->len))
662 return 0;
663 }
664 phrase_param->match++;
665 return 0;
666}
667
668
669static int ftb_check_phrase_internal(MYSQL_FTPARSER_PARAM *param,
670 const char *document, int len)
671{
672 FT_WORD word;
673 MY_FTB_PHRASE_PARAM *phrase_param= param->mysql_ftparam;
674 const uchar *docend= (uchar*) document + len;
675 while (ft_simple_get_word(phrase_param->cs, (uchar**) &document, docend,
676 &word, FALSE))
677 {
678 param->mysql_add_word(param, (char*) word.pos, (int)word.len, 0);
679 if (phrase_param->match)
680 break;
681 }
682 return 0;
683}
684
685
686/*
687 Checks if given buffer matches phrase list.
688
689 SYNOPSIS
690 _ftb_check_phrase()
691 s0 start of buffer
692 e0 end of buffer
693 phrase broken into list phrase
694 cs charset info
695
696 RETURN VALUE
697 1 is returned if phrase found, 0 else.
698 -1 is returned if error occurs.
699*/
700
701static int _ftb_check_phrase(FTB *ftb, const uchar *document, uint len,
702 FTB_EXPR *ftbe, struct st_mysql_ftparser *parser)
703{
704 MY_FTB_PHRASE_PARAM ftb_param;
705 MYSQL_FTPARSER_PARAM *param;
706 DBUG_ENTER("_ftb_check_phrase");
707 DBUG_ASSERT(parser);
708
709 if (! (param= ftparser_call_initializer(ftb->info, ftb->keynr, 1)))
710 DBUG_RETURN(0);
711
712 ftb_param.phrase= ftbe->phrase;
713 ftb_param.document= ftbe->document;
714 ftb_param.cs= ftb->charset;
715 ftb_param.phrase_length= list_length(ftbe->phrase);
716 ftb_param.document_length= 1;
717 ftb_param.match= 0;
718
719 param->mysql_parse= ftb_check_phrase_internal;
720 param->mysql_add_word= ftb_phrase_add_word;
721 param->mysql_ftparam= (void *)&ftb_param;
722 param->cs= ftb->charset;
723 param->doc= (char *) document;
724 param->length= len;
725 param->flags= 0;
726 param->mode= MYSQL_FTPARSER_WITH_STOPWORDS;
727 if (unlikely(parser->parse(param)))
728 return -1;
729 DBUG_RETURN(ftb_param.match ? 1 : 0);
730}
731
732
733static int _ftb_climb_the_tree(FTB *ftb, FTB_WORD *ftbw, FT_SEG_ITERATOR *ftsi_orig)
734{
735 FT_SEG_ITERATOR ftsi;
736 FTB_EXPR *ftbe;
737 float weight=ftbw->weight;
738 int yn_flag= ftbw->flags, ythresh, mode=(ftsi_orig != 0);
739 my_off_t curdoc=ftbw->docid[mode];
740 struct st_mysql_ftparser *parser= ftb->keynr == NO_SUCH_KEY ?
741 &ft_default_parser :
742 ftb->info->s->keyinfo[ftb->keynr].parser;
743
744 for (ftbe=ftbw->up; ftbe; ftbe=ftbe->up)
745 {
746 ythresh = ftbe->ythresh - (mode ? 0 : ftbe->yweaks);
747 if (ftbe->docid[mode] != curdoc)
748 {
749 ftbe->cur_weight=0;
750 ftbe->yesses=ftbe->nos=0;
751 ftbe->docid[mode]=curdoc;
752 }
753 if (ftbe->nos)
754 break;
755 if (yn_flag & FTB_FLAG_YES)
756 {
757 weight /= ftbe->ythresh;
758 ftbe->cur_weight += weight;
759 if ((int) ++ftbe->yesses == ythresh)
760 {
761 yn_flag=ftbe->flags;
762 weight=ftbe->cur_weight*ftbe->weight;
763 if (mode && ftbe->phrase)
764 {
765 int found= 0;
766
767 memcpy(&ftsi, ftsi_orig, sizeof(ftsi));
768 while (_mi_ft_segiterator(&ftsi) && !found)
769 {
770 if (!ftsi.pos)
771 continue;
772 found= _ftb_check_phrase(ftb, ftsi.pos, ftsi.len, ftbe, parser);
773 if (unlikely(found < 0))
774 return 1;
775 }
776 if (!found)
777 break;
778 } /* ftbe->quot */
779 }
780 else
781 break;
782 }
783 else
784 if (yn_flag & FTB_FLAG_NO)
785 {
786 /*
787 NOTE: special sort function of queue assures that all
788 (yn_flag & FTB_FLAG_NO) != 0
789 events for every particular subexpression will
790 "auto-magically" happen BEFORE all the
791 (yn_flag & FTB_FLAG_YES) != 0 events. So no
792 already matched expression can become not-matched again.
793 */
794 ++ftbe->nos;
795 break;
796 }
797 else
798 {
799 if (ftbe->ythresh)
800 weight/=3;
801 ftbe->cur_weight += weight;
802 if ((int) ftbe->yesses < ythresh)
803 break;
804 if (!(yn_flag & FTB_FLAG_WONLY))
805 yn_flag= ((int) ftbe->yesses++ == ythresh) ? ftbe->flags : FTB_FLAG_WONLY ;
806 weight*= ftbe->weight;
807 }
808 }
809 return 0;
810}
811
812
813int ft_boolean_read_next(FT_INFO *ftb, char *record)
814{
815 FTB_EXPR *ftbe;
816 FTB_WORD *ftbw;
817 MI_INFO *info=ftb->info;
818 my_off_t curdoc;
819
820 if (ftb->state != INDEX_SEARCH && ftb->state != INDEX_DONE)
821 return -1;
822
823 /* black magic ON */
824 if ((int) _mi_check_index(info, ftb->keynr) < 0)
825 return my_errno;
826 if (_mi_readinfo(info, F_RDLCK, 1))
827 return my_errno;
828 /* black magic OFF */
829
830 if (!ftb->queue.elements)
831 return my_errno=HA_ERR_END_OF_FILE;
832
833 /* Attention!!! Address of a local variable is used here! See err: label */
834 ftb->queue.first_cmp_arg=(void *)&curdoc;
835
836 while (ftb->state == INDEX_SEARCH &&
837 (curdoc=((FTB_WORD *)queue_top(& ftb->queue))->docid[0]) !=
838 HA_OFFSET_ERROR)
839 {
840 while (curdoc == (ftbw=(FTB_WORD *)queue_top(& ftb->queue))->docid[0])
841 {
842 if (unlikely(_ftb_climb_the_tree(ftb, ftbw, 0)))
843 {
844 my_errno= HA_ERR_OUT_OF_MEM;
845 goto err;
846 }
847
848 /* update queue */
849 _ft2_search(ftb, ftbw, 0);
850 queue_replace_top(&ftb->queue);
851 }
852
853 ftbe=ftb->root;
854 if (ftbe->docid[0]==curdoc && ftbe->cur_weight>0 &&
855 ftbe->yesses>=(ftbe->ythresh-ftbe->yweaks) && !ftbe->nos)
856 {
857 /* curdoc matched ! */
858 if (is_tree_inited(&ftb->no_dupes) &&
859 tree_insert(&ftb->no_dupes, &curdoc, 0,
860 ftb->no_dupes.custom_arg)->count >1)
861 /* but it managed already to get past this line once */
862 continue;
863
864 info->lastpos=curdoc;
865 /* Clear all states, except that the table was updated */
866 info->update&= (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED);
867
868 if (!(*info->read_record)(info,curdoc, (uchar*) record))
869 {
870 info->update|= HA_STATE_AKTIV; /* Record is read */
871 if (ftb->with_scan &&
872 ft_boolean_find_relevance(ftb,(uchar*) record,0)==0)
873 continue; /* no match */
874 my_errno=0;
875 goto err;
876 }
877 goto err;
878 }
879 }
880 ftb->state=INDEX_DONE;
881 my_errno=HA_ERR_END_OF_FILE;
882err:
883 ftb->queue.first_cmp_arg=(void *)0;
884 return my_errno;
885}
886
887
888typedef struct st_my_ftb_find_param
889{
890 FT_INFO *ftb;
891 FT_SEG_ITERATOR *ftsi;
892} MY_FTB_FIND_PARAM;
893
894
895static int ftb_find_relevance_add_word(MYSQL_FTPARSER_PARAM *param,
896 const char *word, int len,
897 MYSQL_FTPARSER_BOOLEAN_INFO *boolean_info __attribute__((unused)))
898{
899 MY_FTB_FIND_PARAM *ftb_param= param->mysql_ftparam;
900 FT_INFO *ftb= ftb_param->ftb;
901 FTB_WORD *ftbw;
902 int a, b, c;
903 /*
904 Find right-most element in the array of query words matching this
905 word from a document.
906 */
907 for (a= 0, b= ftb->queue.elements, c= (a+b)/2; b-a>1; c= (a+b)/2)
908 {
909 ftbw= ftb->list[c];
910 if (ha_compare_text(ftb->charset, (uchar*)word, len,
911 (uchar*)ftbw->word+1, ftbw->len-1,
912 (my_bool) (ftbw->flags & FTB_FLAG_TRUNC)) < 0)
913 b= c;
914 else
915 a= c;
916 }
917 /*
918 If there were no words with truncation operator, we iterate to the
919 beginning of an array until array element is equal to the word from
920 a document. This is done mainly because the same word may be
921 mentioned twice (or more) in the query.
922
923 In case query has words with truncation operator we must iterate
924 to the beginning of the array. There may be non-matching query words
925 between matching word with truncation operator and the right-most
926 matching element. E.g., if we're looking for 'aaa15' in an array of
927 'aaa1* aaa14 aaa15 aaa16'.
928
929 Worse of that there still may be match even if the binary search
930 above didn't find matching element. E.g., if we're looking for
931 'aaa15' in an array of 'aaa1* aaa14 aaa16'. The binary search will
932 stop at 'aaa16'.
933 */
934 for (; c >= 0; c--)
935 {
936 ftbw= ftb->list[c];
937 if (ha_compare_text(ftb->charset, (uchar*)word, len,
938 (uchar*)ftbw->word + 1,ftbw->len - 1,
939 (my_bool)(ftbw->flags & FTB_FLAG_TRUNC)))
940 {
941 if (ftb->with_scan & FTB_FLAG_TRUNC)
942 continue;
943 else
944 break;
945 }
946 if (ftbw->docid[1] == ftb->info->lastpos)
947 continue;
948 ftbw->docid[1]= ftb->info->lastpos;
949 if (unlikely(_ftb_climb_the_tree(ftb, ftbw, ftb_param->ftsi)))
950 return 1;
951 }
952 return(0);
953}
954
955
956static int ftb_find_relevance_parse(MYSQL_FTPARSER_PARAM *param,
957 const char *doc, int len)
958{
959 MY_FTB_FIND_PARAM *ftb_param= param->mysql_ftparam;
960 FT_INFO *ftb= ftb_param->ftb;
961 uchar *end= (uchar*) doc + len;
962 FT_WORD w;
963 while (ft_simple_get_word(ftb->charset, (uchar**) &doc, end, &w, TRUE))
964 param->mysql_add_word(param, (char*) w.pos, (int)w.len, 0);
965 return(0);
966}
967
968
969float ft_boolean_find_relevance(FT_INFO *ftb, uchar *record, uint length)
970{
971 FTB_EXPR *ftbe;
972 FT_SEG_ITERATOR ftsi, ftsi2;
973 my_off_t docid=ftb->info->lastpos;
974 MY_FTB_FIND_PARAM ftb_param;
975 MYSQL_FTPARSER_PARAM *param;
976 struct st_mysql_ftparser *parser= ftb->keynr == NO_SUCH_KEY ?
977 &ft_default_parser :
978 ftb->info->s->keyinfo[ftb->keynr].parser;
979
980 if (docid == HA_OFFSET_ERROR)
981 return -2.0;
982 if (!ftb->queue.elements)
983 return 0;
984 if (! (param= ftparser_call_initializer(ftb->info, ftb->keynr, 0)))
985 return 0;
986
987 if (ftb->state != INDEX_SEARCH && docid <= ftb->lastpos)
988 {
989 FTB_EXPR *x;
990 uint i;
991
992 for (i=0; i < ftb->queue.elements; i++)
993 {
994 ftb->list[i]->docid[1]=HA_OFFSET_ERROR;
995 for (x=ftb->list[i]->up; x; x=x->up)
996 x->docid[1]=HA_OFFSET_ERROR;
997 }
998 }
999
1000 ftb->lastpos=docid;
1001
1002 if (ftb->keynr==NO_SUCH_KEY)
1003 _mi_ft_segiterator_dummy_init(record, length, &ftsi);
1004 else
1005 _mi_ft_segiterator_init(ftb->info, ftb->keynr, record, &ftsi);
1006 memcpy(&ftsi2, &ftsi, sizeof(ftsi));
1007
1008 ftb_param.ftb= ftb;
1009 ftb_param.ftsi= &ftsi2;
1010 param->mysql_parse= ftb_find_relevance_parse;
1011 param->mysql_add_word= ftb_find_relevance_add_word;
1012 param->mysql_ftparam= (void *)&ftb_param;
1013 param->flags= 0;
1014 param->cs= ftb->charset;
1015 param->mode= MYSQL_FTPARSER_SIMPLE_MODE;
1016 while (_mi_ft_segiterator(&ftsi))
1017 {
1018 if (!ftsi.pos)
1019 continue;
1020 param->doc= (char *)ftsi.pos;
1021 param->length= ftsi.len;
1022 if (unlikely(parser->parse(param)))
1023 return 0;
1024 }
1025 ftbe=ftb->root;
1026 if (ftbe->docid[1]==docid && ftbe->cur_weight>0 &&
1027 ftbe->yesses>=ftbe->ythresh && !ftbe->nos)
1028 { /* row matched ! */
1029 return ftbe->cur_weight;
1030 }
1031 else
1032 { /* match failed ! */
1033 return 0.0;
1034 }
1035}
1036
1037
1038void ft_boolean_close_search(FT_INFO *ftb)
1039{
1040 if (is_tree_inited(& ftb->no_dupes))
1041 {
1042 delete_tree(&ftb->no_dupes, 0);
1043 }
1044 free_root(& ftb->mem_root, MYF(0));
1045 my_free(ftb);
1046}
1047
1048
1049float ft_boolean_get_relevance(FT_INFO *ftb)
1050{
1051 return ftb->root->cur_weight;
1052}
1053
1054
1055void ft_boolean_reinit_search(FT_INFO *ftb)
1056{
1057 _ftb_init_index_search(ftb);
1058}
1059
1060