1/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
2
3 This program is free software; you can redistribute it and/or modify
4 it under the terms of the GNU General Public License as published by
5 the Free Software Foundation; version 2 of the License.
6
7 This program is distributed in the hope that it will be useful,
8 but WITHOUT ANY WARRANTY; without even the implied warranty of
9 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 GNU General Public License for more details.
11
12 You should have received a copy of the GNU General Public License
13 along with this program; if not, write to the Free Software
14 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111-1301 USA */
15
16/* Written by Sergei A. Golubchik, who has a shared copyright to this code */
17
18/* TODO: add caching - pre-read several index entries at once */
19
20/*
21 Added optimization for full-text queries with plus-words. It was
22 implemented by sharing maximal document id (max_docid) variable
23 inside plus subtree. max_docid could be used by any word in plus
24 subtree, but it could be updated by plus-word only.
25
26 Fulltext "smarter index merge" optimization assumes that rows
27 it gets are ordered by doc_id. That is not the case when we
28 search for a word with truncation operator. It may return
29 rows in random order. Thus we may not use "smarter index merge"
30 optimization with "trunc-words".
31
32 The idea is: there is no need to search for docid smaller than
33 biggest docid inside current plus subtree or any upper plus subtree.
34
35 Examples:
36 +word1 word2
37 share same max_docid
38 max_docid updated by word1
39 +word1 +(word2 word3)
40 share same max_docid
41 max_docid updated by word1
42 +(word1 -word2) +(+word3 word4)
43 share same max_docid
44 max_docid updated by word3
45 +word1 word2 (+word3 word4 (+word5 word6))
46 three subexpressions (including the top-level one),
47 every one has its own max_docid, updated by its plus word.
48 but for the search word6 uses
49 MY_MAX(word1.max_docid, word3.max_docid, word5.max_docid),
50 while word4 uses, accordingly,
51 MY_MAX(word1.max_docid, word3.max_docid).
52*/
53
54#define FT_CORE
55#include "ma_ftdefs.h"
56
57/* search with boolean queries */
58
59static double _wghts[11]=
60{
61 0.131687242798354,
62 0.197530864197531,
63 0.296296296296296,
64 0.444444444444444,
65 0.666666666666667,
66 1.000000000000000,
67 1.500000000000000,
68 2.250000000000000,
69 3.375000000000000,
70 5.062500000000000,
71 7.593750000000000};
72static double *wghts=_wghts+5; /* wghts[i] = 1.5**i */
73
74static double _nwghts[11]=
75{
76 -0.065843621399177,
77 -0.098765432098766,
78 -0.148148148148148,
79 -0.222222222222222,
80 -0.333333333333334,
81 -0.500000000000000,
82 -0.750000000000000,
83 -1.125000000000000,
84 -1.687500000000000,
85 -2.531250000000000,
86 -3.796875000000000};
87static double *nwghts=_nwghts+5; /* nwghts[i] = -0.5*1.5**i */
88
89#define FTB_FLAG_TRUNC 1
90/* At most one of the following flags can be set */
91#define FTB_FLAG_YES 2
92#define FTB_FLAG_NO 4
93#define FTB_FLAG_WONLY 8
94
95typedef struct st_ftb_expr FTB_EXPR;
96struct st_ftb_expr
97{
98 FTB_EXPR *up;
99 uint flags;
100/* ^^^^^^^^^^^^^^^^^^ FTB_{EXPR,WORD} common section */
101 my_off_t docid[2];
102 my_off_t max_docid;
103 float weight;
104 float cur_weight;
105 LIST *phrase; /* phrase words */
106 LIST *document; /* for phrase search */
107 uint yesses; /* number of "yes" words matched */
108 uint nos; /* number of "no" words matched */
109 uint ythresh; /* number of "yes" words in expr */
110 uint yweaks; /* number of "yes" words for scan only */
111};
112
113typedef struct st_ftb_word
114{
115 FTB_EXPR *up;
116 uint flags;
117/* ^^^^^^^^^^^^^^^^^^ FTB_{EXPR,WORD} common section */
118 my_off_t docid[2]; /* for index search and for scan */
119 my_off_t key_root;
120 FTB_EXPR *max_docid_expr;
121 MARIA_KEYDEF *keyinfo;
122 struct st_ftb_word *prev;
123 float weight;
124 uint ndepth;
125 uint len;
126 uchar off;
127 uchar word[1];
128} FTB_WORD;
129
130typedef struct st_ft_info
131{
132 struct _ft_vft *please;
133 MARIA_HA *info;
134 CHARSET_INFO *charset;
135 FTB_EXPR *root;
136 FTB_WORD **list;
137 FTB_WORD *last_word;
138 MEM_ROOT mem_root;
139 QUEUE queue;
140 TREE no_dupes;
141 my_off_t lastpos;
142 uint keynr;
143 uchar with_scan;
144 enum { UNINITIALIZED, READY, INDEX_SEARCH, INDEX_DONE } state;
145} FTB;
146
147static int FTB_WORD_cmp(my_off_t *v, FTB_WORD *a, FTB_WORD *b)
148{
149 int i;
150
151 /* if a==curdoc, take it as a < b */
152 if (v && a->docid[0] == *v)
153 return -1;
154
155 /* ORDER BY docid, ndepth DESC */
156 i=CMP_NUM(a->docid[0], b->docid[0]);
157 if (!i)
158 i=CMP_NUM(b->ndepth,a->ndepth);
159 return i;
160}
161
162static int FTB_WORD_cmp_list(CHARSET_INFO *cs, FTB_WORD **a, FTB_WORD **b)
163{
164 /* ORDER BY word, ndepth */
165 int i= ha_compare_text(cs, (uchar*) (*a)->word + 1,(*a)->len - 1,
166 (uchar*) (*b)->word + 1,(*b)->len - 1, 0);
167 if (!i)
168 i=CMP_NUM((*a)->ndepth, (*b)->ndepth);
169 return i;
170}
171
172
173typedef struct st_my_ftb_param
174{
175 FTB *ftb;
176 FTB_EXPR *ftbe;
177 uchar *up_quot;
178 uint depth;
179} MY_FTB_PARAM;
180
181
182static int ftb_query_add_word(MYSQL_FTPARSER_PARAM *param,
183 const char *word, int word_len,
184 MYSQL_FTPARSER_BOOLEAN_INFO *info)
185{
186 MY_FTB_PARAM *ftb_param= param->mysql_ftparam;
187 FTB_WORD *ftbw;
188 FTB_EXPR *ftbe, *tmp_expr;
189 FT_WORD *phrase_word;
190 LIST *tmp_element;
191 int r= info->weight_adjust;
192 float weight= (float)
193 (info->wasign ? nwghts : wghts)[(r>5)?5:((r<-5)?-5:r)];
194
195 switch (info->type) {
196 case FT_TOKEN_WORD:
197 ftbw= (FTB_WORD *)alloc_root(&ftb_param->ftb->mem_root,
198 sizeof(FTB_WORD) +
199 (info->trunc ? MARIA_MAX_KEY_BUFF :
200 word_len * ftb_param->ftb->charset->mbmaxlen +
201 HA_FT_WLEN +
202 ftb_param->ftb->info->s->rec_reflength));
203 ftbw->len= word_len + 1;
204 ftbw->flags= 0;
205 ftbw->off= 0;
206 if (info->yesno > 0) ftbw->flags|= FTB_FLAG_YES;
207 if (info->yesno < 0) ftbw->flags|= FTB_FLAG_NO;
208 if (info->trunc) ftbw->flags|= FTB_FLAG_TRUNC;
209 ftbw->weight= weight;
210 ftbw->up= ftb_param->ftbe;
211 ftbw->docid[0]= ftbw->docid[1]= HA_OFFSET_ERROR;
212 ftbw->ndepth= (info->yesno < 0) + ftb_param->depth;
213 ftbw->key_root= HA_OFFSET_ERROR;
214 memcpy(ftbw->word + 1, word, word_len);
215 ftbw->word[0]= word_len;
216 if (info->yesno > 0) ftbw->up->ythresh++;
217 ftb_param->ftb->queue.max_elements++;
218 ftbw->prev= ftb_param->ftb->last_word;
219 ftb_param->ftb->last_word= ftbw;
220 ftb_param->ftb->with_scan|= (info->trunc & FTB_FLAG_TRUNC);
221 for (tmp_expr= ftb_param->ftbe; tmp_expr->up; tmp_expr= tmp_expr->up)
222 if (! (tmp_expr->flags & FTB_FLAG_YES))
223 break;
224 ftbw->max_docid_expr= tmp_expr;
225 /* fall through */
226 case FT_TOKEN_STOPWORD:
227 if (! ftb_param->up_quot) break;
228 phrase_word= (FT_WORD *)alloc_root(&ftb_param->ftb->mem_root, sizeof(FT_WORD));
229 tmp_element= (LIST *)alloc_root(&ftb_param->ftb->mem_root, sizeof(LIST));
230 phrase_word->pos= (uchar*)word;
231 phrase_word->len= word_len;
232 tmp_element->data= (void *)phrase_word;
233 ftb_param->ftbe->phrase= list_add(ftb_param->ftbe->phrase, tmp_element);
234 /* Allocate document list at this point.
235 It allows to avoid huge amount of allocs/frees for each row.*/
236 tmp_element= (LIST *)alloc_root(&ftb_param->ftb->mem_root, sizeof(LIST));
237 tmp_element->data= alloc_root(&ftb_param->ftb->mem_root, sizeof(FT_WORD));
238 ftb_param->ftbe->document=
239 list_add(ftb_param->ftbe->document, tmp_element);
240 break;
241 case FT_TOKEN_LEFT_PAREN:
242 ftbe=(FTB_EXPR *)alloc_root(&ftb_param->ftb->mem_root, sizeof(FTB_EXPR));
243 ftbe->flags= 0;
244 if (info->yesno > 0) ftbe->flags|= FTB_FLAG_YES;
245 if (info->yesno < 0) ftbe->flags|= FTB_FLAG_NO;
246 ftbe->weight= weight;
247 ftbe->up= ftb_param->ftbe;
248 ftbe->max_docid= ftbe->ythresh= ftbe->yweaks= 0;
249 ftbe->docid[0]= ftbe->docid[1]= HA_OFFSET_ERROR;
250 ftbe->phrase= NULL;
251 ftbe->document= 0;
252 if (info->quot) ftb_param->ftb->with_scan|= 2;
253 if (info->yesno > 0) ftbe->up->ythresh++;
254 ftb_param->ftbe= ftbe;
255 ftb_param->depth++;
256 ftb_param->up_quot= (uchar*)info->quot;
257 break;
258 case FT_TOKEN_RIGHT_PAREN:
259 if (ftb_param->ftbe->document)
260 {
261 /* Circuit document list */
262 for (tmp_element= ftb_param->ftbe->document;
263 tmp_element->next; tmp_element= tmp_element->next) /* no-op */;
264 tmp_element->next= ftb_param->ftbe->document;
265 ftb_param->ftbe->document->prev= tmp_element;
266 }
267 info->quot= 0;
268 if (ftb_param->ftbe->up)
269 {
270 DBUG_ASSERT(ftb_param->depth);
271 ftb_param->ftbe= ftb_param->ftbe->up;
272 ftb_param->depth--;
273 ftb_param->up_quot= 0;
274 }
275 break;
276 case FT_TOKEN_EOF:
277 default:
278 break;
279 }
280 return(0);
281}
282
283
284static int ftb_parse_query_internal(MYSQL_FTPARSER_PARAM *param,
285 const char *query, int len)
286{
287 MY_FTB_PARAM *ftb_param= param->mysql_ftparam;
288 MYSQL_FTPARSER_BOOLEAN_INFO info;
289 CHARSET_INFO *cs= ftb_param->ftb->charset;
290 const uchar **start= (const uchar**) &query;
291 uchar *end= (uchar*) query + len;
292 FT_WORD w;
293
294 info.prev= ' ';
295 info.quot= 0;
296 while (maria_ft_get_word(cs, start, end, &w, &info))
297 param->mysql_add_word(param, (char*)w.pos, w.len, &info);
298 return(0);
299}
300
301
302static int _ftb_parse_query(FTB *ftb, uchar *query, uint len,
303 struct st_mysql_ftparser *parser)
304{
305 MYSQL_FTPARSER_PARAM *param;
306 MY_FTB_PARAM ftb_param;
307 DBUG_ENTER("_ftb_parse_query");
308 DBUG_ASSERT(parser);
309
310 if (ftb->state != UNINITIALIZED)
311 DBUG_RETURN(0);
312 if (! (param= maria_ftparser_call_initializer(ftb->info, ftb->keynr, 0)))
313 DBUG_RETURN(1);
314
315 ftb_param.ftb= ftb;
316 ftb_param.depth= 0;
317 ftb_param.ftbe= ftb->root;
318 ftb_param.up_quot= 0;
319
320 param->mysql_parse= ftb_parse_query_internal;
321 param->mysql_add_word= ftb_query_add_word;
322 param->mysql_ftparam= (void *)&ftb_param;
323 param->cs= ftb->charset;
324 param->doc= (char*)query;
325 param->length= len;
326 param->flags= 0;
327 param->mode= MYSQL_FTPARSER_FULL_BOOLEAN_INFO;
328 DBUG_RETURN(parser->parse(param));
329}
330
331
332static int _ftb_no_dupes_cmp(void* not_used __attribute__((unused)),
333 const void *a,const void *b)
334{
335 return CMP_NUM((*((my_off_t*)a)), (*((my_off_t*)b)));
336}
337
338
339/* returns 1 if the search was finished (must-word wasn't found) */
340
341static int _ft2_search_no_lock(FTB *ftb, FTB_WORD *ftbw, my_bool init_search)
342{
343 int r;
344 int subkeys=1;
345 my_bool can_go_down;
346 MARIA_HA *info=ftb->info;
347 uint UNINIT_VAR(off), extra=HA_FT_WLEN+info->s->base.rec_reflength;
348 uchar *lastkey_buf= ftbw->word+ftbw->off;
349 MARIA_KEY key;
350
351 if (ftbw->flags & FTB_FLAG_TRUNC)
352 lastkey_buf+=ftbw->len;
353
354 if (init_search)
355 {
356 ftbw->key_root=info->s->state.key_root[ftb->keynr];
357 ftbw->keyinfo=info->s->keyinfo+ftb->keynr;
358 info->last_key.keyinfo= key.keyinfo= ftbw->keyinfo;
359 info->lastinx= ~0; /* Safety */
360 key.data= ftbw->word;
361 key.data_length= ftbw->len;
362 key.ref_length= 0;
363 key.flag= 0;
364
365 r= _ma_search(info, &key, SEARCH_FIND | SEARCH_BIGGER, ftbw->key_root);
366 }
367 else
368 {
369 uint sflag= SEARCH_BIGGER;
370 my_off_t max_docid=0;
371 FTB_EXPR *tmp;
372
373 for (tmp= ftbw->max_docid_expr; tmp; tmp= tmp->up)
374 set_if_bigger(max_docid, tmp->max_docid);
375
376 if (ftbw->docid[0] < max_docid)
377 {
378 sflag|= SEARCH_SAME;
379 _ma_dpointer(info->s, (uchar*) (ftbw->word + ftbw->len + HA_FT_WLEN),
380 max_docid);
381 }
382
383 info->last_key.keyinfo= key.keyinfo= ftbw->keyinfo;
384 info->lastinx= ~0; /* Safety */
385 key.data= lastkey_buf;
386 key.data_length= USE_WHOLE_KEY;
387 key.ref_length= 0;
388 key.flag= 0;
389
390 r= _ma_search(info, &key, sflag, ftbw->key_root);
391 }
392
393 can_go_down=(!ftbw->off && (init_search || (ftbw->flags & FTB_FLAG_TRUNC)));
394 /* Skip rows inserted by concurrent insert */
395 while (!r)
396 {
397 if (can_go_down)
398 {
399 /* going down ? */
400 off= info->last_key.data_length + info->last_key.ref_length - extra;
401 subkeys=ft_sintXkorr(info->last_key.data + off);
402 }
403 if (subkeys<0 || info->cur_row.lastpos < info->state->data_file_length)
404 break;
405 r= _ma_search_next(info, &info->last_key, SEARCH_BIGGER, ftbw->key_root);
406 }
407
408 if (!r && !ftbw->off)
409 {
410 r= ha_compare_text(ftb->charset,
411 info->last_key.data+1,
412 info->last_key.data_length + info->last_key.ref_length-
413 extra-1,
414 (uchar*) ftbw->word+1,
415 ftbw->len-1,
416 (my_bool) (ftbw->flags & FTB_FLAG_TRUNC));
417 }
418
419 if (r) /* not found */
420 {
421 if (!ftbw->off || !(ftbw->flags & FTB_FLAG_TRUNC))
422 {
423 ftbw->docid[0]=HA_OFFSET_ERROR;
424 if ((ftbw->flags & FTB_FLAG_YES) && ftbw->up->up==0)
425 {
426 /*
427 This word MUST BE present in every document returned,
428 so we can stop the search right now
429 */
430 ftb->state=INDEX_DONE;
431 return 1; /* search is done */
432 }
433 else
434 return 0;
435 }
436
437 /* going up to the first-level tree to continue search there */
438 _ma_dpointer(info->s, (lastkey_buf+HA_FT_WLEN), ftbw->key_root);
439 ftbw->key_root=info->s->state.key_root[ftb->keynr];
440 ftbw->keyinfo=info->s->keyinfo+ftb->keynr;
441 ftbw->off=0;
442 return _ft2_search_no_lock(ftb, ftbw, 0);
443 }
444
445 /* matching key found */
446 memcpy(lastkey_buf, info->last_key.data,
447 info->last_key.data_length + info->last_key.ref_length);
448 if (lastkey_buf == ftbw->word)
449 ftbw->len= info->last_key.data_length + info->last_key.ref_length - extra;
450
451 /* going down ? */
452 if (subkeys<0)
453 {
454 /*
455 yep, going down, to the second-level tree
456 TODO here: subkey-based optimization
457 */
458 ftbw->off=off;
459 ftbw->key_root= info->cur_row.lastpos;
460 ftbw->keyinfo=& info->s->ft2_keyinfo;
461 r= _ma_search_first(info, ftbw->keyinfo, ftbw->key_root);
462 DBUG_ASSERT(r==0); /* found something */
463 memcpy(lastkey_buf+off, info->last_key.data,
464 info->last_key.data_length + info->last_key.ref_length);
465 }
466 ftbw->docid[0]= info->cur_row.lastpos;
467 if (ftbw->flags & FTB_FLAG_YES && !(ftbw->flags & FTB_FLAG_TRUNC))
468 ftbw->max_docid_expr->max_docid= info->cur_row.lastpos;
469 return 0;
470}
471
472static int _ft2_search(FTB *ftb, FTB_WORD *ftbw, my_bool init_search)
473{
474 int r;
475 MARIA_SHARE *share= ftb->info->s;
476 if (share->lock_key_trees)
477 mysql_rwlock_rdlock(&share->keyinfo[ftb->keynr].root_lock);
478 r= _ft2_search_no_lock(ftb, ftbw, init_search);
479 if (share->lock_key_trees)
480 mysql_rwlock_unlock(&share->keyinfo[ftb->keynr].root_lock);
481 return r;
482}
483
484
485static void _ftb_init_index_search(FT_INFO *ftb)
486{
487 int i;
488 FTB_WORD *ftbw;
489
490 if (ftb->state == UNINITIALIZED || ftb->keynr == NO_SUCH_KEY)
491 return;
492 ftb->state=INDEX_SEARCH;
493
494 for (i= queue_last_element(&ftb->queue);
495 (int) i >= (int) queue_first_element(&ftb->queue);
496 i--)
497 {
498 ftbw=(FTB_WORD *)(queue_element(&ftb->queue, i));
499
500 if (ftbw->flags & FTB_FLAG_TRUNC)
501 {
502 /*
503 special treatment for truncation operator
504 1. there are some (besides this) +words
505 | no need to search in the index, it can never ADD new rows
506 | to the result, and to remove half-matched rows we do scan anyway
507 2. -trunc*
508 | same as 1.
509 3. in 1 and 2, +/- need not be on the same expr. level,
510 but can be on any upper level, as in +word +(trunc1* trunc2*)
511 4. otherwise
512 | We have to index-search for this prefix.
513 | It may cause duplicates, as in the index (sorted by <word,docid>)
514 | <aaaa,row1>
515 | <aabb,row2>
516 | <aacc,row1>
517 | Searching for "aa*" will find row1 twice...
518 */
519 FTB_EXPR *ftbe;
520 for (ftbe=(FTB_EXPR*)ftbw;
521 ftbe->up && !(ftbe->up->flags & FTB_FLAG_TRUNC);
522 ftbe->up->flags|= FTB_FLAG_TRUNC, ftbe=ftbe->up)
523 {
524 if (ftbe->flags & FTB_FLAG_NO || /* 2 */
525 ftbe->up->ythresh - ftbe->up->yweaks >
526 (uint) MY_TEST(ftbe->flags & FTB_FLAG_YES)) /* 1 */
527 {
528 FTB_EXPR *top_ftbe=ftbe->up;
529 ftbw->docid[0]=HA_OFFSET_ERROR;
530 for (ftbe=(FTB_EXPR *)ftbw;
531 ftbe != top_ftbe && !(ftbe->flags & FTB_FLAG_NO);
532 ftbe=ftbe->up)
533 ftbe->up->yweaks++;
534 ftbe=0;
535 break;
536 }
537 }
538 if (!ftbe)
539 continue;
540 /* 4 */
541 if (!is_tree_inited(& ftb->no_dupes))
542 init_tree(& ftb->no_dupes,0,0,sizeof(my_off_t),
543 _ftb_no_dupes_cmp,0,0,0);
544 else
545 reset_tree(& ftb->no_dupes);
546 }
547
548 ftbw->off=0; /* in case of reinit */
549 if (_ft2_search(ftb, ftbw, 1))
550 return;
551 }
552 queue_fix(& ftb->queue);
553}
554
555
556FT_INFO * maria_ft_init_boolean_search(MARIA_HA *info, uint keynr,
557 uchar *query, uint query_len,
558 CHARSET_INFO *cs)
559{
560 FTB *ftb;
561 FTB_EXPR *ftbe;
562 FTB_WORD *ftbw;
563
564 if (!(ftb=(FTB *)my_malloc(sizeof(FTB), MYF(MY_WME))))
565 return 0;
566 ftb->please= (struct _ft_vft *) & _ma_ft_vft_boolean;
567 ftb->state=UNINITIALIZED;
568 ftb->info=info;
569 ftb->keynr=keynr;
570 ftb->charset=cs;
571 DBUG_ASSERT(keynr==NO_SUCH_KEY || cs == info->s->keyinfo[keynr].seg->charset);
572 ftb->with_scan=0;
573 ftb->lastpos=HA_OFFSET_ERROR;
574 bzero(& ftb->no_dupes, sizeof(TREE));
575 ftb->last_word= 0;
576
577 init_alloc_root(&ftb->mem_root, "fulltext", 1024, 1024, 0);
578 ftb->queue.max_elements= 0;
579 if (!(ftbe=(FTB_EXPR *)alloc_root(&ftb->mem_root, sizeof(FTB_EXPR))))
580 goto err;
581 ftbe->weight=1;
582 ftbe->flags=FTB_FLAG_YES;
583 ftbe->nos=1;
584 ftbe->up=0;
585 ftbe->max_docid= ftbe->ythresh= ftbe->yweaks= 0;
586 ftbe->docid[0]=ftbe->docid[1]=HA_OFFSET_ERROR;
587 ftbe->phrase= NULL;
588 ftbe->document= 0;
589 ftb->root=ftbe;
590 if (unlikely(_ftb_parse_query(ftb, query, query_len,
591 keynr == NO_SUCH_KEY ? &ft_default_parser :
592 info->s->keyinfo[keynr].parser)))
593 goto err;
594 /*
595 Hack: instead of init_queue, we'll use reinit queue to be able
596 to alloc queue with alloc_root()
597 */
598 if (! (ftb->queue.root= (uchar **)alloc_root(&ftb->mem_root,
599 (ftb->queue.max_elements + 1) *
600 sizeof(void *))))
601 goto err;
602 reinit_queue(&ftb->queue, ftb->queue.max_elements, 0, 0,
603 (int (*)(void*, uchar*, uchar*))FTB_WORD_cmp, 0, 0, 0);
604 for (ftbw= ftb->last_word; ftbw; ftbw= ftbw->prev)
605 queue_insert(&ftb->queue, (uchar *)ftbw);
606 ftb->list=(FTB_WORD **)alloc_root(&ftb->mem_root,
607 sizeof(FTB_WORD *)*ftb->queue.elements);
608 memcpy(ftb->list, ftb->queue.root+1, sizeof(FTB_WORD *)*ftb->queue.elements);
609 my_qsort2(ftb->list, ftb->queue.elements, sizeof(FTB_WORD *),
610 (qsort2_cmp)FTB_WORD_cmp_list, (void*) ftb->charset);
611 if (ftb->queue.elements<2) ftb->with_scan &= ~FTB_FLAG_TRUNC;
612 ftb->state=READY;
613 return ftb;
614err:
615 free_root(& ftb->mem_root, MYF(0));
616 my_free(ftb);
617 return 0;
618}
619
620
621typedef struct st_my_ftb_phrase_param
622{
623 LIST *phrase;
624 LIST *document;
625 CHARSET_INFO *cs;
626 uint phrase_length;
627 uint document_length;
628 uint match;
629} MY_FTB_PHRASE_PARAM;
630
631
632static int ftb_phrase_add_word(MYSQL_FTPARSER_PARAM *param,
633 const char *word, int word_len,
634 MYSQL_FTPARSER_BOOLEAN_INFO
635 *boolean_info __attribute__((unused)))
636{
637 MY_FTB_PHRASE_PARAM *phrase_param= param->mysql_ftparam;
638 FT_WORD *w= (FT_WORD *)phrase_param->document->data;
639 LIST *phrase, *document;
640 w->pos= (uchar*)word;
641 w->len= word_len;
642 phrase_param->document= phrase_param->document->prev;
643 if (phrase_param->phrase_length > phrase_param->document_length)
644 {
645 phrase_param->document_length++;
646 return 0;
647 }
648 /* TODO: rewrite phrase search to avoid
649 comparing the same word twice. */
650 for (phrase= phrase_param->phrase, document= phrase_param->document->next;
651 phrase; phrase= phrase->next, document= document->next)
652 {
653 FT_WORD *phrase_word= (FT_WORD *)phrase->data;
654 FT_WORD *document_word= (FT_WORD *)document->data;
655 if (my_strnncoll(phrase_param->cs, (uchar*) phrase_word->pos,
656 phrase_word->len,
657 (uchar*) document_word->pos, document_word->len))
658 return 0;
659 }
660 phrase_param->match++;
661 return 0;
662}
663
664
665static int ftb_check_phrase_internal(MYSQL_FTPARSER_PARAM *param,
666 const char *document, int len)
667{
668 FT_WORD word;
669 MY_FTB_PHRASE_PARAM *phrase_param= param->mysql_ftparam;
670 const uchar *docend= (uchar*)document + len;
671 while (maria_ft_simple_get_word(phrase_param->cs, (uchar**)&document,
672 docend, &word, FALSE))
673 {
674 param->mysql_add_word(param, (char*)word.pos, word.len, 0);
675 if (phrase_param->match)
676 break;
677 }
678 return 0;
679}
680
681
682/*
683 Checks if given buffer matches phrase list.
684
685 SYNOPSIS
686 _ftb_check_phrase()
687 s0 start of buffer
688 e0 end of buffer
689 phrase broken into list phrase
690 cs charset info
691
692 RETURN VALUE
693 1 is returned if phrase found, 0 else.
694 -1 is returned if error occurs.
695*/
696
697static int _ftb_check_phrase(FTB *ftb, const uchar *document, uint len,
698 FTB_EXPR *ftbe, struct st_mysql_ftparser *parser)
699{
700 MY_FTB_PHRASE_PARAM ftb_param;
701 MYSQL_FTPARSER_PARAM *param;
702 DBUG_ENTER("_ftb_check_phrase");
703 DBUG_ASSERT(parser);
704
705 if (! (param= maria_ftparser_call_initializer(ftb->info, ftb->keynr, 1)))
706 DBUG_RETURN(0);
707 ftb_param.phrase= ftbe->phrase;
708 ftb_param.document= ftbe->document;
709 ftb_param.cs= ftb->charset;
710 ftb_param.phrase_length= list_length(ftbe->phrase);
711 ftb_param.document_length= 1;
712 ftb_param.match= 0;
713
714 param->mysql_parse= ftb_check_phrase_internal;
715 param->mysql_add_word= ftb_phrase_add_word;
716 param->mysql_ftparam= (void *)&ftb_param;
717 param->cs= ftb->charset;
718 param->doc= (char *)document;
719 param->length= len;
720 param->flags= 0;
721 param->mode= MYSQL_FTPARSER_WITH_STOPWORDS;
722 if (unlikely(parser->parse(param)))
723 return -1;
724 DBUG_RETURN(ftb_param.match ? 1 : 0);
725}
726
727
728static int _ftb_climb_the_tree(FTB *ftb, FTB_WORD *ftbw, FT_SEG_ITERATOR *ftsi_orig)
729{
730 FT_SEG_ITERATOR ftsi;
731 FTB_EXPR *ftbe;
732 float weight=ftbw->weight;
733 int yn_flag= ftbw->flags, ythresh, mode=(ftsi_orig != 0);
734 my_off_t curdoc=ftbw->docid[mode];
735 struct st_mysql_ftparser *parser= ftb->keynr == NO_SUCH_KEY ?
736 &ft_default_parser :
737 ftb->info->s->keyinfo[ftb->keynr].parser;
738
739 for (ftbe=ftbw->up; ftbe; ftbe=ftbe->up)
740 {
741 ythresh = ftbe->ythresh - (mode ? 0 : ftbe->yweaks);
742 if (ftbe->docid[mode] != curdoc)
743 {
744 ftbe->cur_weight=0;
745 ftbe->yesses=ftbe->nos=0;
746 ftbe->docid[mode]=curdoc;
747 }
748 if (ftbe->nos)
749 break;
750 if (yn_flag & FTB_FLAG_YES)
751 {
752 weight /= ftbe->ythresh;
753 ftbe->cur_weight += weight;
754 if ((int) ++ftbe->yesses == ythresh)
755 {
756 yn_flag=ftbe->flags;
757 weight=ftbe->cur_weight*ftbe->weight;
758 if (mode && ftbe->phrase)
759 {
760 int found= 0;
761
762 memcpy(&ftsi, ftsi_orig, sizeof(ftsi));
763 while (_ma_ft_segiterator(&ftsi) && !found)
764 {
765 if (!ftsi.pos)
766 continue;
767 found= _ftb_check_phrase(ftb, ftsi.pos, ftsi.len, ftbe, parser);
768 if (unlikely(found < 0))
769 return 1;
770 }
771 if (!found)
772 break;
773 } /* ftbe->quot */
774 }
775 else
776 break;
777 }
778 else
779 if (yn_flag & FTB_FLAG_NO)
780 {
781 /*
782 NOTE: special sort function of queue assures that all
783 (yn_flag & FTB_FLAG_NO) != 0
784 events for every particular subexpression will
785 "auto-magically" happen BEFORE all the
786 (yn_flag & FTB_FLAG_YES) != 0 events. So no
787 already matched expression can become not-matched again.
788 */
789 ++ftbe->nos;
790 break;
791 }
792 else
793 {
794 if (ftbe->ythresh)
795 weight/=3;
796 ftbe->cur_weight += weight;
797 if ((int) ftbe->yesses < ythresh)
798 break;
799 if (!(yn_flag & FTB_FLAG_WONLY))
800 yn_flag= ((int) ftbe->yesses++ == ythresh) ? ftbe->flags : FTB_FLAG_WONLY ;
801 weight*= ftbe->weight;
802 }
803 }
804 return 0;
805}
806
807
808int maria_ft_boolean_read_next(FT_INFO *ftb, char *record)
809{
810 FTB_EXPR *ftbe;
811 FTB_WORD *ftbw;
812 MARIA_HA *info=ftb->info;
813 my_off_t curdoc;
814
815 if (ftb->state != INDEX_SEARCH && ftb->state != INDEX_DONE)
816 return -1;
817
818 /* black magic ON */
819 if ((int) _ma_check_index(info, ftb->keynr) < 0)
820 return my_errno;
821 if (_ma_readinfo(info, F_RDLCK, 1))
822 return my_errno;
823 /* black magic OFF */
824
825 if (!ftb->queue.elements)
826 return my_errno=HA_ERR_END_OF_FILE;
827
828 /* Attention!!! Address of a local variable is used here! See err: label */
829 ftb->queue.first_cmp_arg=(void *)&curdoc;
830
831 while (ftb->state == INDEX_SEARCH &&
832 (curdoc=((FTB_WORD *)queue_top(& ftb->queue))->docid[0]) !=
833 HA_OFFSET_ERROR)
834 {
835 while (curdoc == (ftbw=(FTB_WORD *)queue_top(& ftb->queue))->docid[0])
836 {
837 if (unlikely(_ftb_climb_the_tree(ftb, ftbw, 0)))
838 {
839 my_errno= HA_ERR_OUT_OF_MEM;
840 goto err;
841 }
842
843 /* update queue */
844 _ft2_search(ftb, ftbw, 0);
845 queue_replace_top(&ftb->queue);
846 }
847
848 ftbe=ftb->root;
849 if (ftbe->docid[0]==curdoc && ftbe->cur_weight>0 &&
850 ftbe->yesses>=(ftbe->ythresh-ftbe->yweaks) && !ftbe->nos)
851 {
852 /* curdoc matched ! */
853 if (is_tree_inited(&ftb->no_dupes) &&
854 tree_insert(&ftb->no_dupes, &curdoc, 0,
855 ftb->no_dupes.custom_arg)->count >1)
856 /* but it managed already to get past this line once */
857 continue;
858
859 info->cur_row.lastpos= curdoc;
860 /* Clear all states, except that the table was updated */
861 info->update&= (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED);
862
863 if (!(*info->read_record)(info, (uchar *) record, curdoc))
864 {
865 info->update|= HA_STATE_AKTIV; /* Record is read */
866 if (ftb->with_scan &&
867 maria_ft_boolean_find_relevance(ftb, (uchar*)record, 0)==0)
868 continue; /* no match */
869 my_errno=0;
870 goto err;
871 }
872 goto err;
873 }
874 }
875 ftb->state=INDEX_DONE;
876 my_errno=HA_ERR_END_OF_FILE;
877err:
878 ftb->queue.first_cmp_arg=(void *)0;
879 return my_errno;
880}
881
882
883typedef struct st_my_ftb_find_param
884{
885 FT_INFO *ftb;
886 FT_SEG_ITERATOR *ftsi;
887} MY_FTB_FIND_PARAM;
888
889
890static int ftb_find_relevance_add_word(MYSQL_FTPARSER_PARAM *param,
891 const char *word, int len,
892 MYSQL_FTPARSER_BOOLEAN_INFO
893 *boolean_info __attribute__((unused)))
894{
895 MY_FTB_FIND_PARAM *ftb_param= param->mysql_ftparam;
896 FT_INFO *ftb= ftb_param->ftb;
897 FTB_WORD *ftbw;
898 int a, b, c;
899 /*
900 Find right-most element in the array of query words matching this
901 word from a document.
902 */
903 for (a= 0, b= ftb->queue.elements, c= (a+b)/2; b-a>1; c= (a+b)/2)
904 {
905 ftbw= ftb->list[c];
906 if (ha_compare_text(ftb->charset, (uchar*)word, len,
907 (uchar*)ftbw->word+1, ftbw->len-1,
908 (my_bool)(ftbw->flags&FTB_FLAG_TRUNC)) < 0)
909 b= c;
910 else
911 a= c;
912 }
913 /*
914 If there were no words with truncation operator, we iterate to the
915 beginning of an array until array element is equal to the word from
916 a document. This is done mainly because the same word may be
917 mentioned twice (or more) in the query.
918
919 In case query has words with truncation operator we must iterate
920 to the beginning of the array. There may be non-matching query words
921 between matching word with truncation operator and the right-most
922 matching element. E.g., if we're looking for 'aaa15' in an array of
923 'aaa1* aaa14 aaa15 aaa16'.
924
925 Worse of that there still may be match even if the binary search
926 above didn't find matching element. E.g., if we're looking for
927 'aaa15' in an array of 'aaa1* aaa14 aaa16'. The binary search will
928 stop at 'aaa16'.
929 */
930 for (; c >= 0; c--)
931 {
932 ftbw= ftb->list[c];
933 if (ha_compare_text(ftb->charset, (uchar*)word, len,
934 (uchar*)ftbw->word + 1,ftbw->len - 1,
935 (my_bool)(ftbw->flags & FTB_FLAG_TRUNC)))
936 {
937 if (ftb->with_scan & FTB_FLAG_TRUNC)
938 continue;
939 else
940 break;
941 }
942 if (ftbw->docid[1] == ftb->info->cur_row.lastpos)
943 continue;
944 ftbw->docid[1]= ftb->info->cur_row.lastpos;
945 if (unlikely(_ftb_climb_the_tree(ftb, ftbw, ftb_param->ftsi)))
946 return 1;
947 }
948 return(0);
949}
950
951
952static int ftb_find_relevance_parse(MYSQL_FTPARSER_PARAM *param,
953 const char *doc, int len)
954{
955 MY_FTB_FIND_PARAM *ftb_param= param->mysql_ftparam;
956 FT_INFO *ftb= ftb_param->ftb;
957 uchar *end= (uchar*) doc + len;
958 FT_WORD w;
959 while (maria_ft_simple_get_word(ftb->charset, (uchar**)&doc, end, &w, TRUE))
960 param->mysql_add_word(param, (char*)w.pos, w.len, 0);
961 return(0);
962}
963
964
965float maria_ft_boolean_find_relevance(FT_INFO *ftb, uchar *record, uint length)
966{
967 FTB_EXPR *ftbe;
968 FT_SEG_ITERATOR ftsi, ftsi2;
969 MARIA_RECORD_POS docid= ftb->info->cur_row.lastpos;
970 MY_FTB_FIND_PARAM ftb_param;
971 MYSQL_FTPARSER_PARAM *param;
972 struct st_mysql_ftparser *parser= ftb->keynr == NO_SUCH_KEY ?
973 &ft_default_parser :
974 ftb->info->s->keyinfo[ftb->keynr].parser;
975
976 if (docid == HA_OFFSET_ERROR)
977 return -2.0;
978 if (!ftb->queue.elements)
979 return 0;
980 if (! (param= maria_ftparser_call_initializer(ftb->info, ftb->keynr, 0)))
981 return 0;
982
983 if (ftb->state != INDEX_SEARCH && docid <= ftb->lastpos)
984 {
985 FTB_EXPR *x;
986 uint i;
987
988 for (i=0; i < ftb->queue.elements; i++)
989 {
990 ftb->list[i]->docid[1]=HA_OFFSET_ERROR;
991 for (x=ftb->list[i]->up; x; x=x->up)
992 x->docid[1]=HA_OFFSET_ERROR;
993 }
994 }
995
996 ftb->lastpos=docid;
997
998 if (ftb->keynr==NO_SUCH_KEY)
999 _ma_ft_segiterator_dummy_init(record, length, &ftsi);
1000 else
1001 _ma_ft_segiterator_init(ftb->info, ftb->keynr, record, &ftsi);
1002 memcpy(&ftsi2, &ftsi, sizeof(ftsi));
1003
1004 ftb_param.ftb= ftb;
1005 ftb_param.ftsi= &ftsi2;
1006 param->mysql_parse= ftb_find_relevance_parse;
1007 param->mysql_add_word= ftb_find_relevance_add_word;
1008 param->mysql_ftparam= (void *)&ftb_param;
1009 param->flags= 0;
1010 param->cs= ftb->charset;
1011 param->mode= MYSQL_FTPARSER_SIMPLE_MODE;
1012
1013 while (_ma_ft_segiterator(&ftsi))
1014 {
1015 if (!ftsi.pos)
1016 continue;
1017 param->doc= (char *)ftsi.pos;
1018 param->length= ftsi.len;
1019 if (unlikely(parser->parse(param)))
1020 return 0;
1021 }
1022 ftbe=ftb->root;
1023 if (ftbe->docid[1]==docid && ftbe->cur_weight>0 &&
1024 ftbe->yesses>=ftbe->ythresh && !ftbe->nos)
1025 { /* row matched ! */
1026 return ftbe->cur_weight;
1027 }
1028 else
1029 { /* match failed ! */
1030 return 0.0;
1031 }
1032}
1033
1034
1035void maria_ft_boolean_close_search(FT_INFO *ftb)
1036{
1037 if (is_tree_inited(& ftb->no_dupes))
1038 {
1039 delete_tree(&ftb->no_dupes, 0);
1040 }
1041 free_root(& ftb->mem_root, MYF(0));
1042 my_free(ftb);
1043}
1044
1045
1046float maria_ft_boolean_get_relevance(FT_INFO *ftb)
1047{
1048 return ftb->root->cur_weight;
1049}
1050
1051
1052void maria_ft_boolean_reinit_search(FT_INFO *ftb)
1053{
1054 _ftb_init_index_search(ftb);
1055}
1056