ft_boolean_search.c source code [MariaDB/storage/myisam/ft_boolean_search.c]

1	/ Copyright (c) 2001, 2016, Oracle and/or its affiliates. All rights reserved.*
2
3	This program is free software; you can redistribute it and/or modify
4	it under the terms of the GNU General Public License as published by
5	the Free Software Foundation; version 2 of the License.
6
7	This program is distributed in the hope that it will be useful,
8	but WITHOUT ANY WARRANTY; without even the implied warranty of
9	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10	GNU General Public License for more details.
11
12	You should have received a copy of the GNU General Public License
13	along with this program; if not, write to the Free Software
14	Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA /*
15
16	/ Written by Sergei A. Golubchik, who has a shared copyright to this code /
17
18	/ TODO: add caching - pre-read several index entries at once /
19
20	/*
21	Added optimization for full-text queries with plus-words. It was
22	implemented by sharing maximal document id (max_docid) variable
23	inside plus subtree. max_docid could be used by any word in plus
24	subtree, but it could be updated by plus-word only.
25
26	Fulltext "smarter index merge" optimization assumes that rows
27	it gets are ordered by doc_id. That is not the case when we
28	search for a word with truncation operator. It may return
29	rows in random order. Thus we may not use "smarter index merge"
30	optimization with "trunc-words".
31
32	The idea is: there is no need to search for docid smaller than
33	biggest docid inside current plus subtree or any upper plus subtree.
34
35	Examples:
36	+word1 word2
37	share same max_docid
38	max_docid updated by word1
39	+word1 +(word2 word3)
40	share same max_docid
41	max_docid updated by word1
42	+(word1 -word2) +(+word3 word4)
43	share same max_docid
44	max_docid updated by word3
45	+word1 word2 (+word3 word4 (+word5 word6))
46	three subexpressions (including the top-level one),
47	every one has its own max_docid, updated by its plus word.
48	but for the search word6 uses
49	max(word1.max_docid, word3.max_docid, word5.max_docid),
50	while word4 uses, accordingly,
51	max(word1.max_docid, word3.max_docid).
52	*/
53
54	#define FT_CORE
55	#include "ftdefs.h"
56
57	/ search with boolean queries /
58
59	static double _wghts[`11`]=
60	{
61	`0.131687242798354`,
62	`0.197530864197531`,
63	`0.296296296296296`,
64	`0.444444444444444`,
65	`0.666666666666667`,
66	`1.000000000000000`,
67	`1.500000000000000`,
68	`2.250000000000000`,
69	`3.375000000000000`,
70	`5.062500000000000`,
71	`7.593750000000000`};
72	static double wghts=_wghts+`5`; /* wghts[i] = 1.5*i /*
73
74	static double _nwghts[`11`]=
75	{
76	-`0.065843621399177`,
77	-`0.098765432098766`,
78	-`0.148148148148148`,
79	-`0.222222222222222`,
80	-`0.333333333333334`,
81	-`0.500000000000000`,
82	-`0.750000000000000`,
83	-`1.125000000000000`,
84	-`1.687500000000000`,
85	-`2.531250000000000`,
86	-`3.796875000000000`};
87	static double nwghts=_nwghts+`5`; /* nwghts[i] = -0.51.5i /*
88
89	#define FTB_FLAG_TRUNC 1
90	/ At most one of the following flags can be set /
91	#define FTB_FLAG_YES 2
92	#define FTB_FLAG_NO 4
93	#define FTB_FLAG_WONLY 8
94
95	typedef struct st_ftb_expr FTB_EXPR;
96	struct st_ftb_expr
97	{
98	FTB_EXPR *up;
99	uint flags;
100	/ ^^^^^^^^^^^^^^^^^^ FTB_{EXPR,WORD} common section /
101	my_off_t docid[`2`];
102	my_off_t max_docid;
103	float weight;
104	float cur_weight;
105	LIST phrase; /* phrase words /
106	LIST document; /* for phrase search /
107	uint yesses; / number of "yes" words matched /
108	uint nos; / number of "no" words matched /
109	uint ythresh; / number of "yes" words in expr /
110	uint yweaks; / number of "yes" words for scan only /
111	};
112
113	typedef struct st_ftb_word
114	{
115	FTB_EXPR *up;
116	uint flags;
117	/ ^^^^^^^^^^^^^^^^^^ FTB_{EXPR,WORD} common section /
118	my_off_t docid[`2`]; / for index search and for scan /
119	my_off_t key_root;
120	FTB_EXPR *max_docid_expr;
121	MI_KEYDEF *keyinfo;
122	struct st_ftb_word *prev;
123	float weight;
124	uint ndepth;
125	uint len;
126	uchar off;
127	uchar word[`1`];
128	} FTB_WORD;
129
130	typedef struct st_ft_info
131	{
132	struct _ft_vft *please;
133	MI_INFO *info;
134	CHARSET_INFO *charset;
135	FTB_EXPR *root;
136	FTB_WORD **list;
137	FTB_WORD *last_word;
138	MEM_ROOT mem_root;
139	QUEUE queue;
140	TREE no_dupes;
141	my_off_t lastpos;
142	uint keynr;
143	uchar with_scan;
144	enum { UNINITIALIZED, READY, INDEX_SEARCH, INDEX_DONE } state;
145	} FTB;
146
147	static int FTB_WORD_cmp(my_off_t v, FTB_WORD a, FTB_WORD *b)
148	{
149	int i;
150
151	/ if a==curdoc, take it as a < b /
152	if (v && a->docid[`0`] == *v)
153	return -`1`;
154
155	/ ORDER BY docid, ndepth DESC /
156	i=CMP_NUM(a->docid[`0`], b->docid[`0`]);
157	if (!i)
158	i=CMP_NUM(b->ndepth,a->ndepth);
159	return i;
160	}
161
162	static int FTB_WORD_cmp_list(CHARSET_INFO cs, FTB_WORD a, FTB_WORD *b)
163	{
164	/ ORDER BY word, ndepth /
165	int i= ha_compare_text(cs, (uchar) (a)->word + `1`, (*a)->len - `1`,
166	(uchar) (b)->word + `1`, (*b)->len - `1`, `0`);
167	if (!i)
168	i= CMP_NUM((a)->ndepth, (b)->ndepth);
169	return i;
170	}
171
172
173	typedef struct st_my_ftb_param
174	{
175	FTB *ftb;
176	FTB_EXPR *ftbe;
177	uchar *up_quot;
178	uint depth;
179	} MY_FTB_PARAM;
180
181
182	static int ftb_query_add_word(MYSQL_FTPARSER_PARAM *param,
183	const char word, int* word_len,
184	MYSQL_FTPARSER_BOOLEAN_INFO *info)
185	{
186	MY_FTB_PARAM *ftb_param= param->mysql_ftparam;
187	FTB_WORD *ftbw;
188	FTB_EXPR ftbe, tmp_expr;
189	FT_WORD *phrase_word;
190	LIST *tmp_element;
191	int r= info->weight_adjust;
192	float weight= (float)
193	(info->wasign ? nwghts : wghts)[(r>`5`)?`5`:((r<-`5`)?-`5`:r)];
194
195	switch (info->type) {
196	case FT_TOKEN_WORD:
197	ftbw= (FTB_WORD *)alloc_root(&ftb_param->ftb->mem_root,
198	sizeof(FTB_WORD) + HA_MAX_KEY_BUFF);
199	ftbw->len= word_len + `1`;
200	ftbw->flags= `0`;
201	ftbw->off= `0`;
202	if (info->yesno > `0`) ftbw->flags\|= FTB_FLAG_YES;
203	if (info->yesno < `0`) ftbw->flags\|= FTB_FLAG_NO;
204	if (info->trunc) ftbw->flags\|= FTB_FLAG_TRUNC;
205	ftbw->weight= weight;
206	ftbw->up= ftb_param->ftbe;
207	ftbw->docid[`0`]= ftbw->docid[`1`]= HA_OFFSET_ERROR;
208	ftbw->ndepth= (info->yesno < `0`) + ftb_param->depth;
209	ftbw->key_root= HA_OFFSET_ERROR;
210	memcpy(ftbw->word + `1`, word, word_len);
211	ftbw->word[`0`]= word_len;
212	if (info->yesno > `0`) ftbw->up->ythresh++;
213	ftb_param->ftb->queue.max_elements++;
214	ftbw->prev= ftb_param->ftb->last_word;
215	ftb_param->ftb->last_word= ftbw;
216	ftb_param->ftb->with_scan\|= (info->trunc & FTB_FLAG_TRUNC);
217	for (tmp_expr= ftb_param->ftbe; tmp_expr->up; tmp_expr= tmp_expr->up)
218	if (! (tmp_expr->flags & FTB_FLAG_YES))
219	break;
220	ftbw->max_docid_expr= tmp_expr;
221	/ fall through /
222	case FT_TOKEN_STOPWORD:
223	if (! ftb_param->up_quot) break;
224	phrase_word= (FT_WORD )alloc_root(&ftb_param->ftb->mem_root, sizeof*(FT_WORD));
225	tmp_element= (LIST )alloc_root(&ftb_param->ftb->mem_root, sizeof*(LIST));
226	phrase_word->pos= (uchar*) word;
227	phrase_word->len= word_len;
228	tmp_element->data= (void *)phrase_word;
229	ftb_param->ftbe->phrase= list_add(ftb_param->ftbe->phrase, tmp_element);
230	/ Allocate document list at this point.*
231	It allows to avoid huge amount of allocs/frees for each row./*
232	tmp_element= (LIST )alloc_root(&ftb_param->ftb->mem_root, sizeof*(LIST));
233	tmp_element->data= alloc_root(&ftb_param->ftb->mem_root, sizeof(FT_WORD));
234	ftb_param->ftbe->document=
235	list_add(ftb_param->ftbe->document, tmp_element);
236	break;
237	case FT_TOKEN_LEFT_PAREN:
238	ftbe=(FTB_EXPR )alloc_root(&ftb_param->ftb->mem_root, sizeof*(FTB_EXPR));
239	ftbe->flags= `0`;
240	if (info->yesno > `0`) ftbe->flags\|= FTB_FLAG_YES;
241	if (info->yesno < `0`) ftbe->flags\|= FTB_FLAG_NO;
242	ftbe->weight= weight;
243	ftbe->up= ftb_param->ftbe;
244	ftbe->max_docid= ftbe->ythresh= ftbe->yweaks= `0`;
245	ftbe->docid[`0`]= ftbe->docid[`1`]= HA_OFFSET_ERROR;
246	ftbe->phrase= NULL;
247	ftbe->document= `0`;
248	if (info->quot) ftb_param->ftb->with_scan\|= `2`;
249	if (info->yesno > `0`) ftbe->up->ythresh++;
250	ftb_param->ftbe= ftbe;
251	ftb_param->depth++;
252	ftb_param->up_quot= (uchar*) info->quot;
253	break;
254	case FT_TOKEN_RIGHT_PAREN:
255	if (ftb_param->ftbe->document)
256	{
257	/ Circuit document list /
258	for (tmp_element= ftb_param->ftbe->document;
259	tmp_element->next; tmp_element= tmp_element->next) / no-op /;
260	tmp_element->next= ftb_param->ftbe->document;
261	ftb_param->ftbe->document->prev= tmp_element;
262	}
263	info->quot= `0`;
264	if (ftb_param->ftbe->up)
265	{
266	DBUG_ASSERT(ftb_param->depth);
267	ftb_param->ftbe= ftb_param->ftbe->up;
268	ftb_param->depth--;
269	ftb_param->up_quot= `0`;
270	}
271	break;
272	case FT_TOKEN_EOF:
273	default:
274	break;
275	}
276	return(`0`);
277	}
278
279
280	static int ftb_parse_query_internal(MYSQL_FTPARSER_PARAM *param,
281	const char query, int* len)
282	{
283	MY_FTB_PARAM *ftb_param= param->mysql_ftparam;
284	MYSQL_FTPARSER_BOOLEAN_INFO info;
285	CHARSET_INFO *cs= ftb_param->ftb->charset;
286	const uchar *start= (const* uchar**) &query;
287	uchar end= (uchar) query + len;
288	FT_WORD w;
289
290	info.prev= `' '`;
291	info.quot= `0`;
292	while (ft_get_word(cs, start, end, &w, &info))
293	param->mysql_add_word(param, (char) w.pos, (int*)w.len, &info);
294	return(`0`);
295	}
296
297
298	static int _ftb_parse_query(FTB ftb, uchar query, uint len,
299	struct st_mysql_ftparser *parser)
300	{
301	MYSQL_FTPARSER_PARAM *param;
302	MY_FTB_PARAM ftb_param;
303	DBUG_ENTER("_ftb_parse_query");
304	DBUG_ASSERT(parser);
305
306	if (ftb->state != UNINITIALIZED)
307	DBUG_RETURN(`0`);
308	if (! (param= ftparser_call_initializer(ftb->info, ftb->keynr, `0`)))
309	DBUG_RETURN(`1`);
310
311	ftb_param.ftb= ftb;
312	ftb_param.depth= `0`;
313	ftb_param.ftbe= ftb->root;
314	ftb_param.up_quot= `0`;
315
316	param->mysql_parse= ftb_parse_query_internal;
317	param->mysql_add_word= ftb_query_add_word;
318	param->mysql_ftparam= (void *)&ftb_param;
319	param->cs= ftb->charset;
320	param->doc= (char*) query;
321	param->length= len;
322	param->flags= `0`;
323	param->mode= MYSQL_FTPARSER_FULL_BOOLEAN_INFO;
324	DBUG_RETURN(parser->parse(param));
325	}
326
327
328	static int _ftb_no_dupes_cmp(void* not_used __attribute__((unused)),
329	const void a,const* void *b)
330	{
331	return CMP_NUM((((my_off_t)a)), (((my_off_t)b)));
332	}
333
334	/*
335	When performing prefix search (a word with truncation operator), we
336	must preserve original prefix to ensure that characters which may be
337	expanded/contracted do not break the prefix. This is done by storing
338	newly found key immediatly after the original word in ftbw->word
339	buffer.
340
341	ftbw->word= LENGTH WORD [ LENGTH1 WORD1 ] WEIGHT REFERENCE
342	LENGTH - 1 byte, length of the WORD
343	WORD - LENGTH bytes, the word itself
344	LENGTH1 - 1 byte, length of the WORD1, present in case of prefix search
345	WORD1 - LENGTH bytes, the word itself, present in case of prefix search
346	WEIGHT - 4 bytes (HA_FT_WLEN), either weight or number of subkeys
347	REFERENCE - rec_reflength bytes, pointer to the record
348
349	returns 1 if the search was finished (must-word wasn't found)
350	*/
351	static int _ft2_search_no_lock(FTB ftb, FTB_WORD ftbw, my_bool init_search)
352	{
353	int r;
354	int subkeys=`1`;
355	my_bool can_go_down;
356	MI_INFO *info=ftb->info;
357	uint UNINIT_VAR(off), extra= HA_FT_WLEN + info->s->rec_reflength;
358	uchar *lastkey_buf=ftbw->word+ftbw->off;
359
360	if (ftbw->flags & FTB_FLAG_TRUNC)
361	lastkey_buf+=ftbw->len;
362
363	if (init_search)
364	{
365	ftbw->key_root=info->s->state.key_root[ftb->keynr];
366	ftbw->keyinfo=info->s->keyinfo+ftb->keynr;
367
368	r=_mi_search(info, ftbw->keyinfo, (uchar*) ftbw->word, ftbw->len,
369	SEARCH_FIND \| SEARCH_BIGGER, ftbw->key_root);
370	}
371	else
372	{
373	uint sflag= SEARCH_BIGGER;
374	my_off_t max_docid=`0`;
375	FTB_EXPR *tmp;
376
377	for (tmp= ftbw->max_docid_expr; tmp; tmp= tmp->up)
378	set_if_bigger(max_docid, tmp->max_docid);
379
380	if (ftbw->docid[`0`] < max_docid)
381	{
382	sflag\|= SEARCH_SAME;
383	_mi_dpointer(info, (uchar*) (lastkey_buf + HA_FT_WLEN +
384	(ftbw->off ? `0` : lastkey_buf[`0`] + `1`)),
385	max_docid);
386	}
387	r=_mi_search(info, ftbw->keyinfo, (uchar*) lastkey_buf,
388	USE_WHOLE_KEY, sflag, ftbw->key_root);
389	}
390
391	can_go_down=(!ftbw->off && (init_search \|\| (ftbw->flags & FTB_FLAG_TRUNC)));
392	/ Skip rows inserted by concurrent insert /
393	while (!r)
394	{
395	if (can_go_down)
396	{
397	/ going down ? /
398	off=info->lastkey_length-extra;
399	subkeys=ft_sintXkorr(info->lastkey+off);
400	}
401	if (subkeys<`0` \|\| info->lastpos < info->state->data_file_length)
402	break;
403	r= _mi_search_next(info, ftbw->keyinfo, info->lastkey,
404	info->lastkey_length,
405	SEARCH_BIGGER, ftbw->key_root);
406	}
407
408	if (!r && !ftbw->off)
409	{
410	r= ha_compare_text(ftb->charset,
411	info->lastkey+`1`,
412	info->lastkey_length-extra-`1`,
413	(uchar*) ftbw->word+`1`,
414	ftbw->len-`1`,
415	(my_bool) (ftbw->flags & FTB_FLAG_TRUNC));
416	}
417
418	if (r) / not found /
419	{
420	if (!ftbw->off \|\| !(ftbw->flags & FTB_FLAG_TRUNC))
421	{
422	ftbw->docid[`0`]=HA_OFFSET_ERROR;
423	if ((ftbw->flags & FTB_FLAG_YES) && ftbw->up->up==`0`)
424	{
425	/*
426	This word MUST BE present in every document returned,
427	so we can stop the search right now
428	*/
429	ftb->state=INDEX_DONE;
430	return `1`; / search is done /
431	}
432	else
433	return `0`;
434	}
435
436	/*
437	Going up to the first-level tree to continue search there.
438	Only done when performing prefix search.
439
440	Key buffer data pointer as well as docid[0] may be smaller
441	than values we got while searching first-level tree. Thus
442	they must be restored to original values to avoid dead-loop,
443	when subsequent search for a bigger value eventually ends up
444	in this same second-level tree.
445	*/
446	_mi_dpointer(info, (uchar*) (lastkey_buf+HA_FT_WLEN), ftbw->key_root);
447	ftbw->docid[`0`]= ftbw->key_root;
448	ftbw->key_root=info->s->state.key_root[ftb->keynr];
449	ftbw->keyinfo=info->s->keyinfo+ftb->keynr;
450	ftbw->off=`0`;
451	return _ft2_search_no_lock(ftb, ftbw, `0`);
452	}
453
454	/ matching key found /
455	memcpy(lastkey_buf, info->lastkey, info->lastkey_length);
456	if (lastkey_buf == ftbw->word)
457	ftbw->len=info->lastkey_length-extra;
458
459	/ going down ? /
460	if (subkeys<`0`)
461	{
462	/*
463	yep, going down, to the second-level tree
464	TODO here: subkey-based optimization
465	*/
466	ftbw->off=off;
467	ftbw->key_root=info->lastpos;
468	ftbw->keyinfo=& info->s->ft2_keyinfo;
469	r=_mi_search_first(info, ftbw->keyinfo, ftbw->key_root);
470	DBUG_ASSERT(r==`0`); / found something /
471	memcpy(lastkey_buf+off, info->lastkey, info->lastkey_length);
472	}
473	ftbw->docid[`0`]=info->lastpos;
474	if (ftbw->flags & FTB_FLAG_YES && !(ftbw->flags & FTB_FLAG_TRUNC))
475	ftbw->max_docid_expr->max_docid= info->lastpos;
476	return `0`;
477	}
478
479	static int _ft2_search(FTB ftb, FTB_WORD ftbw, my_bool init_search)
480	{
481	int r;
482	MYISAM_SHARE *share= ftb->info->s;
483	if (share->concurrent_insert)
484	mysql_rwlock_rdlock(&share->key_root_lock[ftb->keynr]);
485	r= _ft2_search_no_lock(ftb, ftbw, init_search);
486	if (share->concurrent_insert)
487	mysql_rwlock_unlock(&share->key_root_lock[ftb->keynr]);
488	return r;
489	}
490
491	static void _ftb_init_index_search(FT_INFO *ftb)
492	{
493	int i;
494	FTB_WORD *ftbw;
495
496	if (ftb->state == UNINITIALIZED \|\| ftb->keynr == NO_SUCH_KEY)
497	return;
498	ftb->state=INDEX_SEARCH;
499
500	for (i= queue_last_element(&ftb->queue);
501	i >= (int) queue_first_element(&ftb->queue);
502	i--)
503	{
504	ftbw=(FTB_WORD *)(queue_element(&ftb->queue, i));
505
506	if (ftbw->flags & FTB_FLAG_TRUNC)
507	{
508	/*
509	special treatment for truncation operator
510	1. there are some (besides this) +words
511	\| no need to search in the index, it can never ADD new rows
512	\| to the result, and to remove half-matched rows we do scan anyway
513	2. -trunc*
514	\| same as 1.
515	3. in 1 and 2, +/- need not be on the same expr. level,
516	but can be on any upper level, as in +word +(trunc1 trunc2)
517	4. otherwise
518	\| We have to index-search for this prefix.
519	\| It may cause duplicates, as in the index (sorted by <word,docid>)
520	\| <aaaa,row1>
521	\| <aabb,row2>
522	\| <aacc,row1>
523	\| Searching for "aa" will find row1 twice...*
524	*/
525	FTB_EXPR *ftbe;
526	for (ftbe=(FTB_EXPR*)ftbw;
527	ftbe->up && !(ftbe->up->flags & FTB_FLAG_TRUNC);
528	ftbe->up->flags\|= FTB_FLAG_TRUNC, ftbe=ftbe->up)
529	{
530	if (ftbe->flags & FTB_FLAG_NO \|\| / 2 /
531	ftbe->up->ythresh - ftbe->up->yweaks >
532	(uint) MY_TEST(ftbe->flags & FTB_FLAG_YES)) / 1 /
533	{
534	FTB_EXPR *top_ftbe=ftbe->up;
535	ftbw->docid[`0`]=HA_OFFSET_ERROR;
536	for (ftbe=(FTB_EXPR *)ftbw;
537	ftbe != top_ftbe && !(ftbe->flags & FTB_FLAG_NO);
538	ftbe=ftbe->up)
539	ftbe->up->yweaks++;
540	ftbe=`0`;
541	break;
542	}
543	}
544	if (!ftbe)
545	continue;
546	/ 4 /
547	if (!is_tree_inited(& ftb->no_dupes))
548	init_tree(& ftb->no_dupes,`0`,`0`,sizeof(my_off_t),
549	_ftb_no_dupes_cmp,`0`,`0`,MYF(`0`));
550	else
551	reset_tree(& ftb->no_dupes);
552	}
553
554	ftbw->off=`0`; / in case of reinit /
555	if (_ft2_search(ftb, ftbw, `1`))
556	return;
557	}
558	queue_fix(& ftb->queue);
559	}
560
561
562	FT_INFO * ft_init_boolean_search(MI_INFO info, uint keynr, uchar query,
563	uint query_len, CHARSET_INFO *cs)
564	{
565	FTB *ftb;
566	FTB_EXPR *ftbe;
567	FTB_WORD *ftbw;
568
569	if (!(ftb=(FTB )my_malloc(sizeof*(FTB), MYF(MY_WME))))
570	return `0`;
571	ftb->please= (struct _ft_vft *) & _ft_vft_boolean;
572	ftb->state=UNINITIALIZED;
573	ftb->info=info;
574	ftb->keynr=keynr;
575	ftb->charset=cs;
576	DBUG_ASSERT(keynr==NO_SUCH_KEY \|\| cs == info->s->keyinfo[keynr].seg->charset);
577	ftb->with_scan=`0`;
578	ftb->lastpos=HA_OFFSET_ERROR;
579	bzero(& ftb->no_dupes, sizeof(TREE));
580	ftb->last_word= `0`;
581
582	init_alloc_root(&ftb->mem_root, "fulltext", `1024`, `1024`, MYF(`0`));
583	ftb->queue.max_elements= `0`;
584	if (!(ftbe=(FTB_EXPR )alloc_root(&ftb->mem_root, sizeof*(FTB_EXPR))))
585	goto err;
586	ftbe->weight=`1`;
587	ftbe->flags=FTB_FLAG_YES;
588	ftbe->nos=`1`;
589	ftbe->up=`0`;
590	ftbe->max_docid= ftbe->ythresh= ftbe->yweaks= `0`;
591	ftbe->docid[`0`]=ftbe->docid[`1`]=HA_OFFSET_ERROR;
592	ftbe->phrase= NULL;
593	ftbe->document= `0`;
594	ftb->root=ftbe;
595	if (unlikely(_ftb_parse_query(ftb, query, query_len,
596	keynr == NO_SUCH_KEY ? &ft_default_parser :
597	info->s->keyinfo[keynr].parser)))
598	goto err;
599	/*
600	Hack: instead of init_queue, we'll use reinit queue to be able
601	to alloc queue with alloc_root()
602	*/
603	if (! (ftb->queue.root= (uchar **)alloc_root(&ftb->mem_root,
604	(ftb->queue.max_elements + `1`) *
605	sizeof(void *))))
606	goto err;
607	reinit_queue(&ftb->queue, ftb->queue.max_elements, `0`, `0`,
608	(int ()(void*, uchar, uchar*))FTB_WORD_cmp, `0`, `0`, `0`);
609	for (ftbw= ftb->last_word; ftbw; ftbw= ftbw->prev)
610	queue_insert(&ftb->queue, (uchar *)ftbw);
611	ftb->list=(FTB_WORD **)alloc_root(&ftb->mem_root,
612	sizeof(FTB_WORD )ftb->queue.elements);
613	memcpy(ftb->list, &queue_top(&ftb->queue), sizeof(FTB_WORD )ftb->queue.elements);
614	my_qsort2(ftb->list, ftb->queue.elements, sizeof(FTB_WORD *),
615	(qsort2_cmp)FTB_WORD_cmp_list, (void*)ftb->charset);
616	if (ftb->queue.elements<`2`) ftb->with_scan &= ~FTB_FLAG_TRUNC;
617	ftb->state=READY;
618	return ftb;
619	err:
620	free_root(& ftb->mem_root, MYF(`0`));
621	my_free(ftb);
622	return `0`;
623	}
624
625
626	typedef struct st_my_ftb_phrase_param
627	{
628	LIST *phrase;
629	LIST *document;
630	CHARSET_INFO *cs;
631	uint phrase_length;
632	uint document_length;
633	uint match;
634	} MY_FTB_PHRASE_PARAM;
635
636
637	static int ftb_phrase_add_word(MYSQL_FTPARSER_PARAM *param,
638	const char word, int* word_len,
639	MYSQL_FTPARSER_BOOLEAN_INFO boolean_info __attribute__*((unused)))
640	{
641	MY_FTB_PHRASE_PARAM *phrase_param= param->mysql_ftparam;
642	FT_WORD w= (FT_WORD )phrase_param->document->data;
643	LIST phrase, document;
644	w->pos= (uchar*) word;
645	w->len= word_len;
646	phrase_param->document= phrase_param->document->prev;
647	if (phrase_param->phrase_length > phrase_param->document_length)
648	{
649	phrase_param->document_length++;
650	return `0`;
651	}
652	/ TODO: rewrite phrase search to avoid*
653	comparing the same word twice. /*
654	for (phrase= phrase_param->phrase, document= phrase_param->document->next;
655	phrase; phrase= phrase->next, document= document->next)
656	{
657	FT_WORD phrase_word= (FT_WORD )phrase->data;
658	FT_WORD document_word= (FT_WORD )document->data;
659	if (my_strnncoll(phrase_param->cs, (uchar*) phrase_word->pos,
660	phrase_word->len,
661	(uchar*) document_word->pos, document_word->len))
662	return `0`;
663	}
664	phrase_param->match++;
665	return `0`;
666	}
667
668
669	static int ftb_check_phrase_internal(MYSQL_FTPARSER_PARAM *param,
670	const char document, int* len)
671	{
672	FT_WORD word;
673	MY_FTB_PHRASE_PARAM *phrase_param= param->mysql_ftparam;
674	const uchar docend= (uchar) document + len;
675	while (ft_simple_get_word(phrase_param->cs, (uchar**) &document, docend,
676	&word, FALSE))
677	{
678	param->mysql_add_word(param, (char) word.pos, (int*)word.len, `0`);
679	if (phrase_param->match)
680	break;
681	}
682	return `0`;
683	}
684
685
686	/*
687	Checks if given buffer matches phrase list.
688
689	SYNOPSIS
690	_ftb_check_phrase()
691	s0 start of buffer
692	e0 end of buffer
693	phrase broken into list phrase
694	cs charset info
695
696	RETURN VALUE
697	1 is returned if phrase found, 0 else.
698	-1 is returned if error occurs.
699	*/
700
701	static int _ftb_check_phrase(FTB ftb, const* uchar *document, uint len,
702	FTB_EXPR ftbe, struct* st_mysql_ftparser *parser)
703	{
704	MY_FTB_PHRASE_PARAM ftb_param;
705	MYSQL_FTPARSER_PARAM *param;
706	DBUG_ENTER("_ftb_check_phrase");
707	DBUG_ASSERT(parser);
708
709	if (! (param= ftparser_call_initializer(ftb->info, ftb->keynr, `1`)))
710	DBUG_RETURN(`0`);
711
712	ftb_param.phrase= ftbe->phrase;
713	ftb_param.document= ftbe->document;
714	ftb_param.cs= ftb->charset;
715	ftb_param.phrase_length= list_length(ftbe->phrase);
716	ftb_param.document_length= `1`;
717	ftb_param.match= `0`;
718
719	param->mysql_parse= ftb_check_phrase_internal;
720	param->mysql_add_word= ftb_phrase_add_word;
721	param->mysql_ftparam= (void *)&ftb_param;
722	param->cs= ftb->charset;
723	param->doc= (char *) document;
724	param->length= len;
725	param->flags= `0`;
726	param->mode= MYSQL_FTPARSER_WITH_STOPWORDS;
727	if (unlikely(parser->parse(param)))
728	return -`1`;
729	DBUG_RETURN(ftb_param.match ? `1` : `0`);
730	}
731
732
733	static int _ftb_climb_the_tree(FTB ftb, FTB_WORD ftbw, FT_SEG_ITERATOR *ftsi_orig)
734	{
735	FT_SEG_ITERATOR ftsi;
736	FTB_EXPR *ftbe;
737	float weight=ftbw->weight;
738	int yn_flag= ftbw->flags, ythresh, mode=(ftsi_orig != `0`);
739	my_off_t curdoc=ftbw->docid[mode];
740	struct st_mysql_ftparser *parser= ftb->keynr == NO_SUCH_KEY ?
741	&ft_default_parser :
742	ftb->info->s->keyinfo[ftb->keynr].parser;
743
744	for (ftbe=ftbw->up; ftbe; ftbe=ftbe->up)
745	{
746	ythresh = ftbe->ythresh - (mode ? `0` : ftbe->yweaks);
747	if (ftbe->docid[mode] != curdoc)
748	{
749	ftbe->cur_weight=`0`;
750	ftbe->yesses=ftbe->nos=`0`;
751	ftbe->docid[mode]=curdoc;
752	}
753	if (ftbe->nos)
754	break;
755	if (yn_flag & FTB_FLAG_YES)
756	{
757	weight /= ftbe->ythresh;
758	ftbe->cur_weight += weight;
759	if ((int) ++ftbe->yesses == ythresh)
760	{
761	yn_flag=ftbe->flags;
762	weight=ftbe->cur_weight*ftbe->weight;
763	if (mode && ftbe->phrase)
764	{
765	int found= `0`;
766
767	memcpy(&ftsi, ftsi_orig, sizeof(ftsi));
768	while (_mi_ft_segiterator(&ftsi) && !found)
769	{
770	if (!ftsi.pos)
771	continue;
772	found= _ftb_check_phrase(ftb, ftsi.pos, ftsi.len, ftbe, parser);
773	if (unlikely(found < `0`))
774	return `1`;
775	}
776	if (!found)
777	break;
778	} / ftbe->quot /
779	}
780	else
781	break;
782	}
783	else
784	if (yn_flag & FTB_FLAG_NO)
785	{
786	/*
787	NOTE: special sort function of queue assures that all
788	(yn_flag & FTB_FLAG_NO) != 0
789	events for every particular subexpression will
790	"auto-magically" happen BEFORE all the
791	(yn_flag & FTB_FLAG_YES) != 0 events. So no
792	already matched expression can become not-matched again.
793	*/
794	++ftbe->nos;
795	break;
796	}
797	else
798	{
799	if (ftbe->ythresh)
800	weight/=`3`;
801	ftbe->cur_weight += weight;
802	if ((int) ftbe->yesses < ythresh)
803	break;
804	if (!(yn_flag & FTB_FLAG_WONLY))
805	yn_flag= ((int) ftbe->yesses++ == ythresh) ? ftbe->flags : FTB_FLAG_WONLY ;
806	weight*= ftbe->weight;
807	}
808	}
809	return `0`;
810	}
811
812
813	int ft_boolean_read_next(FT_INFO ftb, char* *record)
814	{
815	FTB_EXPR *ftbe;
816	FTB_WORD *ftbw;
817	MI_INFO *info=ftb->info;
818	my_off_t curdoc;
819
820	if (ftb->state != INDEX_SEARCH && ftb->state != INDEX_DONE)
821	return -`1`;
822
823	/ black magic ON /
824	if ((int) _mi_check_index(info, ftb->keynr) < `0`)
825	return my_errno;
826	if (_mi_readinfo(info, F_RDLCK, `1`))
827	return my_errno;
828	/ black magic OFF /
829
830	if (!ftb->queue.elements)
831	return my_errno=HA_ERR_END_OF_FILE;
832
833	/ Attention!!! Address of a local variable is used here! See err: label /
834	ftb->queue.first_cmp_arg=(void *)&curdoc;
835
836	while (ftb->state == INDEX_SEARCH &&
837	(curdoc=((FTB_WORD *)queue_top(& ftb->queue))->docid[`0`]) !=
838	HA_OFFSET_ERROR)
839	{
840	while (curdoc == (ftbw=(FTB_WORD *)queue_top(& ftb->queue))->docid[`0`])
841	{
842	if (unlikely(_ftb_climb_the_tree(ftb, ftbw, `0`)))
843	{
844	my_errno= HA_ERR_OUT_OF_MEM;
845	goto err;
846	}
847
848	/ update queue /
849	_ft2_search(ftb, ftbw, `0`);
850	queue_replace_top(&ftb->queue);
851	}
852
853	ftbe=ftb->root;
854	if (ftbe->docid[`0`]==curdoc && ftbe->cur_weight>`0` &&
855	ftbe->yesses>=(ftbe->ythresh-ftbe->yweaks) && !ftbe->nos)
856	{
857	/ curdoc matched ! /
858	if (is_tree_inited(&ftb->no_dupes) &&
859	tree_insert(&ftb->no_dupes, &curdoc, `0`,
860	ftb->no_dupes.custom_arg)->count >`1`)
861	/ but it managed already to get past this line once /
862	continue;
863
864	info->lastpos=curdoc;
865	/ Clear all states, except that the table was updated /
866	info->update&= (HA_STATE_CHANGED \| HA_STATE_ROW_CHANGED);
867
868	if (!(info->read_record)(info,curdoc, (uchar) record))
869	{
870	info->update\|= HA_STATE_AKTIV; / Record is read /
871	if (ftb->with_scan &&
872	ft_boolean_find_relevance(ftb,(uchar*) record,`0`)==`0`)
873	continue; / no match /
874	my_errno=`0`;
875	goto err;
876	}
877	goto err;
878	}
879	}
880	ftb->state=INDEX_DONE;
881	my_errno=HA_ERR_END_OF_FILE;
882	err:
883	ftb->queue.first_cmp_arg=(void *)`0`;
884	return my_errno;
885	}
886
887
888	typedef struct st_my_ftb_find_param
889	{
890	FT_INFO *ftb;
891	FT_SEG_ITERATOR *ftsi;
892	} MY_FTB_FIND_PARAM;
893
894
895	static int ftb_find_relevance_add_word(MYSQL_FTPARSER_PARAM *param,
896	const char word, int* len,
897	MYSQL_FTPARSER_BOOLEAN_INFO boolean_info __attribute__*((unused)))
898	{
899	MY_FTB_FIND_PARAM *ftb_param= param->mysql_ftparam;
900	FT_INFO *ftb= ftb_param->ftb;
901	FTB_WORD *ftbw;
902	int a, b, c;
903	/*
904	Find right-most element in the array of query words matching this
905	word from a document.
906	*/
907	for (a= `0`, b= ftb->queue.elements, c= (a+b)/`2`; b-a>`1`; c= (a+b)/`2`)
908	{
909	ftbw= ftb->list[c];
910	if (ha_compare_text(ftb->charset, (uchar*)word, len,
911	(uchar*)ftbw->word+`1`, ftbw->len-`1`,
912	(my_bool) (ftbw->flags & FTB_FLAG_TRUNC)) < `0`)
913	b= c;
914	else
915	a= c;
916	}
917	/*
918	If there were no words with truncation operator, we iterate to the
919	beginning of an array until array element is equal to the word from
920	a document. This is done mainly because the same word may be
921	mentioned twice (or more) in the query.
922
923	In case query has words with truncation operator we must iterate
924	to the beginning of the array. There may be non-matching query words
925	between matching word with truncation operator and the right-most
926	matching element. E.g., if we're looking for 'aaa15' in an array of
927	'aaa1 aaa14 aaa15 aaa16'.*
928
929	Worse of that there still may be match even if the binary search
930	above didn't find matching element. E.g., if we're looking for
931	'aaa15' in an array of 'aaa1 aaa14 aaa16'. The binary search will*
932	stop at 'aaa16'.
933	*/
934	for (; c >= `0`; c--)
935	{
936	ftbw= ftb->list[c];
937	if (ha_compare_text(ftb->charset, (uchar*)word, len,
938	(uchar*)ftbw->word + `1`,ftbw->len - `1`,
939	(my_bool)(ftbw->flags & FTB_FLAG_TRUNC)))
940	{
941	if (ftb->with_scan & FTB_FLAG_TRUNC)
942	continue;
943	else
944	break;
945	}
946	if (ftbw->docid[`1`] == ftb->info->lastpos)
947	continue;
948	ftbw->docid[`1`]= ftb->info->lastpos;
949	if (unlikely(_ftb_climb_the_tree(ftb, ftbw, ftb_param->ftsi)))
950	return `1`;
951	}
952	return(`0`);
953	}
954
955
956	static int ftb_find_relevance_parse(MYSQL_FTPARSER_PARAM *param,
957	const char doc, int* len)
958	{
959	MY_FTB_FIND_PARAM *ftb_param= param->mysql_ftparam;
960	FT_INFO *ftb= ftb_param->ftb;
961	uchar end= (uchar) doc + len;
962	FT_WORD w;
963	while (ft_simple_get_word(ftb->charset, (uchar**) &doc, end, &w, TRUE))
964	param->mysql_add_word(param, (char) w.pos, (int*)w.len, `0`);
965	return(`0`);
966	}
967
968
969	float ft_boolean_find_relevance(FT_INFO ftb, uchar record, uint length)
970	{
971	FTB_EXPR *ftbe;
972	FT_SEG_ITERATOR ftsi, ftsi2;
973	my_off_t docid=ftb->info->lastpos;
974	MY_FTB_FIND_PARAM ftb_param;
975	MYSQL_FTPARSER_PARAM *param;
976	struct st_mysql_ftparser *parser= ftb->keynr == NO_SUCH_KEY ?
977	&ft_default_parser :
978	ftb->info->s->keyinfo[ftb->keynr].parser;
979
980	if (docid == HA_OFFSET_ERROR)
981	return -`2.0`;
982	if (!ftb->queue.elements)
983	return `0`;
984	if (! (param= ftparser_call_initializer(ftb->info, ftb->keynr, `0`)))
985	return `0`;
986
987	if (ftb->state != INDEX_SEARCH && docid <= ftb->lastpos)
988	{
989	FTB_EXPR *x;
990	uint i;
991
992	for (i=`0`; i < ftb->queue.elements; i++)
993	{
994	ftb->list[i]->docid[`1`]=HA_OFFSET_ERROR;
995	for (x=ftb->list[i]->up; x; x=x->up)
996	x->docid[`1`]=HA_OFFSET_ERROR;
997	}
998	}
999
1000	ftb->lastpos=docid;
1001
1002	if (ftb->keynr==NO_SUCH_KEY)
1003	_mi_ft_segiterator_dummy_init(record, length, &ftsi);
1004	else
1005	_mi_ft_segiterator_init(ftb->info, ftb->keynr, record, &ftsi);
1006	memcpy(&ftsi2, &ftsi, sizeof(ftsi));
1007
1008	ftb_param.ftb= ftb;
1009	ftb_param.ftsi= &ftsi2;
1010	param->mysql_parse= ftb_find_relevance_parse;
1011	param->mysql_add_word= ftb_find_relevance_add_word;
1012	param->mysql_ftparam= (void *)&ftb_param;
1013	param->flags= `0`;
1014	param->cs= ftb->charset;
1015	param->mode= MYSQL_FTPARSER_SIMPLE_MODE;
1016	while (_mi_ft_segiterator(&ftsi))
1017	{
1018	if (!ftsi.pos)
1019	continue;
1020	param->doc= (char *)ftsi.pos;
1021	param->length= ftsi.len;
1022	if (unlikely(parser->parse(param)))
1023	return `0`;
1024	}
1025	ftbe=ftb->root;
1026	if (ftbe->docid[`1`]==docid && ftbe->cur_weight>`0` &&
1027	ftbe->yesses>=ftbe->ythresh && !ftbe->nos)
1028	{ / row matched ! /
1029	return ftbe->cur_weight;
1030	}
1031	else
1032	{ / match failed ! /
1033	return `0.0`;
1034	}
1035	}
1036
1037
1038	void ft_boolean_close_search(FT_INFO *ftb)
1039	{
1040	if (is_tree_inited(& ftb->no_dupes))
1041	{
1042	delete_tree(&ftb->no_dupes, `0`);
1043	}
1044	free_root(& ftb->mem_root, MYF(`0`));
1045	my_free(ftb);
1046	}
1047
1048
1049	float ft_boolean_get_relevance(FT_INFO *ftb)
1050	{
1051	return ftb->root->cur_weight;
1052	}
1053
1054
1055	void ft_boolean_reinit_search(FT_INFO *ftb)
1056	{
1057	_ftb_init_index_search(ftb);
1058	}
1059
1060

Browse the source code of MariaDB/storage/myisam/ft_boolean_search.c