1 | /* Copyright (c) 2000-2008 MySQL AB, 2009 Sun Microsystems, Inc. |
2 | Use is subject to license terms. |
3 | |
4 | This program is free software; you can redistribute it and/or modify |
5 | it under the terms of the GNU General Public License as published by |
6 | the Free Software Foundation; version 2 of the License. |
7 | |
8 | This program is distributed in the hope that it will be useful, |
9 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
10 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
11 | GNU General Public License for more details. |
12 | |
13 | You should have received a copy of the GNU General Public License |
14 | along with this program; if not, write to the Free Software |
15 | Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ |
16 | |
17 | /* Written by Sergei A. Golubchik, who has a shared copyright to this code */ |
18 | |
19 | #include "ftdefs.h" |
20 | |
21 | ulong ft_min_word_len= 4; |
22 | ulong ft_max_word_len= HA_FT_MAXCHARLEN; |
23 | ulong ft_query_expansion_limit= 5; |
24 | const char *ft_boolean_syntax= DEFAULT_FTB_SYNTAX; |
25 | |
/*
  Key segment descriptors used by the full-text (ft_*) code: FT_SEGS
  entries of HA_KEYSEG.

  Every member is initialized positionally, so the order of the values
  below must match the member order of HA_KEYSEG, which is declared
  elsewhere.  The per-value labels are the original author's;
  NOTE(review): confirm them against the current HA_KEYSEG declaration
  before adding, removing or reordering any value.
*/
const HA_KEYSEG ft_keysegs[FT_SEGS]= {
  {
    0, /* charset */
    HA_FT_WLEN, /* start */
    0, /* null_pos */
    0, /* Bit pos */
    HA_VAR_LENGTH_PART | HA_PACK_KEY, /* flag */
    HA_FT_MAXBYTELEN, /* length */
    63, /* language (will be overwritten) */
    HA_KEYTYPE_VARTEXT2, /* type */
    0, /* null_bit */
    2, 0 /* bit_start, bit_length */
  },
  {
    /*
      Note: this segment (and the final HA_KEYTYPE_END segment) must NOT
      be packed in any way; otherwise w_search() will not be able to
      update the key entry in place.

      Values are listed in the same positional order as the first entry.
    */
    0, 0, 0, 0, HA_NO_SORT, HA_FT_WLEN, 63, HA_FT_WTYPE, 0, 0, 0
  }
};
48 | |
49 | const struct _ft_vft _ft_vft_nlq= { |
50 | ft_nlq_read_next, ft_nlq_find_relevance, ft_nlq_close_search, |
51 | ft_nlq_get_relevance, ft_nlq_reinit_search |
52 | }; |
53 | const struct _ft_vft _ft_vft_boolean= { |
54 | ft_boolean_read_next, ft_boolean_find_relevance, ft_boolean_close_search, |
55 | ft_boolean_get_relevance, ft_boolean_reinit_search |
56 | }; |
57 | |
/*
  ft_stopword_file starts out as a null pointer.
  NOTE(review): presumably the name of an external stopword file set by
  other code; its consumers are not visible in this file.

  ft_precompiled_stopwords is a NULL-terminated array of C strings.  The
  words are compiled in only when COMPILE_STOPWORDS_IN is defined; without
  it the array holds nothing but the terminating NULL.

  NOTE(review): the entries are almost, but not strictly, in alphabetical
  order (at least "knows" precedes "known"), so do not assume the array is
  sorted without checking how it is consumed.
*/
const char *ft_stopword_file= 0;
const char *ft_precompiled_stopwords[]= {

#ifdef COMPILE_STOPWORDS_IN

/*
  This stopword list was taken from the SMART distribution,
    ftp://ftp.cs.cornell.edu/pub/smart/smart.11.0.tar.Z
  and was slightly modified to the original author's taste.
*/

  "a's" ,
  "able" ,
  "about" ,
  "above" ,
  "according" ,
  "accordingly" ,
  "across" ,
  "actually" ,
  "after" ,
  "afterwards" ,
  "again" ,
  "against" ,
  "ain't" ,
  "all" ,
  "allow" ,
  "allows" ,
  "almost" ,
  "alone" ,
  "along" ,
  "already" ,
  "also" ,
  "although" ,
  "always" ,
  "am" ,
  "among" ,
  "amongst" ,
  "an" ,
  "and" ,
  "another" ,
  "any" ,
  "anybody" ,
  "anyhow" ,
  "anyone" ,
  "anything" ,
  "anyway" ,
  "anyways" ,
  "anywhere" ,
  "apart" ,
  "appear" ,
  "appreciate" ,
  "appropriate" ,
  "are" ,
  "aren't" ,
  "around" ,
  "as" ,
  "aside" ,
  "ask" ,
  "asking" ,
  "associated" ,
  "at" ,
  "available" ,
  "away" ,
  "awfully" ,
  "be" ,
  "became" ,
  "because" ,
  "become" ,
  "becomes" ,
  "becoming" ,
  "been" ,
  "before" ,
  "beforehand" ,
  "behind" ,
  "being" ,
  "believe" ,
  "below" ,
  "beside" ,
  "besides" ,
  "best" ,
  "better" ,
  "between" ,
  "beyond" ,
  "both" ,
  "brief" ,
  "but" ,
  "by" ,
  "c'mon" ,
  "c's" ,
  "came" ,
  "can" ,
  "can't" ,
  "cannot" ,
  "cant" ,
  "cause" ,
  "causes" ,
  "certain" ,
  "certainly" ,
  "changes" ,
  "clearly" ,
  "co" ,
  "com" ,
  "come" ,
  "comes" ,
  "concerning" ,
  "consequently" ,
  "consider" ,
  "considering" ,
  "contain" ,
  "containing" ,
  "contains" ,
  "corresponding" ,
  "could" ,
  "couldn't" ,
  "course" ,
  "currently" ,
  "definitely" ,
  "described" ,
  "despite" ,
  "did" ,
  "didn't" ,
  "different" ,
  "do" ,
  "does" ,
  "doesn't" ,
  "doing" ,
  "don't" ,
  "done" ,
  "down" ,
  "downwards" ,
  "during" ,
  "each" ,
  "edu" ,
  "eg" ,
  "eight" ,
  "either" ,
  "else" ,
  "elsewhere" ,
  "enough" ,
  "entirely" ,
  "especially" ,
  "et" ,
  "etc" ,
  "even" ,
  "ever" ,
  "every" ,
  "everybody" ,
  "everyone" ,
  "everything" ,
  "everywhere" ,
  "ex" ,
  "exactly" ,
  "example" ,
  "except" ,
  "far" ,
  "few" ,
  "fifth" ,
  "first" ,
  "five" ,
  "followed" ,
  "following" ,
  "follows" ,
  "for" ,
  "former" ,
  "formerly" ,
  "forth" ,
  "four" ,
  "from" ,
  "further" ,
  "furthermore" ,
  "get" ,
  "gets" ,
  "getting" ,
  "given" ,
  "gives" ,
  "go" ,
  "goes" ,
  "going" ,
  "gone" ,
  "got" ,
  "gotten" ,
  "greetings" ,
  "had" ,
  "hadn't" ,
  "happens" ,
  "hardly" ,
  "has" ,
  "hasn't" ,
  "have" ,
  "haven't" ,
  "having" ,
  "he" ,
  "he's" ,
  "hello" ,
  "help" ,
  "hence" ,
  "her" ,
  "here" ,
  "here's" ,
  "hereafter" ,
  "hereby" ,
  "herein" ,
  "hereupon" ,
  "hers" ,
  "herself" ,
  "hi" ,
  "him" ,
  "himself" ,
  "his" ,
  "hither" ,
  "hopefully" ,
  "how" ,
  "howbeit" ,
  "however" ,
  "i'd" ,
  "i'll" ,
  "i'm" ,
  "i've" ,
  "ie" ,
  "if" ,
  "ignored" ,
  "immediate" ,
  "in" ,
  "inasmuch" ,
  "inc" ,
  "indeed" ,
  "indicate" ,
  "indicated" ,
  "indicates" ,
  "inner" ,
  "insofar" ,
  "instead" ,
  "into" ,
  "inward" ,
  "is" ,
  "isn't" ,
  "it" ,
  "it'd" ,
  "it'll" ,
  "it's" ,
  "its" ,
  "itself" ,
  "just" ,
  "keep" ,
  "keeps" ,
  "kept" ,
  "know" ,
  "knows" ,
  "known" ,
  "last" ,
  "lately" ,
  "later" ,
  "latter" ,
  "latterly" ,
  "least" ,
  "less" ,
  "lest" ,
  "let" ,
  "let's" ,
  "like" ,
  "liked" ,
  "likely" ,
  "little" ,
  "look" ,
  "looking" ,
  "looks" ,
  "ltd" ,
  "mainly" ,
  "many" ,
  "may" ,
  "maybe" ,
  "me" ,
  "mean" ,
  "meanwhile" ,
  "merely" ,
  "might" ,
  "more" ,
  "moreover" ,
  "most" ,
  "mostly" ,
  "much" ,
  "must" ,
  "my" ,
  "myself" ,
  "name" ,
  "namely" ,
  "nd" ,
  "near" ,
  "nearly" ,
  "necessary" ,
  "need" ,
  "needs" ,
  "neither" ,
  "never" ,
  "nevertheless" ,
  "new" ,
  "next" ,
  "nine" ,
  "no" ,
  "nobody" ,
  "non" ,
  "none" ,
  "noone" ,
  "nor" ,
  "normally" ,
  "not" ,
  "nothing" ,
  "novel" ,
  "now" ,
  "nowhere" ,
  "obviously" ,
  "of" ,
  "off" ,
  "often" ,
  "oh" ,
  "ok" ,
  "okay" ,
  "old" ,
  "on" ,
  "once" ,
  "one" ,
  "ones" ,
  "only" ,
  "onto" ,
  "or" ,
  "other" ,
  "others" ,
  "otherwise" ,
  "ought" ,
  "our" ,
  "ours" ,
  "ourselves" ,
  "out" ,
  "outside" ,
  "over" ,
  "overall" ,
  "own" ,
  "particular" ,
  "particularly" ,
  "per" ,
  "perhaps" ,
  "placed" ,
  "please" ,
  "plus" ,
  "possible" ,
  "presumably" ,
  "probably" ,
  "provides" ,
  "que" ,
  "quite" ,
  "qv" ,
  "rather" ,
  "rd" ,
  "re" ,
  "really" ,
  "reasonably" ,
  "regarding" ,
  "regardless" ,
  "regards" ,
  "relatively" ,
  "respectively" ,
  "right" ,
  "said" ,
  "same" ,
  "saw" ,
  "say" ,
  "saying" ,
  "says" ,
  "second" ,
  "secondly" ,
  "see" ,
  "seeing" ,
  "seem" ,
  "seemed" ,
  "seeming" ,
  "seems" ,
  "seen" ,
  "self" ,
  "selves" ,
  "sensible" ,
  "sent" ,
  "serious" ,
  "seriously" ,
  "seven" ,
  "several" ,
  "shall" ,
  "she" ,
  "should" ,
  "shouldn't" ,
  "since" ,
  "six" ,
  "so" ,
  "some" ,
  "somebody" ,
  "somehow" ,
  "someone" ,
  "something" ,
  "sometime" ,
  "sometimes" ,
  "somewhat" ,
  "somewhere" ,
  "soon" ,
  "sorry" ,
  "specified" ,
  "specify" ,
  "specifying" ,
  "still" ,
  "sub" ,
  "such" ,
  "sup" ,
  "sure" ,
  "t's" ,
  "take" ,
  "taken" ,
  "tell" ,
  "tends" ,
  "th" ,
  "than" ,
  "thank" ,
  "thanks" ,
  "thanx" ,
  "that" ,
  "that's" ,
  "thats" ,
  "the" ,
  "their" ,
  "theirs" ,
  "them" ,
  "themselves" ,
  "then" ,
  "thence" ,
  "there" ,
  "there's" ,
  "thereafter" ,
  "thereby" ,
  "therefore" ,
  "therein" ,
  "theres" ,
  "thereupon" ,
  "these" ,
  "they" ,
  "they'd" ,
  "they'll" ,
  "they're" ,
  "they've" ,
  "think" ,
  "third" ,
  "this" ,
  "thorough" ,
  "thoroughly" ,
  "those" ,
  "though" ,
  "three" ,
  "through" ,
  "throughout" ,
  "thru" ,
  "thus" ,
  "to" ,
  "together" ,
  "too" ,
  "took" ,
  "toward" ,
  "towards" ,
  "tried" ,
  "tries" ,
  "truly" ,
  "try" ,
  "trying" ,
  "twice" ,
  "two" ,
  "un" ,
  "under" ,
  "unfortunately" ,
  "unless" ,
  "unlikely" ,
  "until" ,
  "unto" ,
  "up" ,
  "upon" ,
  "us" ,
  "use" ,
  "used" ,
  "useful" ,
  "uses" ,
  "using" ,
  "usually" ,
  "value" ,
  "various" ,
  "very" ,
  "via" ,
  "viz" ,
  "vs" ,
  "want" ,
  "wants" ,
  "was" ,
  "wasn't" ,
  "way" ,
  "we" ,
  "we'd" ,
  "we'll" ,
  "we're" ,
  "we've" ,
  "welcome" ,
  "well" ,
  "went" ,
  "were" ,
  "weren't" ,
  "what" ,
  "what's" ,
  "whatever" ,
  "when" ,
  "whence" ,
  "whenever" ,
  "where" ,
  "where's" ,
  "whereafter" ,
  "whereas" ,
  "whereby" ,
  "wherein" ,
  "whereupon" ,
  "wherever" ,
  "whether" ,
  "which" ,
  "while" ,
  "whither" ,
  "who" ,
  "who's" ,
  "whoever" ,
  "whole" ,
  "whom" ,
  "whose" ,
  "why" ,
  "will" ,
  "willing" ,
  "wish" ,
  "with" ,
  "within" ,
  "without" ,
  "won't" ,
  "wonder" ,
  "would" ,
  "wouldn't" ,
  "yes" ,
  "yet" ,
  "you" ,
  "you'd" ,
  "you'll" ,
  "you're" ,
  "you've" ,
  "your" ,
  "yours" ,
  "yourself" ,
  "yourselves" ,
  "zero" ,
#endif

  /* Terminator: marks the end of the list. */
  NULL };
614 | |
615 | static int ft_default_parser_parse(MYSQL_FTPARSER_PARAM *param) |
616 | { |
617 | return param->mysql_parse(param, param->doc, param->length); |
618 | } |
619 | |
620 | struct st_mysql_ftparser ft_default_parser= |
621 | { |
622 | MYSQL_FTPARSER_INTERFACE_VERSION, ft_default_parser_parse, 0, 0 |
623 | }; |
624 | |
625 | |