1/* Copyright (c) 2000-2008 MySQL AB, 2009 Sun Microsystems, Inc.
2 Use is subject to license terms.
3
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; version 2 of the License.
7
8 This program is distributed in the hope that it will be useful,
9 but WITHOUT ANY WARRANTY; without even the implied warranty of
10 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 GNU General Public License for more details.
12
13 You should have received a copy of the GNU General Public License
14 along with this program; if not, write to the Free Software
15 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
16
17/* Written by Sergei A. Golubchik, who has a shared copyright to this code */
18
19#include "ftdefs.h"
20
21ulong ft_min_word_len= 4;
22ulong ft_max_word_len= HA_FT_MAXCHARLEN;
23ulong ft_query_expansion_limit= 5;
24const char *ft_boolean_syntax= DEFAULT_FTB_SYNTAX;
25
26const HA_KEYSEG ft_keysegs[FT_SEGS]= {
27{
28 0, /* charset */
29 HA_FT_WLEN, /* start */
30 0, /* null_pos */
31 0, /* Bit pos */
32 HA_VAR_LENGTH_PART | HA_PACK_KEY, /* flag */
33 HA_FT_MAXBYTELEN, /* length */
34 63, /* language (will be overwritten) */
35 HA_KEYTYPE_VARTEXT2, /* type */
36 0, /* null_bit */
37 2, 0 /* bit_start, bit_length */
38},
39{
40 /*
41 Note, this (and the last HA_KEYTYPE_END) segment should NOT
42 be packed in any way, otherwise w_search() won't be able to
43 update key entry 'in vivo'
44 */
45 0, 0, 0, 0, HA_NO_SORT, HA_FT_WLEN, 63, HA_FT_WTYPE, 0, 0, 0
46}
47};
48
49const struct _ft_vft _ft_vft_nlq= {
50 ft_nlq_read_next, ft_nlq_find_relevance, ft_nlq_close_search,
51 ft_nlq_get_relevance, ft_nlq_reinit_search
52};
53const struct _ft_vft _ft_vft_boolean= {
54 ft_boolean_read_next, ft_boolean_find_relevance, ft_boolean_close_search,
55 ft_boolean_get_relevance, ft_boolean_reinit_search
56};
57
/*
  Stopword file setting; it starts out as a null pointer.
  NOTE(review): presumably holds a file name - confirm against its users.
*/
const char *ft_stopword_file = NULL;

/*
  NULL-terminated list of built-in stopwords.

  The words themselves are compiled in only when COMPILE_STOPWORDS_IN is
  defined; without it the list consists of the terminating NULL alone.
  Entries are kept in the order of the original list.
*/
const char *ft_precompiled_stopwords[] =
{
#ifdef COMPILE_STOPWORDS_IN

  /*
    The list originates from the SMART distribution
      ftp://ftp.cs.cornell.edu/pub/smart/smart.11.0.tar.Z
    and was slightly modified to the original author's taste.
  */

  /* a */
  "a's", "able", "about", "above", "according", "accordingly",
  "across", "actually", "after", "afterwards", "again", "against",
  "ain't", "all", "allow", "allows", "almost", "alone",
  "along", "already", "also", "although", "always", "am",
  "among", "amongst", "an", "and", "another", "any",
  "anybody", "anyhow", "anyone", "anything", "anyway", "anyways",
  "anywhere", "apart", "appear", "appreciate", "appropriate", "are",
  "aren't", "around", "as", "aside", "ask", "asking",
  "associated", "at", "available", "away", "awfully",

  /* b */
  "be", "became", "because", "become", "becomes", "becoming",
  "been", "before", "beforehand", "behind", "being", "believe",
  "below", "beside", "besides", "best", "better", "between",
  "beyond", "both", "brief", "but", "by",

  /* c */
  "c'mon", "c's", "came", "can", "can't", "cannot",
  "cant", "cause", "causes", "certain", "certainly", "changes",
  "clearly", "co", "com", "come", "comes", "concerning",
  "consequently", "consider", "considering", "contain", "containing",
  "contains", "corresponding", "could", "couldn't", "course", "currently",

  /* d */
  "definitely", "described", "despite", "did", "didn't", "different",
  "do", "does", "doesn't", "doing", "don't", "done",
  "down", "downwards", "during",

  /* e */
  "each", "edu", "eg", "eight", "either", "else",
  "elsewhere", "enough", "entirely", "especially", "et", "etc",
  "even", "ever", "every", "everybody", "everyone", "everything",
  "everywhere", "ex", "exactly", "example", "except",

  /* f */
  "far", "few", "fifth", "first", "five", "followed",
  "following", "follows", "for", "former", "formerly", "forth",
  "four", "from", "further", "furthermore",

  /* g */
  "get", "gets", "getting", "given", "gives", "go",
  "goes", "going", "gone", "got", "gotten", "greetings",

  /* h */
  "had", "hadn't", "happens", "hardly", "has", "hasn't",
  "have", "haven't", "having", "he", "he's", "hello",
  "help", "hence", "her", "here", "here's", "hereafter",
  "hereby", "herein", "hereupon", "hers", "herself", "hi",
  "him", "himself", "his", "hither", "hopefully", "how",
  "howbeit", "however",

  /* i */
  "i'd", "i'll", "i'm", "i've", "ie", "if",
  "ignored", "immediate", "in", "inasmuch", "inc", "indeed",
  "indicate", "indicated", "indicates", "inner", "insofar", "instead",
  "into", "inward", "is", "isn't", "it", "it'd",
  "it'll", "it's", "its", "itself",

  /* j */
  "just",

  /* k */
  "keep", "keeps", "kept", "know", "knows", "known",

  /* l */
  "last", "lately", "later", "latter", "latterly", "least",
  "less", "lest", "let", "let's", "like", "liked",
  "likely", "little", "look", "looking", "looks", "ltd",

  /* m */
  "mainly", "many", "may", "maybe", "me", "mean",
  "meanwhile", "merely", "might", "more", "moreover", "most",
  "mostly", "much", "must", "my", "myself",

  /* n */
  "name", "namely", "nd", "near", "nearly", "necessary",
  "need", "needs", "neither", "never", "nevertheless", "new",
  "next", "nine", "no", "nobody", "non", "none",
  "noone", "nor", "normally", "not", "nothing", "novel",
  "now", "nowhere",

  /* o */
  "obviously", "of", "off", "often", "oh", "ok",
  "okay", "old", "on", "once", "one", "ones",
  "only", "onto", "or", "other", "others", "otherwise",
  "ought", "our", "ours", "ourselves", "out", "outside",
  "over", "overall", "own",

  /* p */
  "particular", "particularly", "per", "perhaps", "placed", "please",
  "plus", "possible", "presumably", "probably", "provides",

  /* q */
  "que", "quite", "qv",

  /* r */
  "rather", "rd", "re", "really", "reasonably", "regarding",
  "regardless", "regards", "relatively", "respectively", "right",

  /* s */
  "said", "same", "saw", "say", "saying", "says",
  "second", "secondly", "see", "seeing", "seem", "seemed",
  "seeming", "seems", "seen", "self", "selves", "sensible",
  "sent", "serious", "seriously", "seven", "several", "shall",
  "she", "should", "shouldn't", "since", "six", "so",
  "some", "somebody", "somehow", "someone", "something", "sometime",
  "sometimes", "somewhat", "somewhere", "soon", "sorry", "specified",
  "specify", "specifying", "still", "sub", "such", "sup",
  "sure",

  /* t */
  "t's", "take", "taken", "tell", "tends", "th",
  "than", "thank", "thanks", "thanx", "that", "that's",
  "thats", "the", "their", "theirs", "them", "themselves",
  "then", "thence", "there", "there's", "thereafter", "thereby",
  "therefore", "therein", "theres", "thereupon", "these", "they",
  "they'd", "they'll", "they're", "they've", "think", "third",
  "this", "thorough", "thoroughly", "those", "though", "three",
  "through", "throughout", "thru", "thus", "to", "together",
  "too", "took", "toward", "towards", "tried", "tries",
  "truly", "try", "trying", "twice", "two",

  /* u */
  "un", "under", "unfortunately", "unless", "unlikely", "until",
  "unto", "up", "upon", "us", "use", "used",
  "useful", "uses", "using", "usually",

  /* v */
  "value", "various", "very", "via", "viz", "vs",

  /* w */
  "want", "wants", "was", "wasn't", "way", "we",
  "we'd", "we'll", "we're", "we've", "welcome", "well",
  "went", "were", "weren't", "what", "what's", "whatever",
  "when", "whence", "whenever", "where", "where's", "whereafter",
  "whereas", "whereby", "wherein", "whereupon", "wherever", "whether",
  "which", "while", "whither", "who", "who's", "whoever",
  "whole", "whom", "whose", "why", "will", "willing",
  "wish", "with", "within", "without", "won't", "wonder",
  "would", "wouldn't",

  /* y */
  "yes", "yet", "you", "you'd", "you'll", "you're",
  "you've", "your", "yours", "yourself", "yourselves",

  /* z */
  "zero",

#endif /* COMPILE_STOPWORDS_IN */

  NULL
};
614
615static int ft_default_parser_parse(MYSQL_FTPARSER_PARAM *param)
616{
617 return param->mysql_parse(param, param->doc, param->length);
618}
619
620struct st_mysql_ftparser ft_default_parser=
621{
622 MYSQL_FTPARSER_INTERFACE_VERSION, ft_default_parser_parse, 0, 0
623};
624
625