1 | /* Copyright (c) 2000, 2010, Oracle and/or its affiliates |
2 | |
3 | This program is free software; you can redistribute it and/or modify |
4 | it under the terms of the GNU General Public License as published by |
5 | the Free Software Foundation; version 2 of the License. |
6 | |
7 | This program is distributed in the hope that it will be useful, |
8 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
9 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
10 | GNU General Public License for more details. |
11 | |
12 | You should have received a copy of the GNU General Public License |
13 | along with this program; if not, write to the Free Software |
14 | Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA */ |
15 | |
16 | /* Written by Sergei A. Golubchik, who has a shared copyright to this code */ |
17 | |
18 | #include "ftdefs.h" |
19 | #include "my_compare.h" |
20 | |
21 | |
22 | static CHARSET_INFO *ft_stopword_cs= NULL; |
23 | |
24 | |
25 | typedef struct st_ft_stopwords |
26 | { |
27 | const char * pos; |
28 | uint len; |
29 | } FT_STOPWORD; |
30 | |
31 | static TREE *stopwords3=NULL; |
32 | |
33 | static int FT_STOPWORD_cmp(void* cmp_arg __attribute__((unused)), |
34 | FT_STOPWORD *w1, FT_STOPWORD *w2) |
35 | { |
36 | return ha_compare_text(ft_stopword_cs, |
37 | (uchar *)w1->pos,w1->len, |
38 | (uchar *)w2->pos,w2->len,0); |
39 | } |
40 | |
41 | static int FT_STOPWORD_free(FT_STOPWORD *w, TREE_FREE action, |
42 | void *arg __attribute__((unused))) |
43 | { |
44 | if (action == free_free) |
45 | my_free((void*)w->pos); |
46 | return 0; |
47 | } |
48 | |
49 | static int ft_add_stopword(const char *w) |
50 | { |
51 | FT_STOPWORD sw; |
52 | return !w || |
53 | (((sw.len= (uint) strlen(sw.pos=w)) >= ft_min_word_len) && |
54 | (tree_insert(stopwords3, &sw, 0, stopwords3->custom_arg)==NULL)); |
55 | } |
56 | |
57 | int ft_init_stopwords() |
58 | { |
59 | DBUG_ENTER("ft_init_stopwords" ); |
60 | if (!stopwords3) |
61 | { |
62 | if (!(stopwords3=(TREE *)my_malloc(sizeof(TREE),MYF(0)))) |
63 | DBUG_RETURN(-1); |
64 | init_tree(stopwords3,0,0,sizeof(FT_STOPWORD),(qsort_cmp2)&FT_STOPWORD_cmp, |
65 | (ft_stopword_file ? (tree_element_free)&FT_STOPWORD_free : 0), |
66 | NULL, MYF(0)); |
67 | /* |
68 | Stopword engine currently does not support tricky |
69 | character sets UCS2, UTF16, UTF32. |
70 | Use latin1 to compare stopwords in case of these character sets. |
71 | It's also fine to use latin1 with the built-in stopwords. |
72 | */ |
73 | ft_stopword_cs= default_charset_info->mbminlen == 1 ? |
74 | default_charset_info : &my_charset_latin1; |
75 | } |
76 | |
77 | if (ft_stopword_file) |
78 | { |
79 | File fd; |
80 | size_t len; |
81 | uchar *buffer, *start, *end; |
82 | FT_WORD w; |
83 | int error=-1; |
84 | |
85 | if (!*ft_stopword_file) |
86 | DBUG_RETURN(0); |
87 | |
88 | if ((fd=my_open(ft_stopword_file, O_RDONLY, MYF(MY_WME))) == -1) |
89 | DBUG_RETURN(-1); |
90 | len=(size_t)my_seek(fd, 0L, MY_SEEK_END, MYF(0)); |
91 | my_seek(fd, 0L, MY_SEEK_SET, MYF(0)); |
92 | if (!(start=buffer=my_malloc(len+1, MYF(MY_WME)))) |
93 | goto err0; |
94 | len=my_read(fd, buffer, len, MYF(MY_WME)); |
95 | end=start+len; |
96 | while (ft_simple_get_word(ft_stopword_cs, &start, end, &w, TRUE)) |
97 | { |
98 | if (ft_add_stopword(my_strndup((char*) w.pos, w.len, MYF(0)))) |
99 | goto err1; |
100 | } |
101 | error=0; |
102 | err1: |
103 | my_free(buffer); |
104 | err0: |
105 | my_close(fd, MYF(MY_WME)); |
106 | DBUG_RETURN(error); |
107 | } |
108 | else |
109 | { |
110 | /* compatibility mode: to be removed */ |
111 | char **sws=(char **)ft_precompiled_stopwords; |
112 | |
113 | for (;*sws;sws++) |
114 | { |
115 | if (ft_add_stopword(*sws)) |
116 | DBUG_RETURN(-1); |
117 | } |
118 | ft_stopword_file="(built-in)" ; /* for SHOW VARIABLES */ |
119 | } |
120 | DBUG_RETURN(0); |
121 | } |
122 | |
123 | int is_stopword(const char *word, size_t len) |
124 | { |
125 | FT_STOPWORD sw; |
126 | sw.pos=word; |
127 | sw.len=(uint)len; |
128 | return tree_search(stopwords3,&sw, stopwords3->custom_arg) != NULL; |
129 | } |
130 | |
131 | |
132 | void ft_free_stopwords() |
133 | { |
134 | DBUG_ENTER("ft_free_stopwords" ); |
135 | |
136 | if (stopwords3) |
137 | { |
138 | delete_tree(stopwords3, 0); /* purecov: inspected */ |
139 | my_free(stopwords3); |
140 | stopwords3=0; |
141 | } |
142 | ft_stopword_file= 0; |
143 | DBUG_VOID_RETURN; |
144 | } |
145 | |