| 1 | /* Copyright (c) 2000, 2010, Oracle and/or its affiliates |
| 2 | |
| 3 | This program is free software; you can redistribute it and/or modify |
| 4 | it under the terms of the GNU General Public License as published by |
| 5 | the Free Software Foundation; version 2 of the License. |
| 6 | |
| 7 | This program is distributed in the hope that it will be useful, |
| 8 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 9 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 10 | GNU General Public License for more details. |
| 11 | |
| 12 | You should have received a copy of the GNU General Public License |
| 13 | along with this program; if not, write to the Free Software |
| 14 | Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA */ |
| 15 | |
| 16 | /* Written by Sergei A. Golubchik, who has a shared copyright to this code */ |
| 17 | |
| 18 | #include "ftdefs.h" |
| 19 | #include "my_compare.h" |
| 20 | |
| 21 | |
| 22 | static CHARSET_INFO *ft_stopword_cs= NULL; |
| 23 | |
| 24 | |
| 25 | typedef struct st_ft_stopwords |
| 26 | { |
| 27 | const char * pos; |
| 28 | uint len; |
| 29 | } FT_STOPWORD; |
| 30 | |
| 31 | static TREE *stopwords3=NULL; |
| 32 | |
| 33 | static int FT_STOPWORD_cmp(void* cmp_arg __attribute__((unused)), |
| 34 | FT_STOPWORD *w1, FT_STOPWORD *w2) |
| 35 | { |
| 36 | return ha_compare_text(ft_stopword_cs, |
| 37 | (uchar *)w1->pos,w1->len, |
| 38 | (uchar *)w2->pos,w2->len,0); |
| 39 | } |
| 40 | |
| 41 | static int FT_STOPWORD_free(FT_STOPWORD *w, TREE_FREE action, |
| 42 | void *arg __attribute__((unused))) |
| 43 | { |
| 44 | if (action == free_free) |
| 45 | my_free((void*)w->pos); |
| 46 | return 0; |
| 47 | } |
| 48 | |
| 49 | static int ft_add_stopword(const char *w) |
| 50 | { |
| 51 | FT_STOPWORD sw; |
| 52 | return !w || |
| 53 | (((sw.len= (uint) strlen(sw.pos=w)) >= ft_min_word_len) && |
| 54 | (tree_insert(stopwords3, &sw, 0, stopwords3->custom_arg)==NULL)); |
| 55 | } |
| 56 | |
| 57 | int ft_init_stopwords() |
| 58 | { |
| 59 | DBUG_ENTER("ft_init_stopwords" ); |
| 60 | if (!stopwords3) |
| 61 | { |
| 62 | if (!(stopwords3=(TREE *)my_malloc(sizeof(TREE),MYF(0)))) |
| 63 | DBUG_RETURN(-1); |
| 64 | init_tree(stopwords3,0,0,sizeof(FT_STOPWORD),(qsort_cmp2)&FT_STOPWORD_cmp, |
| 65 | (ft_stopword_file ? (tree_element_free)&FT_STOPWORD_free : 0), |
| 66 | NULL, MYF(0)); |
| 67 | /* |
| 68 | Stopword engine currently does not support tricky |
| 69 | character sets UCS2, UTF16, UTF32. |
| 70 | Use latin1 to compare stopwords in case of these character sets. |
| 71 | It's also fine to use latin1 with the built-in stopwords. |
| 72 | */ |
| 73 | ft_stopword_cs= default_charset_info->mbminlen == 1 ? |
| 74 | default_charset_info : &my_charset_latin1; |
| 75 | } |
| 76 | |
| 77 | if (ft_stopword_file) |
| 78 | { |
| 79 | File fd; |
| 80 | size_t len; |
| 81 | uchar *buffer, *start, *end; |
| 82 | FT_WORD w; |
| 83 | int error=-1; |
| 84 | |
| 85 | if (!*ft_stopword_file) |
| 86 | DBUG_RETURN(0); |
| 87 | |
| 88 | if ((fd=my_open(ft_stopword_file, O_RDONLY, MYF(MY_WME))) == -1) |
| 89 | DBUG_RETURN(-1); |
| 90 | len=(size_t)my_seek(fd, 0L, MY_SEEK_END, MYF(0)); |
| 91 | my_seek(fd, 0L, MY_SEEK_SET, MYF(0)); |
| 92 | if (!(start=buffer=my_malloc(len+1, MYF(MY_WME)))) |
| 93 | goto err0; |
| 94 | len=my_read(fd, buffer, len, MYF(MY_WME)); |
| 95 | end=start+len; |
| 96 | while (ft_simple_get_word(ft_stopword_cs, &start, end, &w, TRUE)) |
| 97 | { |
| 98 | if (ft_add_stopword(my_strndup((char*) w.pos, w.len, MYF(0)))) |
| 99 | goto err1; |
| 100 | } |
| 101 | error=0; |
| 102 | err1: |
| 103 | my_free(buffer); |
| 104 | err0: |
| 105 | my_close(fd, MYF(MY_WME)); |
| 106 | DBUG_RETURN(error); |
| 107 | } |
| 108 | else |
| 109 | { |
| 110 | /* compatibility mode: to be removed */ |
| 111 | char **sws=(char **)ft_precompiled_stopwords; |
| 112 | |
| 113 | for (;*sws;sws++) |
| 114 | { |
| 115 | if (ft_add_stopword(*sws)) |
| 116 | DBUG_RETURN(-1); |
| 117 | } |
| 118 | ft_stopword_file="(built-in)" ; /* for SHOW VARIABLES */ |
| 119 | } |
| 120 | DBUG_RETURN(0); |
| 121 | } |
| 122 | |
| 123 | int is_stopword(const char *word, size_t len) |
| 124 | { |
| 125 | FT_STOPWORD sw; |
| 126 | sw.pos=word; |
| 127 | sw.len=(uint)len; |
| 128 | return tree_search(stopwords3,&sw, stopwords3->custom_arg) != NULL; |
| 129 | } |
| 130 | |
| 131 | |
| 132 | void ft_free_stopwords() |
| 133 | { |
| 134 | DBUG_ENTER("ft_free_stopwords" ); |
| 135 | |
| 136 | if (stopwords3) |
| 137 | { |
| 138 | delete_tree(stopwords3, 0); /* purecov: inspected */ |
| 139 | my_free(stopwords3); |
| 140 | stopwords3=0; |
| 141 | } |
| 142 | ft_stopword_file= 0; |
| 143 | DBUG_VOID_RETURN; |
| 144 | } |
| 145 | |