| 1 | /* Copyright (c) 2000, 2010, Oracle and/or its affiliates | 
|---|
| 2 |  | 
|---|
| 3 | This program is free software; you can redistribute it and/or modify | 
|---|
| 4 | it under the terms of the GNU General Public License as published by | 
|---|
| 5 | the Free Software Foundation; version 2 of the License. | 
|---|
| 6 |  | 
|---|
| 7 | This program is distributed in the hope that it will be useful, | 
|---|
| 8 | but WITHOUT ANY WARRANTY; without even the implied warranty of | 
|---|
| 9 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | 
|---|
| 10 | GNU General Public License for more details. | 
|---|
| 11 |  | 
|---|
| 12 | You should have received a copy of the GNU General Public License | 
|---|
| 13 | along with this program; if not, write to the Free Software | 
|---|
| 14 | Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA */ | 
|---|
| 15 |  | 
|---|
| 16 | /* Written by Sergei A. Golubchik, who has a shared copyright to this code */ | 
|---|
| 17 |  | 
|---|
| 18 | #include "ftdefs.h" | 
|---|
| 19 | #include "my_compare.h" | 
|---|
| 20 |  | 
|---|
| 21 |  | 
|---|
/*
  Character set used to compare stopwords (see FT_STOPWORD_cmp) and to
  tokenize the stopword file. Assigned in ft_init_stopwords() when the
  stopword tree is first created.
*/
static CHARSET_INFO *ft_stopword_cs= NULL;
|---|
| 23 |  | 
|---|
| 24 |  | 
|---|
| 25 | typedef struct st_ft_stopwords | 
|---|
| 26 | { | 
|---|
| 27 | const char * pos; | 
|---|
| 28 | uint   len; | 
|---|
| 29 | } FT_STOPWORD; | 
|---|
| 30 |  | 
|---|
/*
  Tree holding the stopwords as FT_STOPWORD elements. Allocated and
  initialized by ft_init_stopwords(), released by ft_free_stopwords();
  NULL while no tree exists.
*/
static TREE *stopwords3=NULL;
|---|
| 32 |  | 
|---|
| 33 | static int FT_STOPWORD_cmp(void* cmp_arg __attribute__((unused)), | 
|---|
| 34 | FT_STOPWORD *w1, FT_STOPWORD *w2) | 
|---|
| 35 | { | 
|---|
| 36 | return ha_compare_text(ft_stopword_cs, | 
|---|
| 37 | (uchar *)w1->pos,w1->len, | 
|---|
| 38 | (uchar *)w2->pos,w2->len,0); | 
|---|
| 39 | } | 
|---|
| 40 |  | 
|---|
| 41 | static int FT_STOPWORD_free(FT_STOPWORD *w, TREE_FREE action, | 
|---|
| 42 | void *arg __attribute__((unused))) | 
|---|
| 43 | { | 
|---|
| 44 | if (action == free_free) | 
|---|
| 45 | my_free((void*)w->pos); | 
|---|
| 46 | return 0; | 
|---|
| 47 | } | 
|---|
| 48 |  | 
|---|
| 49 | static int ft_add_stopword(const char *w) | 
|---|
| 50 | { | 
|---|
| 51 | FT_STOPWORD sw; | 
|---|
| 52 | return !w || | 
|---|
| 53 | (((sw.len= (uint) strlen(sw.pos=w)) >= ft_min_word_len) && | 
|---|
| 54 | (tree_insert(stopwords3, &sw, 0, stopwords3->custom_arg)==NULL)); | 
|---|
| 55 | } | 
|---|
| 56 |  | 
|---|
| 57 | int ft_init_stopwords() | 
|---|
| 58 | { | 
|---|
| 59 | DBUG_ENTER( "ft_init_stopwords"); | 
|---|
| 60 | if (!stopwords3) | 
|---|
| 61 | { | 
|---|
| 62 | if (!(stopwords3=(TREE *)my_malloc(sizeof(TREE),MYF(0)))) | 
|---|
| 63 | DBUG_RETURN(-1); | 
|---|
| 64 | init_tree(stopwords3,0,0,sizeof(FT_STOPWORD),(qsort_cmp2)&FT_STOPWORD_cmp, | 
|---|
| 65 | (ft_stopword_file ? (tree_element_free)&FT_STOPWORD_free : 0), | 
|---|
| 66 | NULL, MYF(0)); | 
|---|
| 67 | /* | 
|---|
| 68 | Stopword engine currently does not support tricky | 
|---|
| 69 | character sets UCS2, UTF16, UTF32. | 
|---|
| 70 | Use latin1 to compare stopwords in case of these character sets. | 
|---|
| 71 | It's also fine to use latin1 with the built-in stopwords. | 
|---|
| 72 | */ | 
|---|
| 73 | ft_stopword_cs= default_charset_info->mbminlen == 1 ? | 
|---|
| 74 | default_charset_info : &my_charset_latin1; | 
|---|
| 75 | } | 
|---|
| 76 |  | 
|---|
| 77 | if (ft_stopword_file) | 
|---|
| 78 | { | 
|---|
| 79 | File fd; | 
|---|
| 80 | size_t len; | 
|---|
| 81 | uchar *buffer, *start, *end; | 
|---|
| 82 | FT_WORD w; | 
|---|
| 83 | int error=-1; | 
|---|
| 84 |  | 
|---|
| 85 | if (!*ft_stopword_file) | 
|---|
| 86 | DBUG_RETURN(0); | 
|---|
| 87 |  | 
|---|
| 88 | if ((fd=my_open(ft_stopword_file, O_RDONLY, MYF(MY_WME))) == -1) | 
|---|
| 89 | DBUG_RETURN(-1); | 
|---|
| 90 | len=(size_t)my_seek(fd, 0L, MY_SEEK_END, MYF(0)); | 
|---|
| 91 | my_seek(fd, 0L, MY_SEEK_SET, MYF(0)); | 
|---|
| 92 | if (!(start=buffer=my_malloc(len+1, MYF(MY_WME)))) | 
|---|
| 93 | goto err0; | 
|---|
| 94 | len=my_read(fd, buffer, len, MYF(MY_WME)); | 
|---|
| 95 | end=start+len; | 
|---|
| 96 | while (ft_simple_get_word(ft_stopword_cs, &start, end, &w, TRUE)) | 
|---|
| 97 | { | 
|---|
| 98 | if (ft_add_stopword(my_strndup((char*) w.pos, w.len, MYF(0)))) | 
|---|
| 99 | goto err1; | 
|---|
| 100 | } | 
|---|
| 101 | error=0; | 
|---|
| 102 | err1: | 
|---|
| 103 | my_free(buffer); | 
|---|
| 104 | err0: | 
|---|
| 105 | my_close(fd, MYF(MY_WME)); | 
|---|
| 106 | DBUG_RETURN(error); | 
|---|
| 107 | } | 
|---|
| 108 | else | 
|---|
| 109 | { | 
|---|
| 110 | /* compatibility mode: to be removed */ | 
|---|
| 111 | char **sws=(char **)ft_precompiled_stopwords; | 
|---|
| 112 |  | 
|---|
| 113 | for (;*sws;sws++) | 
|---|
| 114 | { | 
|---|
| 115 | if (ft_add_stopword(*sws)) | 
|---|
| 116 | DBUG_RETURN(-1); | 
|---|
| 117 | } | 
|---|
| 118 | ft_stopword_file= "(built-in)"; /* for SHOW VARIABLES */ | 
|---|
| 119 | } | 
|---|
| 120 | DBUG_RETURN(0); | 
|---|
| 121 | } | 
|---|
| 122 |  | 
|---|
| 123 | int is_stopword(const char *word, size_t len) | 
|---|
| 124 | { | 
|---|
| 125 | FT_STOPWORD sw; | 
|---|
| 126 | sw.pos=word; | 
|---|
| 127 | sw.len=(uint)len; | 
|---|
| 128 | return tree_search(stopwords3,&sw, stopwords3->custom_arg) != NULL; | 
|---|
| 129 | } | 
|---|
| 130 |  | 
|---|
| 131 |  | 
|---|
| 132 | void ft_free_stopwords() | 
|---|
| 133 | { | 
|---|
| 134 | DBUG_ENTER( "ft_free_stopwords"); | 
|---|
| 135 |  | 
|---|
| 136 | if (stopwords3) | 
|---|
| 137 | { | 
|---|
| 138 | delete_tree(stopwords3, 0); /* purecov: inspected */ | 
|---|
| 139 | my_free(stopwords3); | 
|---|
| 140 | stopwords3=0; | 
|---|
| 141 | } | 
|---|
| 142 | ft_stopword_file= 0; | 
|---|
| 143 | DBUG_VOID_RETURN; | 
|---|
| 144 | } | 
|---|
| 145 |  | 
|---|