1/*
2 Copyright (c) 2001, 2010, Oracle and/or its affiliates
3
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; version 2 of the License.
7
8 This program is distributed in the hope that it will be useful,
9 but WITHOUT ANY WARRANTY; without even the implied warranty of
10 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 GNU General Public License for more details.
12
13 You should have received a copy of the GNU General Public License
14 along with this program; if not, write to the Free Software
15 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
16
17/* Written by Sergei A. Golubchik, who has a shared copyright to this code
18 added support for long options (my_getopt) 22.5.2002 by Jani Tolonen */
19
20#include "ftdefs.h"
21#include <my_getopt.h>
22
23static void usage();
24static void complain(int val);
25static my_bool get_one_option(int, const struct my_option *, char *);
26
27static int count=0, stats=0, dump=0, lstats=0;
28static my_bool verbose;
29static char *query=NULL;
30static uint lengths[256];
31
/* Size of the word buffers used by main(). */
#define MAX_LEN             (HA_FT_MAXBYTELEN + 10)
/* In verbose mode a progress counter is printed once per this many entries. */
#define HOW_OFTEN_TO_WRITE  10000
34
35static struct my_option my_long_options[] =
36{
37 {"help", 'h', "Display help and exit.",
38 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
39 {"help", '?', "Synonym for -h.",
40 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
41 {"count", 'c', "Calculate per-word stats (counts and global weights).",
42 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
43 {"dump", 'd', "Dump index (incl. data offsets and word weights).",
44 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
45 {"length", 'l', "Report length distribution.",
46 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
47 {"stats", 's', "Report global stats.",
48 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
49 {"verbose", 'v', "Be verbose.",
50 &verbose, &verbose, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0},
51 { 0, 0, 0, 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}
52};
53
54
55int main(int argc,char *argv[])
56{
57 int error=0;
58 uint keylen, keylen2=0, inx, doc_cnt=0;
59 float weight= 1.0;
60 double gws, min_gws=0, avg_gws=0;
61 MI_INFO *info;
62 char buf[MAX_LEN], buf2[MAX_LEN], buf_maxlen[MAX_LEN], buf_min_gws[MAX_LEN];
63 ulong total=0, maxlen=0, uniq=0, max_doc_cnt=0;
64 struct { MI_INFO *info; } aio0, *aio=&aio0; /* for GWS_IN_USE */
65
66 MY_INIT(argv[0]);
67 if ((error= handle_options(&argc, &argv, my_long_options, get_one_option)))
68 exit(error);
69 if (count || dump)
70 verbose=0;
71 if (!count && !dump && !lstats && !query)
72 stats=1;
73
74 if (verbose)
75 setbuf(stdout,NULL);
76
77 if (argc < 2)
78 usage();
79
80 {
81 char *end;
82 inx= (uint) strtoll(argv[1], &end, 10);
83 if (*end)
84 usage();
85 }
86
87 init_key_cache(dflt_key_cache, MI_KEY_BLOCK_LENGTH, KEY_BUFFER_INIT, 0, 0, 0, 0);
88
89 if (!(info=mi_open(argv[0], O_RDONLY,
90 HA_OPEN_ABORT_IF_LOCKED|HA_OPEN_FROM_SQL_LAYER)))
91 {
92 error=my_errno;
93 goto err;
94 }
95
96 *buf2=0;
97 aio->info=info;
98
99 if ((inx >= info->s->base.keys) ||
100 !(info->s->keyinfo[inx].flag & HA_FULLTEXT))
101 {
102 printf("Key %d in table %s is not a FULLTEXT key\n", inx, info->filename);
103 goto err;
104 }
105
106 mi_lock_database(info, F_EXTRA_LCK);
107
108 info->lastpos= HA_OFFSET_ERROR;
109 info->update|= HA_STATE_PREV_FOUND;
110
111 while (!(error=mi_rnext(info,NULL,inx)))
112 {
113 FT_WEIGTH subkeys;
114 keylen=*(info->lastkey);
115
116 subkeys.i =ft_sintXkorr(info->lastkey+keylen+1);
117 if (subkeys.i >= 0)
118 weight= subkeys.f;
119
120#ifdef HAVE_SNPRINTF
121 snprintf(buf,MAX_LEN,"%.*s",(int) keylen,info->lastkey+1);
122#else
123 sprintf(buf,"%.*s",(int) keylen,info->lastkey+1);
124#endif
125 my_casedn_str(default_charset_info,buf);
126 total++;
127 lengths[keylen]++;
128
129 if (count || stats)
130 {
131 if (strcmp(buf, buf2))
132 {
133 if (*buf2)
134 {
135 uniq++;
136 avg_gws+=gws=GWS_IN_USE;
137 if (count)
138 printf("%9u %20.7f %s\n",doc_cnt,gws,buf2);
139 if (maxlen<keylen2)
140 {
141 maxlen=keylen2;
142 strmov(buf_maxlen, buf2);
143 }
144 if (max_doc_cnt < doc_cnt)
145 {
146 max_doc_cnt=doc_cnt;
147 strmov(buf_min_gws, buf2);
148 min_gws=gws;
149 }
150 }
151 strmov(buf2, buf);
152 keylen2=keylen;
153 doc_cnt=0;
154 }
155 doc_cnt+= (subkeys.i >= 0 ? 1 : -subkeys.i);
156 }
157 if (dump)
158 {
159 if (subkeys.i >= 0)
160 printf("%9lx %20.7f %s\n", (long) info->lastpos,weight,buf);
161 else
162 printf("%9lx => %17d %s\n",(long) info->lastpos,-subkeys.i,buf);
163 }
164 if (verbose && (total%HOW_OFTEN_TO_WRITE)==0)
165 printf("%10ld\r",total);
166 }
167 mi_lock_database(info, F_UNLCK);
168
169 if (count || stats)
170 {
171 if (*buf2)
172 {
173 uniq++;
174 avg_gws+=gws=GWS_IN_USE;
175 if (count)
176 printf("%9u %20.7f %s\n",doc_cnt,gws,buf2);
177 if (maxlen<keylen2)
178 {
179 maxlen=keylen2;
180 strmov(buf_maxlen, buf2);
181 }
182 if (max_doc_cnt < doc_cnt)
183 {
184 max_doc_cnt=doc_cnt;
185 strmov(buf_min_gws, buf2);
186 min_gws=gws;
187 }
188 }
189 }
190
191 if (stats)
192 {
193 count=0;
194 for (inx=0;inx<256;inx++)
195 {
196 count+=lengths[inx];
197 if ((ulong) count >= total/2)
198 break;
199 }
200 printf("Total rows: %lu\nTotal words: %lu\n"
201 "Unique words: %lu\nLongest word: %lu chars (%s)\n"
202 "Median length: %u\n"
203 "Average global weight: %f\n"
204 "Most common word: %lu times, weight: %f (%s)\n",
205 (long) info->state->records, total, uniq, maxlen, buf_maxlen,
206 inx, avg_gws/uniq, max_doc_cnt, min_gws, buf_min_gws);
207 }
208 if (lstats)
209 {
210 count=0;
211 for (inx=0; inx<256; inx++)
212 {
213 count+=lengths[inx];
214 if (count && lengths[inx])
215 printf("%3u: %10lu %5.2f%% %20lu %4.1f%%\n", inx,
216 (ulong) lengths[inx],100.0*lengths[inx]/total,(ulong) count,
217 100.0*count/total);
218 }
219 }
220
221err:
222 if (error && error != HA_ERR_END_OF_FILE)
223 printf("got error %d\n",my_errno);
224 if (info)
225 mi_close(info);
226 return 0;
227}
228
229
230static my_bool
231get_one_option(int optid, const struct my_option *opt __attribute__((unused)),
232 char *argument __attribute__((unused)))
233{
234 switch(optid) {
235 case 'd':
236 dump=1;
237 complain(count || query);
238 break;
239 case 's':
240 stats=1;
241 complain(query!=0);
242 break;
243 case 'c':
244 count= 1;
245 complain(dump || query);
246 break;
247 case 'l':
248 lstats=1;
249 complain(query!=0);
250 break;
251 case '?':
252 case 'h':
253 usage();
254 }
255 return 0;
256}
257
258
259static void usage()
260{
261 printf("Use: myisam_ftdump <table_name> <index_num>\n");
262 my_print_help(my_long_options);
263 my_print_variables(my_long_options);
264 exit(1);
265}
266
267
/*
  Poor man's assertion for option handling: when val is non-zero, report
  conflicting options on stdout and exit with status 1; otherwise do nothing.
*/
static void complain(int val)
{
  if (!val)
    return;

  fputs("You cannot use these options together!\n", stdout);
  exit(1);
}
276
277#include "mi_extrafunc.h"
278