1/* Copyright (c) 2000, 2013, Oracle and/or its affiliates
2 Copyright (c) 2009, 2013, Monty Program Ab.
3
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; version 2 of the License.
7
8 This program is distributed in the hope that it will be useful,
9 but WITHOUT ANY WARRANTY; without even the implied warranty of
10 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 GNU General Public License for more details.
12
13 You should have received a copy of the GNU General Public License
14 along with this program; if not, write to the Free Software
15 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
16
17/* Pack MyISAM file */
18
19#ifndef USE_MY_FUNC
20#define USE_MY_FUNC /* We need at least my_malloc */
21#endif
22
23#include "myisamdef.h"
24#include "my_default.h"
25#include <queues.h>
26#include <my_tree.h>
27#include "mysys_err.h"
28#ifndef __GNU_LIBRARY__
29#define __GNU_LIBRARY__ /* Skip warnings in getopt.h */
30#endif
31#include <my_getopt.h>
32#include <assert.h>
33
34#if SIZEOF_LONG_LONG > 4
35#define BITS_SAVED 64
36#else
37#define BITS_SAVED 32
38#endif
39
40#define IS_OFFSET ((uint) 32768) /* Bit if offset or char in tree */
41#define HEAD_LENGTH 32
42#define ALLOWED_JOIN_DIFF 256 /* Diff allowed to join trees */
43
44#define DATA_TMP_EXT ".TMD"
45#define OLD_EXT ".OLD"
46#define FRM_EXT ".frm"
47#define WRITE_COUNT MY_HOW_OFTEN_TO_WRITE
48
49struct st_file_buffer {
50 File file;
51 uchar *buffer,*pos,*end;
52 my_off_t pos_in_file;
53 int bits;
54 ulonglong bitbucket;
55};
56
57struct st_huff_tree;
58struct st_huff_element;
59
60typedef struct st_huff_counts {
61 uint field_length,max_zero_fill;
62 uint pack_type;
63 uint max_end_space,max_pre_space,length_bits,min_space;
64 ulong max_length;
65 enum en_fieldtype field_type;
66 struct st_huff_tree *tree; /* Tree for field */
67 my_off_t counts[256];
68 my_off_t end_space[8];
69 my_off_t pre_space[8];
70 my_off_t tot_end_space,tot_pre_space,zero_fields,empty_fields,bytes_packed;
71 TREE int_tree; /* Tree for detecting distinct column values. */
72 uchar *tree_buff; /* Column values, 'field_length' each. */
73 uchar *tree_pos; /* Points to end of column values in 'tree_buff'. */
74} HUFF_COUNTS;
75
76typedef struct st_huff_element HUFF_ELEMENT;
77
78/*
79 WARNING: It is crucial for the optimizations in calc_packed_length()
80 that 'count' is the first element of 'HUFF_ELEMENT'.
81*/
82struct st_huff_element {
83 my_off_t count;
84 union un_element {
85 struct st_nod {
86 HUFF_ELEMENT *left,*right;
87 } nod;
88 struct st_leaf {
89 HUFF_ELEMENT *null;
90 uint element_nr; /* Number of element */
91 } leaf;
92 } a;
93};
94
95
96typedef struct st_huff_tree {
97 HUFF_ELEMENT *root,*element_buffer;
98 HUFF_COUNTS *counts;
99 uint tree_number;
100 uint elements;
101 my_off_t bytes_packed;
102 uint tree_pack_length;
103 uint min_chr,max_chr,char_bits,offset_bits,max_offset,height;
104 ulonglong *code;
105 uchar *code_len;
106} HUFF_TREE;
107
108
109typedef struct st_isam_mrg {
110 MI_INFO **file,**current,**end;
111 uint free_file;
112 uint count;
113 uint min_pack_length; /* Theese is used by packed data */
114 uint max_pack_length;
115 uint ref_length;
116 uint max_blob_length;
117 my_off_t records;
118 /* true if at least one source file has at least one disabled index */
119 my_bool src_file_has_indexes_disabled;
120} PACK_MRG_INFO;
121
122
123extern int main(int argc,char * *argv);
124static void get_options(int *argc,char ***argv);
125static MI_INFO *open_isam_file(char *name,int mode);
126static my_bool open_isam_files(PACK_MRG_INFO *mrg,char **names,uint count);
127static int compress(PACK_MRG_INFO *file,char *join_name);
128static int create_dest_frm(char *source_table, char *dest_table);
129static HUFF_COUNTS *init_huff_count(MI_INFO *info,my_off_t records);
130static void free_counts_and_tree_and_queue(HUFF_TREE *huff_trees,
131 uint trees,
132 HUFF_COUNTS *huff_counts,
133 uint fields);
134static int compare_tree(void* cmp_arg __attribute__((unused)),
135 const uchar *s,const uchar *t);
136static int get_statistic(PACK_MRG_INFO *mrg,HUFF_COUNTS *huff_counts);
137static void check_counts(HUFF_COUNTS *huff_counts,uint trees,
138 my_off_t records);
139static int test_space_compress(HUFF_COUNTS *huff_counts,my_off_t records,
140 uint max_space_length,my_off_t *space_counts,
141 my_off_t tot_space_count,
142 enum en_fieldtype field_type);
143static HUFF_TREE* make_huff_trees(HUFF_COUNTS *huff_counts,uint trees);
144static int make_huff_tree(HUFF_TREE *tree,HUFF_COUNTS *huff_counts);
145static int compare_huff_elements(void *not_used, uchar *a,uchar *b);
146static int save_counts_in_queue(uchar *key,element_count count,
147 HUFF_TREE *tree);
148static my_off_t calc_packed_length(HUFF_COUNTS *huff_counts,uint flag);
149static uint join_same_trees(HUFF_COUNTS *huff_counts,uint trees);
150static int make_huff_decode_table(HUFF_TREE *huff_tree,uint trees);
151static void make_traverse_code_tree(HUFF_TREE *huff_tree,
152 HUFF_ELEMENT *element,uint size,
153 ulonglong code);
154static int write_header(PACK_MRG_INFO *isam_file, uint header_length,uint trees,
155 my_off_t tot_elements,my_off_t filelength);
156static void write_field_info(HUFF_COUNTS *counts, uint fields,uint trees);
157static my_off_t write_huff_tree(HUFF_TREE *huff_tree,uint trees);
158static uint *make_offset_code_tree(HUFF_TREE *huff_tree,
159 HUFF_ELEMENT *element,
160 uint *offset);
161static uint max_bit(uint value);
162static int compress_isam_file(PACK_MRG_INFO *file,HUFF_COUNTS *huff_counts);
163static char *make_new_name(char *new_name,char *old_name);
164static char *make_old_name(char *new_name,char *old_name);
165static void init_file_buffer(File file,pbool read_buffer);
166static int flush_buffer(ulong neaded_length);
167static void end_file_buffer(void);
168static void write_bits(ulonglong value, uint bits);
169static void flush_bits(void);
170static int save_state(MI_INFO *isam_file,PACK_MRG_INFO *mrg,my_off_t new_length,
171 ha_checksum crc);
172static int save_state_mrg(File file,PACK_MRG_INFO *isam_file,my_off_t new_length,
173 ha_checksum crc);
174static int mrg_close(PACK_MRG_INFO *mrg);
175static int mrg_rrnd(PACK_MRG_INFO *info,uchar *buf);
176static void mrg_reset(PACK_MRG_INFO *mrg);
177#if !defined(DBUG_OFF)
178static void fakebigcodes(HUFF_COUNTS *huff_counts, HUFF_COUNTS *end_count);
179static int fakecmp(my_off_t **count1, my_off_t **count2);
180#endif
181
182
183static int error_on_write=0,test_only=0,verbose=0,silent=0,
184 write_loop=0,force_pack=0, isamchk_neaded=0;
185static int tmpfile_createflag=O_RDWR | O_TRUNC | O_EXCL;
186static my_bool backup, opt_wait;
187/*
188 tree_buff_length is somewhat arbitrary. The bigger it is the better
189 the chance to win in terms of compression factor. On the other hand,
190 this table becomes part of the compressed file header. And its length
191 is coded with 16 bits in the header. Hence the limit is 2**16 - 1.
192*/
193static uint tree_buff_length= 65536 - MALLOC_OVERHEAD;
194static char tmp_dir[FN_REFLEN]={0},*join_table;
195static my_off_t intervall_length;
196static ha_checksum glob_crc;
197static struct st_file_buffer file_buffer;
198static QUEUE queue;
199static HUFF_COUNTS *global_count;
200static char zero_string[]={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
201static const char *load_default_groups[]= { "myisampack",0 };
202
203 /* The main program */
204
205int main(int argc, char **argv)
206{
207 int error,ok;
208 PACK_MRG_INFO merge;
209 char **default_argv;
210 MY_INIT(argv[0]);
211
212 load_defaults_or_exit("my", load_default_groups, &argc, &argv);
213 default_argv= argv;
214 get_options(&argc,&argv);
215
216 error=ok=isamchk_neaded=0;
217 if (join_table)
218 {
219 /*
220 Join files into one and create FRM file for the compressed table only if
221 the compression succeeds
222 */
223 if (open_isam_files(&merge,argv,(uint) argc) ||
224 compress(&merge, join_table) || create_dest_frm(argv[0], join_table))
225 error=1;
226 }
227 else while (argc--)
228 {
229 MI_INFO *isam_file;
230 if (!(isam_file=open_isam_file(*argv++,O_RDWR)))
231 error=1;
232 else
233 {
234 merge.file= &isam_file;
235 merge.current=0;
236 merge.free_file=0;
237 merge.count=1;
238 if (compress(&merge,0))
239 error=1;
240 else
241 ok=1;
242 }
243 }
244 if (ok && isamchk_neaded && !silent)
245 puts("Remember to run myisamchk -rq on compressed tables");
246 (void) fflush(stdout);
247 (void) fflush(stderr);
248 free_defaults(default_argv);
249 my_end(verbose ? MY_CHECK_ERROR | MY_GIVE_INFO : MY_CHECK_ERROR);
250 exit(error ? 2 : 0);
251#ifndef _lint
252 return 0; /* No compiler warning */
253#endif
254}
255
256enum options_mp {OPT_CHARSETS_DIR_MP=256};
257
258static struct my_option my_long_options[] =
259{
260 {"backup", 'b', "Make a backup of the table as table_name.OLD.",
261 &backup, &backup, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0},
262 {"character-sets-dir", OPT_CHARSETS_DIR_MP,
263 "Directory where character sets are.", (char**) &charsets_dir,
264 (char**) &charsets_dir, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0},
265 {"debug", '#', "Output debug log. Often this is 'd:t:o,filename'.",
266 0, 0, 0, GET_STR, OPT_ARG, 0, 0, 0, 0, 0, 0},
267 {"force", 'f',
268 "Force packing of table even if it gets bigger or if tempfile exists.",
269 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
270 {"join", 'j',
271 "Join all given tables into 'new_table_name'. All tables MUST have identical layouts.",
272 &join_table, &join_table, 0, GET_STR, REQUIRED_ARG, 0, 0, 0,
273 0, 0, 0},
274 {"help", '?', "Display this help and exit.",
275 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
276 {"silent", 's', "Be more silent.",
277 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
278 {"tmpdir", 'T', "Use temporary directory to store temporary table.",
279 0, 0, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0},
280 {"test", 't', "Don't pack table, only test packing it.",
281 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
282 {"verbose", 'v', "Write info about progress and packing result. Use many -v for more verbosity!",
283 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
284 {"version", 'V', "Output version information and exit.",
285 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
286 {"wait", 'w', "Wait and retry if table is in use.", &opt_wait,
287 &opt_wait, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0},
288 { 0, 0, 0, 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}
289};
290
291
292static void print_version(void)
293{
294 printf("%s Ver 1.23 for %s on %s\n",
295 my_progname, SYSTEM_TYPE, MACHINE_TYPE);
296}
297
298
299static void usage(void)
300{
301 print_version();
302 puts("Copyright 2002-2008 MySQL AB, 2008 Sun Microsystems, Inc.");
303 puts("This software comes with ABSOLUTELY NO WARRANTY. This is free software,");
304 puts("and you are welcome to modify and redistribute it under the GPL license\n");
305
306 puts("Pack a MyISAM-table to take much less space.");
307 puts("Keys are not updated, you must run myisamchk -rq on the index (.MYI) file");
308 puts("afterwards to update the keys.");
309 puts("You should give the .MYI file as the filename argument.");
310
311 printf("\nUsage: %s [OPTIONS] filename...\n", my_progname);
312 my_print_help(my_long_options);
313 print_defaults("my", load_default_groups);
314 my_print_variables(my_long_options);
315}
316
317
318static my_bool
319get_one_option(int optid, const struct my_option *opt __attribute__((unused)),
320 char *argument)
321{
322 uint length;
323
324 switch(optid) {
325 case 'f':
326 force_pack= 1;
327 tmpfile_createflag= O_RDWR | O_TRUNC;
328 break;
329 case 's':
330 write_loop= verbose= 0;
331 silent= 1;
332 break;
333 case 't':
334 test_only= 1;
335 /* Avoid to reset 'verbose' if it was already set > 1. */
336 if (! verbose)
337 verbose= 1;
338 break;
339 case 'T':
340 length= (uint) (strmov(tmp_dir, argument) - tmp_dir);
341 if (length != dirname_length(tmp_dir))
342 {
343 tmp_dir[length]=FN_LIBCHAR;
344 tmp_dir[length+1]=0;
345 }
346 break;
347 case 'v':
348 verbose++; /* Allow for selecting the level of verbosity. */
349 silent= 0;
350 break;
351 case '#':
352 DBUG_PUSH(argument ? argument : "d:t:o");
353 break;
354 case 'V':
355 print_version();
356 exit(0);
357 case 'I':
358 case '?':
359 usage();
360 exit(0);
361 }
362 return 0;
363}
364
365 /* reads options */
366 /* Initiates DEBUG - but no debugging here ! */
367
368static void get_options(int *argc,char ***argv)
369{
370 int ho_error;
371
372 my_progname= argv[0][0];
373 if (isatty(fileno(stdout)))
374 write_loop=1;
375
376 if ((ho_error=handle_options(argc, argv, my_long_options, get_one_option)))
377 exit(ho_error);
378
379 if (!*argc)
380 {
381 usage();
382 exit(1);
383 }
384 if (join_table)
385 {
386 backup=0; /* Not needed */
387 tmp_dir[0]=0;
388 }
389 return;
390}
391
392
393static MI_INFO *open_isam_file(char *name,int mode)
394{
395 MI_INFO *isam_file;
396 MYISAM_SHARE *share;
397 DBUG_ENTER("open_isam_file");
398
399 if (!(isam_file=mi_open(name,mode,
400 (opt_wait ? HA_OPEN_WAIT_IF_LOCKED :
401 HA_OPEN_ABORT_IF_LOCKED))))
402 {
403 (void) fprintf(stderr, "%s gave error %d on open\n", name, my_errno);
404 DBUG_RETURN(0);
405 }
406 share=isam_file->s;
407 if (share->options & HA_OPTION_COMPRESS_RECORD && !join_table)
408 {
409 if (!force_pack)
410 {
411 (void) fprintf(stderr, "%s is already compressed\n", name);
412 (void) mi_close(isam_file);
413 DBUG_RETURN(0);
414 }
415 if (verbose)
416 puts("Recompressing already compressed table");
417 share->options&= ~HA_OPTION_READ_ONLY_DATA; /* We are modifing it */
418
419 /* We want to use the new checksums if we have null fields */
420 if (share->has_null_fields)
421 share->options|= HA_OPTION_NULL_FIELDS;
422
423 }
424 if (! force_pack && share->state.state.records != 0 &&
425 (share->state.state.records <= 1 ||
426 share->state.state.data_file_length < 1024))
427 {
428 (void) fprintf(stderr, "%s is too small to compress\n", name);
429 (void) mi_close(isam_file);
430 DBUG_RETURN(0);
431 }
432 (void) mi_lock_database(isam_file,F_WRLCK);
433 DBUG_RETURN(isam_file);
434}
435
436
437static my_bool open_isam_files(PACK_MRG_INFO *mrg, char **names, uint count)
438{
439 uint i,j;
440 mrg->count=0;
441 mrg->current=0;
442 mrg->file=(MI_INFO**) my_malloc(sizeof(MI_INFO*)*count,MYF(MY_FAE));
443 mrg->free_file=1;
444 mrg->src_file_has_indexes_disabled= 0;
445 for (i=0; i < count ; i++)
446 {
447 if (!(mrg->file[i]=open_isam_file(names[i],O_RDONLY)))
448 goto error;
449
450 mrg->src_file_has_indexes_disabled|=
451 ! mi_is_all_keys_active(mrg->file[i]->s->state.key_map,
452 mrg->file[i]->s->base.keys);
453 }
454 /* Check that files are identical */
455 for (j=0 ; j < count-1 ; j++)
456 {
457 MI_COLUMNDEF *m1,*m2,*end;
458 if (mrg->file[j]->s->base.reclength != mrg->file[j+1]->s->base.reclength ||
459 mrg->file[j]->s->base.fields != mrg->file[j+1]->s->base.fields)
460 goto diff_file;
461 m1=mrg->file[j]->s->rec;
462 end=m1+mrg->file[j]->s->base.fields;
463 m2=mrg->file[j+1]->s->rec;
464 for ( ; m1 != end ; m1++,m2++)
465 {
466 if (m1->type != m2->type || m1->length != m2->length)
467 goto diff_file;
468 }
469 }
470 mrg->count=count;
471 return 0;
472
473 diff_file:
474 (void) fprintf(stderr, "%s: Tables '%s' and '%s' are not identical\n",
475 my_progname, names[j], names[j+1]);
476 error:
477 while (i--)
478 mi_close(mrg->file[i]);
479 my_free(mrg->file);
480 return 1;
481}
482
483
484static int compress(PACK_MRG_INFO *mrg,char *result_table)
485{
486 int error;
487 File new_file,join_isam_file;
488 MI_INFO *isam_file;
489 MYISAM_SHARE *share;
490 char org_name[FN_REFLEN],new_name[FN_REFLEN],temp_name[FN_REFLEN];
491 uint i,header_length,fields,trees,used_trees;
492 my_off_t old_length,new_length,tot_elements;
493 HUFF_COUNTS *huff_counts;
494 HUFF_TREE *huff_trees;
495 DBUG_ENTER("compress");
496
497 isam_file=mrg->file[0]; /* Take this as an example */
498 share=isam_file->s;
499 new_file=join_isam_file= -1;
500 trees=fields=0;
501 huff_trees=0;
502 huff_counts=0;
503
504 /* Create temporary or join file */
505
506 if (backup)
507 (void) fn_format(org_name,isam_file->filename,"",MI_NAME_DEXT,2);
508 else
509 (void) fn_format(org_name,isam_file->filename,"",MI_NAME_DEXT,2+4+16);
510 if (!test_only && result_table)
511 {
512 /* Make a new indexfile based on first file in list */
513 uint length;
514 uchar *buff;
515 strmov(org_name,result_table); /* Fix error messages */
516 (void) fn_format(new_name,result_table,"",MI_NAME_IEXT,2);
517 if ((join_isam_file=my_create(new_name,0,tmpfile_createflag,MYF(MY_WME)))
518 < 0)
519 goto err;
520 length=(uint) share->base.keystart;
521 if (!(buff= (uchar*) my_malloc(length,MYF(MY_WME))))
522 goto err;
523 if (my_pread(share->kfile,buff,length,0L,MYF(MY_WME | MY_NABP)) ||
524 my_write(join_isam_file,buff,length,
525 MYF(MY_WME | MY_NABP | MY_WAIT_IF_FULL)))
526 {
527 my_free(buff);
528 goto err;
529 }
530 my_free(buff);
531 (void) fn_format(new_name,result_table,"",MI_NAME_DEXT,2);
532 }
533 else if (!tmp_dir[0])
534 (void) make_new_name(new_name,org_name);
535 else
536 (void) fn_format(new_name,org_name,tmp_dir,DATA_TMP_EXT,1+2+4);
537 if (!test_only &&
538 (new_file=my_create(new_name,0,tmpfile_createflag,MYF(MY_WME))) < 0)
539 goto err;
540
541 /* Start calculating statistics */
542
543 mrg->records=0;
544 for (i=0 ; i < mrg->count ; i++)
545 mrg->records+=mrg->file[i]->s->state.state.records;
546
547 DBUG_PRINT("info", ("Compressing %s: (%lu records)",
548 result_table ? new_name : org_name,
549 (ulong) mrg->records));
550 if (write_loop || verbose)
551 {
552 printf("Compressing %s: (%lu records)\n",
553 result_table ? new_name : org_name, (ulong) mrg->records);
554 }
555 trees=fields=share->base.fields;
556 huff_counts=init_huff_count(isam_file,mrg->records);
557
558 /*
559 Read the whole data file(s) for statistics.
560 */
561 DBUG_PRINT("info", ("- Calculating statistics"));
562 if (write_loop || verbose)
563 printf("- Calculating statistics\n");
564 if (get_statistic(mrg,huff_counts))
565 goto err;
566
567 old_length=0;
568 for (i=0; i < mrg->count ; i++)
569 old_length+= (mrg->file[i]->s->state.state.data_file_length -
570 mrg->file[i]->s->state.state.empty);
571
572 /*
573 Create a global priority queue in preparation for making
574 temporary Huffman trees.
575 */
576 if (init_queue(&queue, 256, 0, 0, compare_huff_elements, 0, 0, 0))
577 goto err;
578
579 /*
580 Check each column if we should use pre-space-compress, end-space-
581 compress, empty-field-compress or zero-field-compress.
582 */
583 check_counts(huff_counts,fields,mrg->records);
584
585 /*
586 Build a Huffman tree for each column.
587 */
588 huff_trees=make_huff_trees(huff_counts,trees);
589
590 /*
591 If the packed lengths of combined columns is less then the sum of
592 the non-combined columns, then create common Huffman trees for them.
593 We do this only for byte compressed columns, not for distinct values
594 compressed columns.
595 */
596 if ((int) (used_trees=join_same_trees(huff_counts,trees)) < 0)
597 goto err;
598
599 /*
600 Assign codes to all byte or column values.
601 */
602 if (make_huff_decode_table(huff_trees,fields))
603 goto err;
604
605 /* Prepare a file buffer. */
606 init_file_buffer(new_file,0);
607
608 /*
609 Reserve space in the target file for the fixed compressed file header.
610 */
611 file_buffer.pos_in_file=HEAD_LENGTH;
612 if (! test_only)
613 my_seek(new_file,file_buffer.pos_in_file,MY_SEEK_SET,MYF(0));
614
615 /*
616 Write field infos: field type, pack type, length bits, tree number.
617 */
618 write_field_info(huff_counts,fields,used_trees);
619
620 /*
621 Write decode trees.
622 */
623 if (!(tot_elements=write_huff_tree(huff_trees,trees)))
624 goto err;
625
626 /*
627 Calculate the total length of the compression info header.
628 This includes the fixed compressed file header, the column compression
629 type descriptions, and the decode trees.
630 */
631 header_length=(uint) file_buffer.pos_in_file+
632 (uint) (file_buffer.pos-file_buffer.buffer);
633
634 /*
635 Compress the source file into the target file.
636 */
637 DBUG_PRINT("info", ("- Compressing file"));
638 if (write_loop || verbose)
639 printf("- Compressing file\n");
640 error=compress_isam_file(mrg,huff_counts);
641 new_length=file_buffer.pos_in_file;
642 if (!error && !test_only)
643 {
644 uchar buff[MEMMAP_EXTRA_MARGIN]; /* End marginal for memmap */
645 bzero(buff,sizeof(buff));
646 error=my_write(file_buffer.file,buff,sizeof(buff),
647 MYF(MY_WME | MY_NABP | MY_WAIT_IF_FULL)) != 0;
648 }
649
650 /*
651 Write the fixed compressed file header.
652 */
653 if (!error)
654 error=write_header(mrg,header_length,used_trees,tot_elements,
655 new_length);
656
657 /* Flush the file buffer. */
658 end_file_buffer();
659
660 /* Display statistics. */
661 DBUG_PRINT("info", ("Min record length: %6d Max length: %6d "
662 "Mean total length: %6ld\n",
663 mrg->min_pack_length, mrg->max_pack_length,
664 (ulong) (mrg->records ? (new_length/mrg->records) : 0)));
665 if (verbose && mrg->records)
666 printf("Min record length: %6d Max length: %6d "
667 "Mean total length: %6ld\n", mrg->min_pack_length,
668 mrg->max_pack_length, (ulong) (new_length/mrg->records));
669
670 /* Close source and target file. */
671 if (!test_only)
672 {
673 error|=my_close(new_file,MYF(MY_WME));
674 if (!result_table)
675 {
676 error|=my_close(isam_file->dfile,MYF(MY_WME));
677 isam_file->dfile= -1; /* Tell mi_close file is closed */
678 }
679 }
680
681 /* Cleanup. */
682 free_counts_and_tree_and_queue(huff_trees,trees,huff_counts,fields);
683 if (! test_only && ! error)
684 {
685 if (result_table)
686 {
687 error=save_state_mrg(join_isam_file,mrg,new_length,glob_crc);
688 }
689 else
690 {
691 if (backup)
692 {
693 if (my_rename(org_name,make_old_name(temp_name,isam_file->filename),
694 MYF(MY_WME)))
695 error=1;
696 else
697 {
698 if (tmp_dir[0])
699 error=my_copy(new_name,org_name,MYF(MY_WME));
700 else
701 error=my_rename(new_name,org_name,MYF(MY_WME));
702 if (!error)
703 {
704 (void) my_copystat(temp_name,org_name,MYF(MY_COPYTIME));
705 if (tmp_dir[0])
706 (void) my_delete(new_name,MYF(MY_WME));
707 }
708 }
709 }
710 else
711 {
712 if (tmp_dir[0])
713 {
714 error=my_copy(new_name,org_name,
715 MYF(MY_WME | MY_HOLD_ORIGINAL_MODES | MY_COPYTIME));
716 if (!error)
717 (void) my_delete(new_name,MYF(MY_WME));
718 }
719 else
720 error=my_redel(org_name, new_name, 0, MYF(MY_WME | MY_COPYTIME));
721 }
722 if (! error)
723 error=save_state(isam_file,mrg,new_length,glob_crc);
724 }
725 }
726 error|=mrg_close(mrg);
727 if (join_isam_file >= 0)
728 error|=my_close(join_isam_file,MYF(MY_WME));
729 if (error)
730 {
731 (void) fprintf(stderr, "Aborting: %s is not compressed\n", org_name);
732 (void) my_delete(new_name,MYF(MY_WME));
733 DBUG_RETURN(-1);
734 }
735 if (write_loop || verbose)
736 {
737 if (old_length)
738 printf("%.4g%% \n",
739 (((longlong) (old_length - new_length)) * 100.0 /
740 (longlong) old_length));
741 else
742 puts("Empty file saved in compressed format");
743 }
744 DBUG_RETURN(0);
745
746 err:
747 free_counts_and_tree_and_queue(huff_trees,trees,huff_counts,fields);
748 if (new_file >= 0)
749 (void) my_close(new_file,MYF(0));
750 if (join_isam_file >= 0)
751 (void) my_close(join_isam_file,MYF(0));
752 mrg_close(mrg);
753 (void) fprintf(stderr, "Aborted: %s is not compressed\n", org_name);
754 DBUG_RETURN(-1);
755}
756
757
758/**
759 Create FRM for the destination table for --join operation
760 Copy the first table FRM as the destination table FRM file. Doing so
761 will help the mysql server to recognize the newly created table.
762 See Bug#36573.
763
764 @param source_table Name of the source table
765 @param dest_table Name of the destination table
766 @retval 0 Successful copy operation
767
768 @note We always return 0 because we don't want myisampack to report error
769 even if the copy operation fails.
770*/
771
772static int create_dest_frm(char *source_table, char *dest_table)
773{
774 char source_name[FN_REFLEN], dest_name[FN_REFLEN];
775
776 DBUG_ENTER("create_dest_frm");
777
778 (void) fn_format(source_name, source_table,
779 "", FRM_EXT, MY_UNPACK_FILENAME | MY_RESOLVE_SYMLINKS);
780 (void) fn_format(dest_name, dest_table,
781 "", FRM_EXT, MY_UNPACK_FILENAME | MY_RESOLVE_SYMLINKS);
782 /*
783 Error messages produced by my_copy() are suppressed as this
784 is not vital for --join operation. User shouldn't see any error messages
785 like "source file frm not found" and "unable to create destination frm
786 file. So we don't pass the flag MY_WME -Write Message on Error to
787 my_copy()
788 */
789 (void) my_copy(source_name, dest_name, MYF(MY_DONT_OVERWRITE_FILE));
790
791 DBUG_RETURN(0);
792}
793
794
795 /* Init a huff_count-struct for each field and init it */
796
797static HUFF_COUNTS *init_huff_count(MI_INFO *info,my_off_t records)
798{
799 reg2 uint i;
800 reg1 HUFF_COUNTS *count;
801 if ((count = (HUFF_COUNTS*) my_malloc(info->s->base.fields*
802 sizeof(HUFF_COUNTS),
803 MYF(MY_ZEROFILL | MY_WME))))
804 {
805 for (i=0 ; i < info->s->base.fields ; i++)
806 {
807 enum en_fieldtype type;
808 count[i].field_length=info->s->rec[i].length;
809 type= count[i].field_type= (enum en_fieldtype) info->s->rec[i].type;
810 if (type == FIELD_INTERVALL ||
811 type == FIELD_CONSTANT ||
812 type == FIELD_ZERO)
813 type = FIELD_NORMAL;
814 if (count[i].field_length <= 8 &&
815 (type == FIELD_NORMAL ||
816 type == FIELD_SKIP_ZERO))
817 count[i].max_zero_fill= count[i].field_length;
818 /*
819 For every column initialize a tree, which is used to detect distinct
820 column values. 'int_tree' works together with 'tree_buff' and
821 'tree_pos'. It's keys are implemented by pointers into 'tree_buff'.
822 This is accomplished by '-1' as the element size.
823 */
824 init_tree(&count[i].int_tree,0,0,-1,(qsort_cmp2) compare_tree, NULL,
825 NULL, MYF(0));
826 if (records && type != FIELD_BLOB && type != FIELD_VARCHAR)
827 count[i].tree_pos=count[i].tree_buff =
828 my_malloc(count[i].field_length > 1 ? tree_buff_length : 2,
829 MYF(MY_WME));
830 }
831 }
832 return count;
833}
834
835
836 /* Free memory used by counts and trees */
837
838static void free_counts_and_tree_and_queue(HUFF_TREE *huff_trees, uint trees,
839 HUFF_COUNTS *huff_counts,
840 uint fields)
841{
842 register uint i;
843
844 if (huff_trees)
845 {
846 for (i=0 ; i < trees ; i++)
847 {
848 if (huff_trees[i].element_buffer)
849 my_free(huff_trees[i].element_buffer);
850 if (huff_trees[i].code)
851 my_free(huff_trees[i].code);
852 }
853 my_free(huff_trees);
854 }
855 if (huff_counts)
856 {
857 for (i=0 ; i < fields ; i++)
858 {
859 if (huff_counts[i].tree_buff)
860 {
861 my_free(huff_counts[i].tree_buff);
862 delete_tree(&huff_counts[i].int_tree, 0);
863 }
864 }
865 my_free(huff_counts);
866 }
867 delete_queue(&queue); /* This is safe to free */
868 return;
869}
870
871 /* Read through old file and gather some statistics */
872
873static int get_statistic(PACK_MRG_INFO *mrg,HUFF_COUNTS *huff_counts)
874{
875 int error;
876 uint length;
877 ulong reclength,max_blob_length;
878 uchar *record,*pos,*next_pos,*end_pos,*start_pos;
879 ha_rows record_count;
880 my_bool static_row_size;
881 HUFF_COUNTS *count,*end_count;
882 TREE_ELEMENT *element;
883 DBUG_ENTER("get_statistic");
884
885 reclength=mrg->file[0]->s->base.reclength;
886 record=(uchar*) my_alloca(reclength);
887 end_count=huff_counts+mrg->file[0]->s->base.fields;
888 record_count=0; glob_crc=0;
889 max_blob_length=0;
890
891 /* Check how to calculate checksum */
892 static_row_size=1;
893 for (count=huff_counts ; count < end_count ; count++)
894 {
895 if (count->field_type == FIELD_BLOB ||
896 count->field_type == FIELD_VARCHAR)
897 {
898 static_row_size=0;
899 break;
900 }
901 }
902
903 mrg_reset(mrg);
904 while ((error=mrg_rrnd(mrg,record)) != HA_ERR_END_OF_FILE)
905 {
906 ulong tot_blob_length=0;
907 if (! error)
908 {
909 /* glob_crc is a checksum over all bytes of all records. */
910 if (static_row_size)
911 glob_crc+=mi_static_checksum(mrg->file[0],record);
912 else
913 glob_crc+=mi_checksum(mrg->file[0],record);
914
915 /* Count the incidence of values separately for every column. */
916 for (pos=record,count=huff_counts ;
917 count < end_count ;
918 count++,
919 pos=next_pos)
920 {
921 next_pos=end_pos=(start_pos=pos)+count->field_length;
922
923 /*
924 Put the whole column value in a tree if there is room for it.
925 'int_tree' is used to quickly check for duplicate values.
926 'tree_buff' collects as many distinct column values as
927 possible. If the field length is > 1, it is tree_buff_length,
928 else 2 bytes. Each value is 'field_length' bytes big. If there
929 are more distinct column values than fit into the buffer, we
930 give up with this tree. BLOBs and VARCHARs do not have a
931 tree_buff as it can only be used with fixed length columns.
932 For the special case of field length == 1, we handle only the
933 case that there is only one distinct value in the table(s).
934 Otherwise, we can have a maximum of 256 distinct values. This
935 is then handled by the normal Huffman tree build.
936
937 Another limit for collecting distinct column values is the
938 number of values itself. Since we would need to build a
939 Huffman tree for the values, we are limited by the 'IS_OFFSET'
940 constant. This constant expresses a bit which is used to
941 determine if a tree element holds a final value or an offset
942 to a child element. Hence, all values and offsets need to be
943 smaller than 'IS_OFFSET'. A tree element is implemented with
944 two integer values, one for the left branch and one for the
945 right branch. For the extreme case that the first element
946 points to the last element, the number of integers in the tree
947 must be less or equal to IS_OFFSET. So the number of elements
948 must be less or equal to IS_OFFSET / 2.
949
950 WARNING: At first, we insert a pointer into the record buffer
951 as the key for the tree. If we got a new distinct value, which
952 is really inserted into the tree, instead of being counted
953 only, we will copy the column value from the record buffer to
954 'tree_buff' and adjust the key pointer of the tree accordingly.
955 */
956 if (count->tree_buff)
957 {
958 global_count=count;
959 if (!(element=tree_insert(&count->int_tree,pos, 0,
960 count->int_tree.custom_arg)) ||
961 (element->count == 1 &&
962 (count->tree_buff + tree_buff_length <
963 count->tree_pos + count->field_length)) ||
964 (count->int_tree.elements_in_tree > IS_OFFSET / 2) ||
965 (count->field_length == 1 &&
966 count->int_tree.elements_in_tree > 1))
967 {
968 delete_tree(&count->int_tree, 0);
969 my_free(count->tree_buff);
970 count->tree_buff=0;
971 }
972 else
973 {
974 /*
975 If tree_insert() succeeds, it either creates a new element
976 or increments the counter of an existing element.
977 */
978 if (element->count == 1)
979 {
980 /* Copy the new column value into 'tree_buff'. */
981 memcpy(count->tree_pos,pos,(size_t) count->field_length);
982 /* Adjust the key pointer in the tree. */
983 tree_set_pointer(element,count->tree_pos);
984 /* Point behind the last column value so far. */
985 count->tree_pos+=count->field_length;
986 }
987 }
988 }
989
990 /* Save character counters and space-counts and zero-field-counts */
991 if (count->field_type == FIELD_NORMAL ||
992 count->field_type == FIELD_SKIP_ENDSPACE)
993 {
994 /* Ignore trailing space. */
995 for ( ; end_pos > pos ; end_pos--)
996 if (end_pos[-1] != ' ')
997 break;
998 /* Empty fields are just counted. Go to the next record. */
999 if (end_pos == pos)
1000 {
1001 count->empty_fields++;
1002 count->max_zero_fill=0;
1003 continue;
1004 }
1005 /*
1006 Count the total of all trailing spaces and the number of
1007 short trailing spaces. Remember the longest trailing space.
1008 */
1009 length= (uint) (next_pos-end_pos);
1010 count->tot_end_space+=length;
1011 if (length < 8)
1012 count->end_space[length]++;
1013 if (count->max_end_space < length)
1014 count->max_end_space = length;
1015 }
1016
1017 if (count->field_type == FIELD_NORMAL ||
1018 count->field_type == FIELD_SKIP_PRESPACE)
1019 {
1020 /* Ignore leading space. */
1021 for (pos=start_pos; pos < end_pos ; pos++)
1022 if (pos[0] != ' ')
1023 break;
1024 /* Empty fields are just counted. Go to the next record. */
1025 if (end_pos == pos)
1026 {
1027 count->empty_fields++;
1028 count->max_zero_fill=0;
1029 continue;
1030 }
1031 /*
1032 Count the total of all leading spaces and the number of
1033 short leading spaces. Remember the longest leading space.
1034 */
1035 length= (uint) (pos-start_pos);
1036 count->tot_pre_space+=length;
1037 if (length < 8)
1038 count->pre_space[length]++;
1039 if (count->max_pre_space < length)
1040 count->max_pre_space = length;
1041 }
1042
1043 /* Calculate pos, end_pos, and max_length for variable length fields. */
1044 if (count->field_type == FIELD_BLOB)
1045 {
1046 uint field_length=count->field_length -portable_sizeof_char_ptr;
1047 ulong blob_length= _mi_calc_blob_length(field_length, start_pos);
1048 memcpy(&pos, start_pos+field_length, sizeof(char*));
1049 end_pos=pos+blob_length;
1050 tot_blob_length+=blob_length;
1051 set_if_bigger(count->max_length,blob_length);
1052 }
1053 else if (count->field_type == FIELD_VARCHAR)
1054 {
1055 uint pack_length= HA_VARCHAR_PACKLENGTH(count->field_length-1);
1056 length= (pack_length == 1 ? (uint) *(uchar*) start_pos :
1057 uint2korr(start_pos));
1058 pos= start_pos+pack_length;
1059 end_pos= pos+length;
1060 set_if_bigger(count->max_length,length);
1061 }
1062
1063 /* Evaluate 'max_zero_fill' for short fields. */
1064 if (count->field_length <= 8 &&
1065 (count->field_type == FIELD_NORMAL ||
1066 count->field_type == FIELD_SKIP_ZERO))
1067 {
1068 uint i;
1069 /* Zero fields are just counted. Go to the next record. */
1070 if (!memcmp((uchar*) start_pos,zero_string,count->field_length))
1071 {
1072 count->zero_fields++;
1073 continue;
1074 }
1075 /*
1076 max_zero_fill starts with field_length. It is decreased every
1077 time a shorter "zero trailer" is found. It is set to zero when
1078 an empty field is found (see above). This suggests that the
1079 variable should be called 'min_zero_fill'.
1080 */
1081 for (i =0 ; i < count->max_zero_fill && ! end_pos[-1 - (int) i] ;
1082 i++) ;
1083 if (i < count->max_zero_fill)
1084 count->max_zero_fill=i;
1085 }
1086
1087 /* Ignore zero fields and check fields. */
1088 if (count->field_type == FIELD_ZERO ||
1089 count->field_type == FIELD_CHECK)
1090 continue;
1091
1092 /*
1093 Count the incidence of every byte value in the
1094 significant field value.
1095 */
1096 for ( ; pos < end_pos ; pos++)
1097 count->counts[(uchar) *pos]++;
1098
1099 /* Step to next field. */
1100 }
1101
1102 if (tot_blob_length > max_blob_length)
1103 max_blob_length=tot_blob_length;
1104 record_count++;
1105 if (write_loop && record_count % WRITE_COUNT == 0)
1106 {
1107 printf("%lu\r", (ulong) record_count);
1108 (void) fflush(stdout);
1109 }
1110 }
1111 else if (error != HA_ERR_RECORD_DELETED)
1112 {
1113 (void) fprintf(stderr, "Got error %d while reading rows", error);
1114 break;
1115 }
1116
1117 /* Step to next record. */
1118 }
1119 if (write_loop)
1120 {
1121 printf(" \r");
1122 (void) fflush(stdout);
1123 }
1124
1125 /*
1126 If --debug=d,fakebigcodes is set, fake the counts to get big Huffman
1127 codes.
1128 */
1129 DBUG_EXECUTE_IF("fakebigcodes", fakebigcodes(huff_counts, end_count););
1130
1131 DBUG_PRINT("info", ("Found the following number of incidents "
1132 "of the byte codes:"));
1133 if (verbose >= 2)
1134 printf("Found the following number of incidents "
1135 "of the byte codes:\n");
1136 for (count= huff_counts ; count < end_count; count++)
1137 {
1138 uint idx;
1139 my_off_t total_count;
1140 char llbuf[32];
1141
1142 DBUG_PRINT("info", ("column: %3u", (uint) (count - huff_counts + 1)));
1143 if (verbose >= 2)
1144 printf("column: %3u\n", (uint) (count - huff_counts + 1));
1145 if (count->tree_buff)
1146 {
1147 DBUG_PRINT("info", ("number of distinct values: %u",
1148 (uint) ((count->tree_pos - count->tree_buff) /
1149 count->field_length)));
1150 if (verbose >= 2)
1151 printf("number of distinct values: %u\n",
1152 (uint) ((count->tree_pos - count->tree_buff) /
1153 count->field_length));
1154 }
1155 total_count= 0;
1156 for (idx= 0; idx < 256; idx++)
1157 {
1158 if (count->counts[idx])
1159 {
1160 total_count+= count->counts[idx];
1161 DBUG_PRINT("info", ("counts[0x%02x]: %12s", idx,
1162 llstr((longlong) count->counts[idx], llbuf)));
1163 if (verbose >= 2)
1164 printf("counts[0x%02x]: %12s\n", idx,
1165 llstr((longlong) count->counts[idx], llbuf));
1166 }
1167 }
1168 DBUG_PRINT("info", ("total: %12s", llstr((longlong) total_count,
1169 llbuf)));
1170 if ((verbose >= 2) && total_count)
1171 {
1172 printf("total: %12s\n",
1173 llstr((longlong) total_count, llbuf));
1174 }
1175 }
1176
1177 mrg->records=record_count;
1178 mrg->max_blob_length=max_blob_length;
1179 my_afree((uchar*) record);
1180 DBUG_RETURN(error != HA_ERR_END_OF_FILE);
1181}
1182
1183static int compare_huff_elements(void *not_used __attribute__((unused)),
1184 uchar *a, uchar *b)
1185{
1186 return *((my_off_t*) a) < *((my_off_t*) b) ? -1 :
1187 (*((my_off_t*) a) == *((my_off_t*) b) ? 0 : 1);
1188}
1189
1190 /* Check each tree if we should use pre-space-compress, end-space-
1191 compress, empty-field-compress or zero-field-compress */
1192
1193static void check_counts(HUFF_COUNTS *huff_counts, uint trees,
1194 my_off_t records)
1195{
1196 uint space_fields,fill_zero_fields,field_count[(int) FIELD_enum_val_count];
1197 my_off_t old_length,new_length,length;
1198 DBUG_ENTER("check_counts");
1199
1200 bzero((uchar*) field_count,sizeof(field_count));
1201 space_fields=fill_zero_fields=0;
1202
1203 for (; trees-- ; huff_counts++)
1204 {
1205 if (huff_counts->field_type == FIELD_BLOB)
1206 {
1207 huff_counts->length_bits=max_bit(huff_counts->max_length);
1208 goto found_pack;
1209 }
1210 else if (huff_counts->field_type == FIELD_VARCHAR)
1211 {
1212 huff_counts->length_bits=max_bit(huff_counts->max_length);
1213 goto found_pack;
1214 }
1215 else if (huff_counts->field_type == FIELD_CHECK)
1216 {
1217 huff_counts->bytes_packed=0;
1218 huff_counts->counts[0]=0;
1219 goto found_pack;
1220 }
1221
1222 huff_counts->field_type=FIELD_NORMAL;
1223 huff_counts->pack_type=0;
1224
1225 /* Check for zero-filled records (in this column), or zero records. */
1226 if (huff_counts->zero_fields || ! records)
1227 {
1228 my_off_t old_space_count;
1229 /*
1230 If there are only zero filled records (in this column),
1231 or no records at all, we are done.
1232 */
1233 if (huff_counts->zero_fields == records)
1234 {
1235 huff_counts->field_type= FIELD_ZERO;
1236 huff_counts->bytes_packed=0;
1237 huff_counts->counts[0]=0;
1238 goto found_pack;
1239 }
1240 /* Remeber the number of significant spaces. */
1241 old_space_count=huff_counts->counts[' '];
1242 /* Add all leading and trailing spaces. */
1243 huff_counts->counts[' ']+= (huff_counts->tot_end_space +
1244 huff_counts->tot_pre_space +
1245 huff_counts->empty_fields *
1246 huff_counts->field_length);
1247 /* Check, what the compressed length of this would be. */
1248 old_length=calc_packed_length(huff_counts,0)+records/8;
1249 /* Get the number of zero bytes. */
1250 length=huff_counts->zero_fields*huff_counts->field_length;
1251 /* Add it to the counts. */
1252 huff_counts->counts[0]+=length;
1253 /* Check, what the compressed length of this would be. */
1254 new_length=calc_packed_length(huff_counts,0);
1255 /* If the compression without the zeroes would be shorter, we are done. */
1256 if (old_length < new_length && huff_counts->field_length > 1)
1257 {
1258 huff_counts->field_type=FIELD_SKIP_ZERO;
1259 huff_counts->counts[0]-=length;
1260 huff_counts->bytes_packed=old_length- records/8;
1261 goto found_pack;
1262 }
1263 /* Remove the insignificant spaces, but keep the zeroes. */
1264 huff_counts->counts[' ']=old_space_count;
1265 }
1266 /* Check, what the compressed length of this column would be. */
1267 huff_counts->bytes_packed=calc_packed_length(huff_counts,0);
1268
1269 /*
1270 If there are enough empty records (in this column),
1271 treating them specially may pay off.
1272 */
1273 if (huff_counts->empty_fields)
1274 {
1275 if (huff_counts->field_length > 2 &&
1276 huff_counts->empty_fields + (records - huff_counts->empty_fields)*
1277 (1+max_bit(MY_MAX(huff_counts->max_pre_space,
1278 huff_counts->max_end_space))) <
1279 records * max_bit(huff_counts->field_length))
1280 {
1281 huff_counts->pack_type |= PACK_TYPE_SPACE_FIELDS;
1282 }
1283 else
1284 {
1285 length=huff_counts->empty_fields*huff_counts->field_length;
1286 if (huff_counts->tot_end_space || ! huff_counts->tot_pre_space)
1287 {
1288 huff_counts->tot_end_space+=length;
1289 huff_counts->max_end_space=huff_counts->field_length;
1290 if (huff_counts->field_length < 8)
1291 huff_counts->end_space[huff_counts->field_length]+=
1292 huff_counts->empty_fields;
1293 }
1294 if (huff_counts->tot_pre_space)
1295 {
1296 huff_counts->tot_pre_space+=length;
1297 huff_counts->max_pre_space=huff_counts->field_length;
1298 if (huff_counts->field_length < 8)
1299 huff_counts->pre_space[huff_counts->field_length]+=
1300 huff_counts->empty_fields;
1301 }
1302 }
1303 }
1304
1305 /*
1306 If there are enough trailing spaces (in this column),
1307 treating them specially may pay off.
1308 */
1309 if (huff_counts->tot_end_space)
1310 {
1311 huff_counts->counts[' ']+=huff_counts->tot_pre_space;
1312 if (test_space_compress(huff_counts,records,huff_counts->max_end_space,
1313 huff_counts->end_space,
1314 huff_counts->tot_end_space,FIELD_SKIP_ENDSPACE))
1315 goto found_pack;
1316 huff_counts->counts[' ']-=huff_counts->tot_pre_space;
1317 }
1318
1319 /*
1320 If there are enough leading spaces (in this column),
1321 treating them specially may pay off.
1322 */
1323 if (huff_counts->tot_pre_space)
1324 {
1325 if (test_space_compress(huff_counts,records,huff_counts->max_pre_space,
1326 huff_counts->pre_space,
1327 huff_counts->tot_pre_space,FIELD_SKIP_PRESPACE))
1328 goto found_pack;
1329 }
1330
1331 found_pack: /* Found field-packing */
1332
1333 /* Test if we can use zero-fill */
1334
1335 if (huff_counts->max_zero_fill &&
1336 (huff_counts->field_type == FIELD_NORMAL ||
1337 huff_counts->field_type == FIELD_SKIP_ZERO))
1338 {
1339 huff_counts->counts[0]-=huff_counts->max_zero_fill*
1340 (huff_counts->field_type == FIELD_SKIP_ZERO ?
1341 records - huff_counts->zero_fields : records);
1342 huff_counts->pack_type|=PACK_TYPE_ZERO_FILL;
1343 huff_counts->bytes_packed=calc_packed_length(huff_counts,0);
1344 }
1345
1346 /* Test if intervall-field is better */
1347
1348 if (huff_counts->tree_buff)
1349 {
1350 HUFF_TREE tree;
1351
1352 DBUG_EXECUTE_IF("forceintervall",
1353 huff_counts->bytes_packed= ~ (my_off_t) 0;);
1354 tree.element_buffer=0;
1355 if (!make_huff_tree(&tree,huff_counts) &&
1356 tree.bytes_packed+tree.tree_pack_length < huff_counts->bytes_packed)
1357 {
1358 if (tree.elements == 1)
1359 huff_counts->field_type=FIELD_CONSTANT;
1360 else
1361 huff_counts->field_type=FIELD_INTERVALL;
1362 huff_counts->pack_type=0;
1363 }
1364 else
1365 {
1366 my_free(huff_counts->tree_buff);
1367 delete_tree(&huff_counts->int_tree, 0);
1368 huff_counts->tree_buff=0;
1369 }
1370 if (tree.element_buffer)
1371 my_free(tree.element_buffer);
1372 }
1373 if (huff_counts->pack_type & PACK_TYPE_SPACE_FIELDS)
1374 space_fields++;
1375 if (huff_counts->pack_type & PACK_TYPE_ZERO_FILL)
1376 fill_zero_fields++;
1377 field_count[huff_counts->field_type]++;
1378 }
1379 DBUG_PRINT("info", ("normal: %3d empty-space: %3d "
1380 "empty-zero: %3d empty-fill: %3d",
1381 field_count[FIELD_NORMAL],space_fields,
1382 field_count[FIELD_SKIP_ZERO],fill_zero_fields));
1383 DBUG_PRINT("info", ("pre-space: %3d end-space: %3d "
1384 "intervall-fields: %3d zero: %3d",
1385 field_count[FIELD_SKIP_PRESPACE],
1386 field_count[FIELD_SKIP_ENDSPACE],
1387 field_count[FIELD_INTERVALL],
1388 field_count[FIELD_ZERO]));
1389 if (verbose)
1390 printf("\nnormal: %3d empty-space: %3d "
1391 "empty-zero: %3d empty-fill: %3d\n"
1392 "pre-space: %3d end-space: %3d "
1393 "intervall-fields: %3d zero: %3d\n",
1394 field_count[FIELD_NORMAL],space_fields,
1395 field_count[FIELD_SKIP_ZERO],fill_zero_fields,
1396 field_count[FIELD_SKIP_PRESPACE],
1397 field_count[FIELD_SKIP_ENDSPACE],
1398 field_count[FIELD_INTERVALL],
1399 field_count[FIELD_ZERO]);
1400 DBUG_VOID_RETURN;
1401}
1402
1403 /* Test if we can use space-compression and empty-field-compression */
1404
1405static int
1406test_space_compress(HUFF_COUNTS *huff_counts, my_off_t records,
1407 uint max_space_length, my_off_t *space_counts,
1408 my_off_t tot_space_count, enum en_fieldtype field_type)
1409{
1410 int min_pos;
1411 uint length_bits,i;
1412 my_off_t space_count,min_space_count,min_pack,new_length,skip;
1413
1414 length_bits=max_bit(max_space_length);
1415
1416 /* Default no end_space-packing */
1417 space_count=huff_counts->counts[(uint) ' '];
1418 min_space_count= (huff_counts->counts[(uint) ' ']+= tot_space_count);
1419 min_pack=calc_packed_length(huff_counts,0);
1420 min_pos= -2;
1421 huff_counts->counts[(uint) ' ']=space_count;
1422
1423 /* Test with allways space-count */
1424 new_length=huff_counts->bytes_packed+length_bits*records/8;
1425 if (new_length+1 < min_pack)
1426 {
1427 min_pos= -1;
1428 min_pack=new_length;
1429 min_space_count=space_count;
1430 }
1431 /* Test with length-flag */
1432 for (skip=0L, i=0 ; i < 8 ; i++)
1433 {
1434 if (space_counts[i])
1435 {
1436 if (i)
1437 huff_counts->counts[(uint) ' ']+=space_counts[i];
1438 skip+=huff_counts->pre_space[i];
1439 new_length=calc_packed_length(huff_counts,0)+
1440 (records+(records-skip)*(1+length_bits))/8;
1441 if (new_length < min_pack)
1442 {
1443 min_pos=(int) i;
1444 min_pack=new_length;
1445 min_space_count=huff_counts->counts[(uint) ' '];
1446 }
1447 }
1448 }
1449
1450 huff_counts->counts[(uint) ' ']=min_space_count;
1451 huff_counts->bytes_packed=min_pack;
1452 switch (min_pos) {
1453 case -2:
1454 return(0); /* No space-compress */
1455 case -1: /* Always space-count */
1456 huff_counts->field_type=field_type;
1457 huff_counts->min_space=0;
1458 huff_counts->length_bits=max_bit(max_space_length);
1459 break;
1460 default:
1461 huff_counts->field_type=field_type;
1462 huff_counts->min_space=(uint) min_pos;
1463 huff_counts->pack_type|=PACK_TYPE_SELECTED;
1464 huff_counts->length_bits=max_bit(max_space_length);
1465 break;
1466 }
1467 return(1); /* Using space-compress */
1468}
1469
1470
1471 /* Make a huff_tree of each huff_count */
1472
1473static HUFF_TREE* make_huff_trees(HUFF_COUNTS *huff_counts, uint trees)
1474{
1475 uint tree;
1476 HUFF_TREE *huff_tree;
1477 DBUG_ENTER("make_huff_trees");
1478
1479 if (!(huff_tree=(HUFF_TREE*) my_malloc(trees*sizeof(HUFF_TREE),
1480 MYF(MY_WME | MY_ZEROFILL))))
1481 DBUG_RETURN(0);
1482
1483 for (tree=0 ; tree < trees ; tree++)
1484 {
1485 if (make_huff_tree(huff_tree+tree,huff_counts+tree))
1486 {
1487 while (tree--)
1488 my_free(huff_tree[tree].element_buffer);
1489 my_free(huff_tree);
1490 DBUG_RETURN(0);
1491 }
1492 }
1493 DBUG_RETURN(huff_tree);
1494}
1495
1496/*
1497 Build a Huffman tree.
1498
1499 SYNOPSIS
1500 make_huff_tree()
1501 huff_tree The Huffman tree.
1502 huff_counts The counts.
1503
1504 DESCRIPTION
1505 Build a Huffman tree according to huff_counts->counts or
1506 huff_counts->tree_buff. tree_buff, if non-NULL contains up to
1507 tree_buff_length of distinct column values. In that case, whole
1508 values can be Huffman encoded instead of single bytes.
1509
1510 RETURN
1511 0 OK
1512 != 0 Error
1513*/
1514
1515static int make_huff_tree(HUFF_TREE *huff_tree, HUFF_COUNTS *huff_counts)
1516{
1517 uint i,found,bits_packed,first,last;
1518 my_off_t bytes_packed;
1519 HUFF_ELEMENT *a,*b,*new_huff_el;
1520
1521 first=last=0;
1522 if (huff_counts->tree_buff)
1523 {
1524 /* Calculate the number of distinct values in tree_buff. */
1525 found= (uint) (huff_counts->tree_pos - huff_counts->tree_buff) /
1526 huff_counts->field_length;
1527 first=0; last=found-1;
1528 }
1529 else
1530 {
1531 /* Count the number of byte codes found in the column. */
1532 for (i=found=0 ; i < 256 ; i++)
1533 {
1534 if (huff_counts->counts[i])
1535 {
1536 if (! found++)
1537 first=i;
1538 last=i;
1539 }
1540 }
1541 if (found < 2)
1542 found=2;
1543 }
1544
1545 /* When using 'tree_buff' we can have more that 256 values. */
1546 if (queue.max_elements < found)
1547 {
1548 delete_queue(&queue);
1549 if (init_queue(&queue,found, 0, 0, compare_huff_elements, 0, 0, 0))
1550 return -1;
1551 }
1552
1553 /* Allocate or reallocate an element buffer for the Huffman tree. */
1554 if (!huff_tree->element_buffer)
1555 {
1556 if (!(huff_tree->element_buffer=
1557 (HUFF_ELEMENT*) my_malloc(found*2*sizeof(HUFF_ELEMENT),MYF(MY_WME))))
1558 return 1;
1559 }
1560 else
1561 {
1562 HUFF_ELEMENT *temp;
1563 if (!(temp=
1564 (HUFF_ELEMENT*) my_realloc((uchar*) huff_tree->element_buffer,
1565 found*2*sizeof(HUFF_ELEMENT),
1566 MYF(MY_WME))))
1567 return 1;
1568 huff_tree->element_buffer=temp;
1569 }
1570
1571 huff_counts->tree=huff_tree;
1572 huff_tree->counts=huff_counts;
1573 huff_tree->min_chr=first;
1574 huff_tree->max_chr=last;
1575 huff_tree->char_bits=max_bit(last-first);
1576 huff_tree->offset_bits=max_bit(found-1)+1;
1577
1578 if (huff_counts->tree_buff)
1579 {
1580 huff_tree->elements=0;
1581 huff_tree->tree_pack_length=(1+15+16+5+5+
1582 (huff_tree->char_bits+1)*found+
1583 (huff_tree->offset_bits+1)*
1584 (found-2)+7)/8 +
1585 (uint) (huff_tree->counts->tree_pos-
1586 huff_tree->counts->tree_buff);
1587 /*
1588 Put a HUFF_ELEMENT into the queue for every distinct column value.
1589
1590 tree_walk() calls save_counts_in_queue() for every element in
1591 'int_tree'. This takes elements from the target trees element
1592 buffer and places references to them into the buffer of the
1593 priority queue. We insert in column value order, but the order is
1594 in fact irrelevant here. We will establish the correct order
1595 later.
1596 */
1597 tree_walk(&huff_counts->int_tree,
1598 (int (*)(void*, element_count,void*)) save_counts_in_queue,
1599 (uchar*) huff_tree, left_root_right);
1600 }
1601 else
1602 {
1603 huff_tree->elements=found;
1604 huff_tree->tree_pack_length=(9+9+5+5+
1605 (huff_tree->char_bits+1)*found+
1606 (huff_tree->offset_bits+1)*
1607 (found-2)+7)/8;
1608 /*
1609 Put a HUFF_ELEMENT into the queue for every byte code found in the column.
1610
1611 The elements are taken from the target trees element buffer.
1612 Instead of using queue_insert(), we just place references to the
1613 elements into the buffer of the priority queue. We insert in byte
1614 value order, but the order is in fact irrelevant here. We will
1615 establish the correct order later.
1616 */
1617 for (i=first, found=0 ; i <= last ; i++)
1618 {
1619 if (huff_counts->counts[i])
1620 {
1621 new_huff_el=huff_tree->element_buffer+(found++);
1622 new_huff_el->count=huff_counts->counts[i];
1623 new_huff_el->a.leaf.null=0;
1624 new_huff_el->a.leaf.element_nr=i;
1625 queue.root[found]=(uchar*) new_huff_el;
1626 }
1627 }
1628 /*
1629 If there is only a single byte value in this field in all records,
1630 add a second element with zero incidence. This is required to enter
1631 the loop, which builds the Huffman tree.
1632 */
1633 while (found < 2)
1634 {
1635 new_huff_el=huff_tree->element_buffer+(found++);
1636 new_huff_el->count=0;
1637 new_huff_el->a.leaf.null=0;
1638 if (last)
1639 new_huff_el->a.leaf.element_nr=huff_tree->min_chr=last-1;
1640 else
1641 new_huff_el->a.leaf.element_nr=huff_tree->max_chr=last+1;
1642 queue.root[found]=(uchar*) new_huff_el;
1643 }
1644 }
1645
1646 /* Make a queue from the queue buffer. */
1647 queue.elements=found;
1648
1649 /*
1650 Make a priority queue from the queue. Construct its index so that we
1651 have a partially ordered tree.
1652 */
1653 queue_fix(&queue);
1654
1655 /* The Huffman algorithm. */
1656 bytes_packed=0; bits_packed=0;
1657 for (i=1 ; i < found ; i++)
1658 {
1659 /*
1660 Pop the top element from the queue (the one with the least incidence).
1661 Popping from a priority queue includes a re-ordering of the queue,
1662 to get the next least incidence element to the top.
1663 */
1664 a=(HUFF_ELEMENT*) queue_remove_top(&queue);
1665 /* Copy the next least incidence element */
1666 b=(HUFF_ELEMENT*) queue_top(&queue);
1667 /* Get a new element from the element buffer. */
1668 new_huff_el=huff_tree->element_buffer+found+i;
1669 /* The new element gets the sum of the two least incidence elements. */
1670 new_huff_el->count=a->count+b->count;
1671 /*
1672 The Huffman algorithm assigns another bit to the code for a byte
1673 every time that bytes incidence is combined (directly or indirectly)
1674 to a new element as one of the two least incidence elements.
1675 This means that one more bit per incidence of that byte is required
1676 in the resulting file. So we add the new combined incidence as the
1677 number of bits by which the result grows.
1678 */
1679 bits_packed+=(uint) (new_huff_el->count & 7);
1680 bytes_packed+=new_huff_el->count/8;
1681 /* The new element points to its children, lesser in left. */
1682 new_huff_el->a.nod.left=a;
1683 new_huff_el->a.nod.right=b;
1684 /*
1685 Replace the copied top element by the new element and re-order the
1686 queue.
1687 */
1688 queue_top(&queue)= (uchar*) new_huff_el;
1689 queue_replace_top(&queue);
1690 }
1691 huff_tree->root=(HUFF_ELEMENT*) queue.root[1];
1692 huff_tree->bytes_packed=bytes_packed+(bits_packed+7)/8;
1693 return 0;
1694}
1695
1696static int compare_tree(void* cmp_arg __attribute__((unused)),
1697 register const uchar *s, register const uchar *t)
1698{
1699 uint length;
1700 for (length=global_count->field_length; length-- ;)
1701 if (*s++ != *t++)
1702 return (int) s[-1] - (int) t[-1];
1703 return 0;
1704}
1705
1706/*
1707 Organize distinct column values and their incidences into a priority queue.
1708
1709 SYNOPSIS
1710 save_counts_in_queue()
1711 key The column value.
1712 count The incidence of this value.
1713 tree The Huffman tree to be built later.
1714
1715 DESCRIPTION
1716 We use the element buffer of the targeted tree. The distinct column
1717 values are organized in a priority queue first. The Huffman
1718 algorithm will later organize the elements into a Huffman tree. For
1719 the time being, we just place references to the elements into the
1720 queue buffer. The buffer will later be organized into a priority
1721 queue.
1722
1723 RETURN
1724 0
1725 */
1726
1727static int save_counts_in_queue(uchar *key, element_count count,
1728 HUFF_TREE *tree)
1729{
1730 HUFF_ELEMENT *new_huff_el;
1731
1732 new_huff_el=tree->element_buffer+(tree->elements++);
1733 new_huff_el->count=count;
1734 new_huff_el->a.leaf.null=0;
1735 new_huff_el->a.leaf.element_nr= (uint) (key- tree->counts->tree_buff) /
1736 tree->counts->field_length;
1737 queue.root[tree->elements]=(uchar*) new_huff_el;
1738 return 0;
1739}
1740
1741
1742/*
1743 Calculate length of file if given counts should be used.
1744
1745 SYNOPSIS
1746 calc_packed_length()
1747 huff_counts The counts for a column of the table(s).
1748 add_tree_lenght If the decode tree length should be added.
1749
1750 DESCRIPTION
1751 We need to follow the Huffman algorithm until we know, how many bits
1752 are required for each byte code. But we do not need the resulting
1753 Huffman tree. Hence, we can leave out some steps which are essential
1754 in make_huff_tree().
1755
1756 RETURN
1757 Number of bytes required to compress this table column.
1758*/
1759
1760static my_off_t calc_packed_length(HUFF_COUNTS *huff_counts,
1761 uint add_tree_lenght)
1762{
1763 uint i,found,bits_packed,first,last;
1764 my_off_t bytes_packed;
1765 HUFF_ELEMENT element_buffer[256];
1766 DBUG_ENTER("calc_packed_length");
1767
1768 /*
1769 WARNING: We use a small hack for efficiency: Instead of placing
1770 references to HUFF_ELEMENTs into the queue, we just insert
1771 references to the counts of the byte codes which appeared in this
1772 table column. During the Huffman algorithm they are successively
1773 replaced by references to HUFF_ELEMENTs. This works, because
1774 HUFF_ELEMENTs have the incidence count at their beginning.
1775 Regardless, wether the queue array contains references to counts of
1776 type my_off_t or references to HUFF_ELEMENTs which have the count of
1777 type my_off_t at their beginning, it always points to a count of the
1778 same type.
1779
1780 Instead of using queue_insert(), we just copy the references into
1781 the buffer of the priority queue. We insert in byte value order, but
1782 the order is in fact irrelevant here. We will establish the correct
1783 order later.
1784 */
1785 first=last=0;
1786 for (i=found=0 ; i < 256 ; i++)
1787 {
1788 if (huff_counts->counts[i])
1789 {
1790 if (! found++)
1791 first=i;
1792 last=i;
1793 /* We start with root[1], which is the queues top element. */
1794 queue.root[found]=(uchar*) &huff_counts->counts[i];
1795 }
1796 }
1797 if (!found)
1798 DBUG_RETURN(0); /* Empty tree */
1799 /*
1800 If there is only a single byte value in this field in all records,
1801 add a second element with zero incidence. This is required to enter
1802 the loop, which follows the Huffman algorithm.
1803 */
1804 if (found < 2)
1805 queue.root[++found]=(uchar*) &huff_counts->counts[last ? 0 : 1];
1806
1807 /* Make a queue from the queue buffer. */
1808 queue.elements=found;
1809
1810 bytes_packed=0; bits_packed=0;
1811 /* Add the length of the coding table, which would become part of the file. */
1812 if (add_tree_lenght)
1813 bytes_packed=(8+9+5+5+(max_bit(last-first)+1)*found+
1814 (max_bit(found-1)+1+1)*(found-2) +7)/8;
1815
1816 /*
1817 Make a priority queue from the queue. Construct its index so that we
1818 have a partially ordered tree.
1819 */
1820 queue_fix(&queue);
1821
1822 /* The Huffman algorithm. */
1823 for (i=0 ; i < found-1 ; i++)
1824 {
1825 my_off_t *a;
1826 my_off_t *b;
1827 HUFF_ELEMENT *new_huff_el;
1828
1829 /*
1830 Pop the top element from the queue (the one with the least
1831 incidence). Popping from a priority queue includes a re-ordering
1832 of the queue, to get the next least incidence element to the top.
1833 */
1834 a= (my_off_t*) queue_remove_top(&queue);
1835 /* Copy the next least incidence element. */
1836 b= (my_off_t*) queue_top(&queue);
1837 /* Create a new element in a local (automatic) buffer. */
1838 new_huff_el= element_buffer + i;
1839 /* The new element gets the sum of the two least incidence elements. */
1840 new_huff_el->count= *a + *b;
1841 /*
1842 The Huffman algorithm assigns another bit to the code for a byte
1843 every time that bytes incidence is combined (directly or indirectly)
1844 to a new element as one of the two least incidence elements.
1845 This means that one more bit per incidence of that byte is required
1846 in the resulting file. So we add the new combined incidence as the
1847 number of bits by which the result grows.
1848 */
1849 bits_packed+=(uint) (new_huff_el->count & 7);
1850 bytes_packed+=new_huff_el->count/8;
1851 /*
1852 Replace the copied top element by the new element and re-order the
1853 queue. This successively replaces the references to counts by
1854 references to HUFF_ELEMENTs.
1855 */
1856 queue_top(&queue)= (uchar*) new_huff_el;
1857 queue_replace_top(&queue);
1858 }
1859 DBUG_RETURN(bytes_packed+(bits_packed+7)/8);
1860}
1861
1862
1863 /* Remove trees that don't give any compression */
1864
1865static uint join_same_trees(HUFF_COUNTS *huff_counts, uint trees)
1866{
1867 uint k,tree_number;
1868 HUFF_COUNTS count,*i,*j,*last_count;
1869
1870 last_count=huff_counts+trees;
1871 for (tree_number=0, i=huff_counts ; i < last_count ; i++)
1872 {
1873 if (!i->tree->tree_number)
1874 {
1875 i->tree->tree_number= ++tree_number;
1876 if (i->tree_buff)
1877 continue; /* Don't join intervall */
1878 for (j=i+1 ; j < last_count ; j++)
1879 {
1880 if (! j->tree->tree_number && ! j->tree_buff)
1881 {
1882 for (k=0 ; k < 256 ; k++)
1883 count.counts[k]=i->counts[k]+j->counts[k];
1884 if (calc_packed_length(&count,1) <=
1885 i->tree->bytes_packed + j->tree->bytes_packed+
1886 i->tree->tree_pack_length+j->tree->tree_pack_length+
1887 ALLOWED_JOIN_DIFF)
1888 {
1889 memcpy(i->counts, count.counts,
1890 sizeof(count.counts[0])*256);
1891 my_free(j->tree->element_buffer);
1892 j->tree->element_buffer=0;
1893 j->tree=i->tree;
1894 bmove((uchar*) i->counts,(uchar*) count.counts,
1895 sizeof(count.counts[0])*256);
1896 if (make_huff_tree(i->tree,i))
1897 return (uint) -1;
1898 }
1899 }
1900 }
1901 }
1902 }
1903 DBUG_PRINT("info", ("Original trees: %d After join: %d",
1904 trees, tree_number));
1905 if (verbose)
1906 printf("Original trees: %d After join: %d\n", trees, tree_number);
1907 return tree_number; /* Return trees left */
1908}
1909
1910
1911/*
1912 Fill in huff_tree encode tables.
1913
1914 SYNOPSIS
1915 make_huff_decode_table()
1916 huff_tree An array of HUFF_TREE which are to be encoded.
1917 trees The number of HUFF_TREE in the array.
1918
1919 RETURN
1920 0 success
1921 != 0 error
1922*/
1923
1924static int make_huff_decode_table(HUFF_TREE *huff_tree, uint trees)
1925{
1926 uint elements;
1927 for ( ; trees-- ; huff_tree++)
1928 {
1929 if (huff_tree->tree_number > 0)
1930 {
1931 elements=huff_tree->counts->tree_buff ? huff_tree->elements : 256;
1932 if (!(huff_tree->code =
1933 (ulonglong*) my_malloc(elements*
1934 (sizeof(ulonglong) + sizeof(uchar)),
1935 MYF(MY_WME | MY_ZEROFILL))))
1936 return 1;
1937 huff_tree->code_len=(uchar*) (huff_tree->code+elements);
1938 make_traverse_code_tree(huff_tree, huff_tree->root,
1939 8 * sizeof(ulonglong), 0);
1940 }
1941 }
1942 return 0;
1943}
1944
1945
1946static void make_traverse_code_tree(HUFF_TREE *huff_tree,
1947 HUFF_ELEMENT *element,
1948 uint size, ulonglong code)
1949{
1950 uint chr;
1951 if (!element->a.leaf.null)
1952 {
1953 chr=element->a.leaf.element_nr;
1954 huff_tree->code_len[chr]= (uchar) (8 * sizeof(ulonglong) - size);
1955 huff_tree->code[chr]= (code >> size);
1956 if (huff_tree->height < 8 * sizeof(ulonglong) - size)
1957 huff_tree->height= 8 * sizeof(ulonglong) - size;
1958 }
1959 else
1960 {
1961 size--;
1962 make_traverse_code_tree(huff_tree,element->a.nod.left,size,code);
1963 make_traverse_code_tree(huff_tree, element->a.nod.right, size,
1964 code + (((ulonglong) 1) << size));
1965 }
1966 return;
1967}
1968
1969
1970/*
1971 Convert a value into binary digits.
1972
1973 SYNOPSIS
1974 bindigits()
1975 value The value.
1976 length The number of low order bits to convert.
1977
1978 NOTE
1979 The result string is in static storage. It is reused on every call.
1980 So you cannot use it twice in one expression.
1981
1982 RETURN
1983 A pointer to a static NUL-terminated string.
1984 */
1985
1986static char *bindigits(ulonglong value, uint bits)
1987{
1988 static char digits[72];
1989 char *ptr= digits;
1990 uint idx= bits;
1991
1992 DBUG_ASSERT(idx < sizeof(digits));
1993 while (idx)
1994 *(ptr++)= '0' + ((char) (value >> (--idx)) & (char) 1);
1995 *ptr= '\0';
1996 return digits;
1997}
1998
1999
2000/*
2001 Convert a value into hexadecimal digits.
2002
2003 SYNOPSIS
2004 hexdigits()
2005 value The value.
2006
2007 NOTE
2008 The result string is in static storage. It is reused on every call.
2009 So you cannot use it twice in one expression.
2010
2011 RETURN
2012 A pointer to a static NUL-terminated string.
2013 */
2014
2015static char *hexdigits(ulonglong value)
2016{
2017 static char digits[20];
2018 char *ptr= digits;
2019 uint idx= 2 * sizeof(value); /* Two hex digits per byte. */
2020
2021 DBUG_ASSERT(idx < sizeof(digits));
2022 while (idx)
2023 {
2024 if ((*(ptr++)= '0' + ((char) (value >> (4 * (--idx))) & (char) 0xf)) > '9')
2025 *(ptr - 1)+= 'a' - '9' - 1;
2026 }
2027 *ptr= '\0';
2028 return digits;
2029}
2030
2031
2032 /* Write header to new packed data file */
2033
2034static int write_header(PACK_MRG_INFO *mrg,uint head_length,uint trees,
2035 my_off_t tot_elements,my_off_t filelength)
2036{
2037 uchar *buff= (uchar*) file_buffer.pos;
2038
2039 bzero(buff,HEAD_LENGTH);
2040 memcpy(buff,myisam_pack_file_magic,4);
2041 int4store(buff+4,head_length);
2042 int4store(buff+8, mrg->min_pack_length);
2043 int4store(buff+12,mrg->max_pack_length);
2044 int4store(buff+16,tot_elements);
2045 int4store(buff+20,intervall_length);
2046 int2store(buff+24,trees);
2047 buff[26]=(char) mrg->ref_length;
2048 /* Save record pointer length */
2049 buff[27]= (uchar) mi_get_pointer_length((ulonglong) filelength,2);
2050 if (test_only)
2051 return 0;
2052 my_seek(file_buffer.file,0L,MY_SEEK_SET,MYF(0));
2053 return my_write(file_buffer.file,(const uchar *) file_buffer.pos,HEAD_LENGTH,
2054 MYF(MY_WME | MY_NABP | MY_WAIT_IF_FULL)) != 0;
2055}
2056
2057 /* Write fieldinfo to new packed file */
2058
2059static void write_field_info(HUFF_COUNTS *counts, uint fields, uint trees)
2060{
2061 reg1 uint i;
2062 uint huff_tree_bits;
2063 huff_tree_bits=max_bit(trees ? trees-1 : 0);
2064
2065 DBUG_PRINT("info", (" "));
2066 DBUG_PRINT("info", ("column types:"));
2067 DBUG_PRINT("info", ("FIELD_NORMAL 0"));
2068 DBUG_PRINT("info", ("FIELD_SKIP_ENDSPACE 1"));
2069 DBUG_PRINT("info", ("FIELD_SKIP_PRESPACE 2"));
2070 DBUG_PRINT("info", ("FIELD_SKIP_ZERO 3"));
2071 DBUG_PRINT("info", ("FIELD_BLOB 4"));
2072 DBUG_PRINT("info", ("FIELD_CONSTANT 5"));
2073 DBUG_PRINT("info", ("FIELD_INTERVALL 6"));
2074 DBUG_PRINT("info", ("FIELD_ZERO 7"));
2075 DBUG_PRINT("info", ("FIELD_VARCHAR 8"));
2076 DBUG_PRINT("info", ("FIELD_CHECK 9"));
2077 DBUG_PRINT("info", (" "));
2078 DBUG_PRINT("info", ("pack type as a set of flags:"));
2079 DBUG_PRINT("info", ("PACK_TYPE_SELECTED 1"));
2080 DBUG_PRINT("info", ("PACK_TYPE_SPACE_FIELDS 2"));
2081 DBUG_PRINT("info", ("PACK_TYPE_ZERO_FILL 4"));
2082 DBUG_PRINT("info", (" "));
2083 if (verbose >= 2)
2084 {
2085 printf("\n");
2086 printf("column types:\n");
2087 printf("FIELD_NORMAL 0\n");
2088 printf("FIELD_SKIP_ENDSPACE 1\n");
2089 printf("FIELD_SKIP_PRESPACE 2\n");
2090 printf("FIELD_SKIP_ZERO 3\n");
2091 printf("FIELD_BLOB 4\n");
2092 printf("FIELD_CONSTANT 5\n");
2093 printf("FIELD_INTERVALL 6\n");
2094 printf("FIELD_ZERO 7\n");
2095 printf("FIELD_VARCHAR 8\n");
2096 printf("FIELD_CHECK 9\n");
2097 printf("\n");
2098 printf("pack type as a set of flags:\n");
2099 printf("PACK_TYPE_SELECTED 1\n");
2100 printf("PACK_TYPE_SPACE_FIELDS 2\n");
2101 printf("PACK_TYPE_ZERO_FILL 4\n");
2102 printf("\n");
2103 }
2104 for (i=0 ; i++ < fields ; counts++)
2105 {
2106 write_bits((ulonglong) (int) counts->field_type, 5);
2107 write_bits(counts->pack_type,6);
2108 if (counts->pack_type & PACK_TYPE_ZERO_FILL)
2109 write_bits(counts->max_zero_fill,5);
2110 else
2111 write_bits(counts->length_bits,5);
2112 write_bits((ulonglong) counts->tree->tree_number - 1, huff_tree_bits);
2113 DBUG_PRINT("info", ("column: %3u type: %2u pack: %2u zero: %4u "
2114 "lbits: %2u tree: %2u length: %4u",
2115 i , counts->field_type, counts->pack_type,
2116 counts->max_zero_fill, counts->length_bits,
2117 counts->tree->tree_number, counts->field_length));
2118 if (verbose >= 2)
2119 printf("column: %3u type: %2u pack: %2u zero: %4u lbits: %2u "
2120 "tree: %2u length: %4u\n", i , counts->field_type,
2121 counts->pack_type, counts->max_zero_fill, counts->length_bits,
2122 counts->tree->tree_number, counts->field_length);
2123 }
2124 flush_bits();
2125 return;
2126}
2127
2128 /* Write all huff_trees to new datafile. Return tot count of
2129 elements in all trees
2130 Returns 0 on error */
2131
2132static my_off_t write_huff_tree(HUFF_TREE *huff_tree, uint trees)
2133{
2134 uint i,int_length;
2135 uint tree_no;
2136 uint codes;
2137 uint errors= 0;
2138 uint *packed_tree,*offset,length;
2139 my_off_t elements;
2140
2141 /* Find the highest number of elements in the trees. */
2142 for (i=length=0 ; i < trees ; i++)
2143 if (huff_tree[i].tree_number > 0 && huff_tree[i].elements > length)
2144 length=huff_tree[i].elements;
2145 /*
2146 Allocate a buffer for packing a decode tree. Two numbers per element
2147 (left child and right child).
2148 */
2149 if (!(packed_tree=(uint*) my_alloca(sizeof(uint)*length*2)))
2150 {
2151 my_error(EE_OUTOFMEMORY, MYF(ME_BELL+ME_FATALERROR),
2152 sizeof(uint)*length*2);
2153 return 0;
2154 }
2155
2156 DBUG_PRINT("info", (" "));
2157 if (verbose >= 2)
2158 printf("\n");
2159 tree_no= 0;
2160 intervall_length=0;
2161 for (elements=0; trees-- ; huff_tree++)
2162 {
2163 /* Skip columns that have been joined with other columns. */
2164 if (huff_tree->tree_number == 0)
2165 continue; /* Deleted tree */
2166 tree_no++;
2167 DBUG_PRINT("info", (" "));
2168 if (verbose >= 3)
2169 printf("\n");
2170 /* Count the total number of elements (byte codes or column values). */
2171 elements+=huff_tree->elements;
2172 huff_tree->max_offset=2;
2173 /* Build a tree of offsets and codes for decoding in 'packed_tree'. */
2174 if (huff_tree->elements <= 1)
2175 offset=packed_tree;
2176 else
2177 offset=make_offset_code_tree(huff_tree,huff_tree->root,packed_tree);
2178
2179 /* This should be the same as 'length' above. */
2180 huff_tree->offset_bits=max_bit(huff_tree->max_offset);
2181
2182 /*
2183 Since we check this during collecting the distinct column values,
2184 this should never happen.
2185 */
2186 if (huff_tree->max_offset >= IS_OFFSET)
2187 { /* This should be impossible */
2188 (void) fprintf(stderr, "Tree offset got too big: %d, aborted\n",
2189 huff_tree->max_offset);
2190 my_afree((uchar*) packed_tree);
2191 return 0;
2192 }
2193
2194 DBUG_PRINT("info", ("pos: %lu elements: %u tree-elements: %lu "
2195 "char_bits: %u\n",
2196 (ulong) (file_buffer.pos - file_buffer.buffer),
2197 huff_tree->elements, (ulong) (offset - packed_tree),
2198 huff_tree->char_bits));
2199 if (!huff_tree->counts->tree_buff)
2200 {
2201 /* We do a byte compression on this column. Mark with bit 0. */
2202 write_bits(0,1);
2203 write_bits(huff_tree->min_chr,8);
2204 write_bits(huff_tree->elements,9);
2205 write_bits(huff_tree->char_bits,5);
2206 write_bits(huff_tree->offset_bits,5);
2207 int_length=0;
2208 }
2209 else
2210 {
2211 int_length=(uint) (huff_tree->counts->tree_pos -
2212 huff_tree->counts->tree_buff);
2213 /* We have distinct column values for this column. Mark with bit 1. */
2214 write_bits(1,1);
2215 write_bits(huff_tree->elements,15);
2216 write_bits(int_length,16);
2217 write_bits(huff_tree->char_bits,5);
2218 write_bits(huff_tree->offset_bits,5);
2219 intervall_length+=int_length;
2220 }
2221 DBUG_PRINT("info", ("tree: %2u elements: %4u char_bits: %2u "
2222 "offset_bits: %2u %s: %5u codelen: %2u",
2223 tree_no, huff_tree->elements, huff_tree->char_bits,
2224 huff_tree->offset_bits, huff_tree->counts->tree_buff ?
2225 "bufflen" : "min_chr", huff_tree->counts->tree_buff ?
2226 int_length : huff_tree->min_chr, huff_tree->height));
2227 if (verbose >= 2)
2228 printf("tree: %2u elements: %4u char_bits: %2u offset_bits: %2u "
2229 "%s: %5u codelen: %2u\n", tree_no, huff_tree->elements,
2230 huff_tree->char_bits, huff_tree->offset_bits,
2231 huff_tree->counts->tree_buff ? "bufflen" : "min_chr",
2232 huff_tree->counts->tree_buff ? int_length :
2233 huff_tree->min_chr, huff_tree->height);
2234
2235 /* Check that the code tree length matches the element count. */
2236 length=(uint) (offset-packed_tree);
2237 if (length != huff_tree->elements*2-2)
2238 {
2239 (void) fprintf(stderr, "error: Huff-tree-length: %d != calc_length: %d\n",
2240 length, huff_tree->elements * 2 - 2);
2241 errors++;
2242 break;
2243 }
2244
2245 for (i=0 ; i < length ; i++)
2246 {
2247 if (packed_tree[i] & IS_OFFSET)
2248 write_bits(packed_tree[i] - IS_OFFSET+ (1 << huff_tree->offset_bits),
2249 huff_tree->offset_bits+1);
2250 else
2251 write_bits(packed_tree[i]-huff_tree->min_chr,huff_tree->char_bits+1);
2252 DBUG_PRINT("info", ("tree[0x%04x]: %s0x%04x",
2253 i, (packed_tree[i] & IS_OFFSET) ?
2254 " -> " : "", (packed_tree[i] & IS_OFFSET) ?
2255 packed_tree[i] - IS_OFFSET + i : packed_tree[i]));
2256 if (verbose >= 3)
2257 printf("tree[0x%04x]: %s0x%04x\n",
2258 i, (packed_tree[i] & IS_OFFSET) ? " -> " : "",
2259 (packed_tree[i] & IS_OFFSET) ?
2260 packed_tree[i] - IS_OFFSET + i : packed_tree[i]);
2261 }
2262 flush_bits();
2263
2264 /*
2265 Display coding tables and check their correctness.
2266 */
2267 codes= huff_tree->counts->tree_buff ? huff_tree->elements : 256;
2268 for (i= 0; i < codes; i++)
2269 {
2270 ulonglong code;
2271 uint bits;
2272 uint len;
2273 uint idx;
2274
2275 if (! (len= huff_tree->code_len[i]))
2276 continue;
2277 DBUG_PRINT("info", ("code[0x%04x]: 0x%s bits: %2u bin: %s", i,
2278 hexdigits(huff_tree->code[i]), huff_tree->code_len[i],
2279 bindigits(huff_tree->code[i],
2280 huff_tree->code_len[i])));
2281 if (verbose >= 3)
2282 printf("code[0x%04x]: 0x%s bits: %2u bin: %s\n", i,
2283 hexdigits(huff_tree->code[i]), huff_tree->code_len[i],
2284 bindigits(huff_tree->code[i], huff_tree->code_len[i]));
2285
2286 /* Check that the encode table decodes correctly. */
2287 code= 0;
2288 bits= 0;
2289 idx= 0;
2290 DBUG_EXECUTE_IF("forcechkerr1", len--;);
2291 DBUG_EXECUTE_IF("forcechkerr2", bits= 8 * sizeof(code););
2292 DBUG_EXECUTE_IF("forcechkerr3", idx= length;);
2293 for (;;)
2294 {
2295 if (! len)
2296 {
2297 (void) fflush(stdout);
2298 (void) fprintf(stderr, "error: code 0x%s with %u bits not found\n",
2299 hexdigits(huff_tree->code[i]), huff_tree->code_len[i]);
2300 errors++;
2301 break;
2302 }
2303 code<<= 1;
2304 code|= (huff_tree->code[i] >> (--len)) & 1;
2305 bits++;
2306 if (bits > 8 * sizeof(code))
2307 {
2308 (void) fflush(stdout);
2309 (void) fprintf(stderr, "error: Huffman code too long: %u/%u\n",
2310 bits, (uint) (8 * sizeof(code)));
2311 errors++;
2312 break;
2313 }
2314 idx+= (uint) code & 1;
2315 if (idx >= length)
2316 {
2317 (void) fflush(stdout);
2318 (void) fprintf(stderr, "error: illegal tree offset: %u/%u\n",
2319 idx, length);
2320 errors++;
2321 break;
2322 }
2323 if (packed_tree[idx] & IS_OFFSET)
2324 idx+= packed_tree[idx] & ~IS_OFFSET;
2325 else
2326 break; /* Hit a leaf. This contains the result value. */
2327 }
2328 if (errors)
2329 break;
2330
2331 DBUG_EXECUTE_IF("forcechkerr4", packed_tree[idx]++;);
2332 if (packed_tree[idx] != i)
2333 {
2334 (void) fflush(stdout);
2335 (void) fprintf(stderr, "error: decoded value 0x%04x should be: 0x%04x\n",
2336 packed_tree[idx], i);
2337 errors++;
2338 break;
2339 }
2340 } /*end for (codes)*/
2341 if (errors)
2342 break;
2343
2344 /* Write column values in case of distinct column value compression. */
2345 if (huff_tree->counts->tree_buff)
2346 {
2347 for (i=0 ; i < int_length ; i++)
2348 {
2349 write_bits((ulonglong) (uchar) huff_tree->counts->tree_buff[i], 8);
2350 DBUG_PRINT("info", ("column_values[0x%04x]: 0x%02x",
2351 i, (uchar) huff_tree->counts->tree_buff[i]));
2352 if (verbose >= 3)
2353 printf("column_values[0x%04x]: 0x%02x\n",
2354 i, (uchar) huff_tree->counts->tree_buff[i]);
2355 }
2356 }
2357 flush_bits();
2358 }
2359 DBUG_PRINT("info", (" "));
2360 if (verbose >= 2)
2361 printf("\n");
2362 my_afree((uchar*) packed_tree);
2363 if (errors)
2364 {
2365 (void) fprintf(stderr, "Error: Generated decode trees are corrupt. Stop.\n");
2366 return 0;
2367 }
2368 return elements;
2369}
2370
2371
2372static uint *make_offset_code_tree(HUFF_TREE *huff_tree, HUFF_ELEMENT *element,
2373 uint *offset)
2374{
2375 uint *prev_offset;
2376
2377 prev_offset= offset;
2378 /*
2379 'a.leaf.null' takes the same place as 'a.nod.left'. If this is null,
2380 then there is no left child and, hence no right child either. This
2381 is a property of a binary tree. An element is either a node with two
2382 childs, or a leaf without childs.
2383
2384 The current element is always a node with two childs. Go left first.
2385 */
2386 if (!element->a.nod.left->a.leaf.null)
2387 {
2388 /* Store the byte code or the index of the column value. */
2389 prev_offset[0] =(uint) element->a.nod.left->a.leaf.element_nr;
2390 offset+=2;
2391 }
2392 else
2393 {
2394 /*
2395 Recursively traverse the tree to the left. Mark it as an offset to
2396 another tree node (in contrast to a byte code or column value index).
2397 */
2398 prev_offset[0]= IS_OFFSET+2;
2399 offset=make_offset_code_tree(huff_tree,element->a.nod.left,offset+2);
2400 }
2401
2402 /* Now, check the right child. */
2403 if (!element->a.nod.right->a.leaf.null)
2404 {
2405 /* Store the byte code or the index of the column value. */
2406 prev_offset[1]=element->a.nod.right->a.leaf.element_nr;
2407 return offset;
2408 }
2409 else
2410 {
2411 /*
2412 Recursively traverse the tree to the right. Mark it as an offset to
2413 another tree node (in contrast to a byte code or column value index).
2414 */
2415 uint temp=(uint) (offset-prev_offset-1);
2416 prev_offset[1]= IS_OFFSET+ temp;
2417 if (huff_tree->max_offset < temp)
2418 huff_tree->max_offset = temp;
2419 return make_offset_code_tree(huff_tree,element->a.nod.right,offset);
2420 }
2421}
2422
2423 /* Get number of bits neaded to represent value */
2424
2425static uint max_bit(register uint value)
2426{
2427 reg2 uint power=1;
2428
2429 while ((value>>=1))
2430 power++;
2431 return (power);
2432}
2433
2434
2435static int compress_isam_file(PACK_MRG_INFO *mrg, HUFF_COUNTS *huff_counts)
2436{
2437 int error;
2438 uint i,max_calc_length,pack_ref_length,min_record_length,max_record_length,
2439 intervall,field_length,max_pack_length,pack_blob_length;
2440 my_off_t record_count;
2441 char llbuf[32];
2442 ulong length,pack_length;
2443 uchar *record,*pos,*end_pos,*record_pos,*start_pos;
2444 HUFF_COUNTS *count,*end_count;
2445 HUFF_TREE *tree;
2446 MI_INFO *isam_file=mrg->file[0];
2447 uint pack_version= (uint) isam_file->s->pack.version;
2448 DBUG_ENTER("compress_isam_file");
2449
2450 /* Allocate a buffer for the records (excluding blobs). */
2451 if (!(record=(uchar*) my_alloca(isam_file->s->base.reclength)))
2452 return -1;
2453
2454 end_count=huff_counts+isam_file->s->base.fields;
2455 min_record_length= (uint) ~0;
2456 max_record_length=0;
2457
2458 /*
2459 Calculate the maximum number of bits required to pack the records.
2460 Remember to understand 'max_zero_fill' as 'min_zero_fill'.
2461 The tree height determines the maximum number of bits per value.
2462 Some fields skip leading or trailing spaces or zeroes. The skipped
2463 number of bytes is encoded by 'length_bits' bits.
2464 Empty blobs and varchar are encoded with a single 1 bit. Other blobs
2465 and varchar get a leading 0 bit.
2466 */
2467 for (i=max_calc_length=0 ; i < isam_file->s->base.fields ; i++)
2468 {
2469 if (!(huff_counts[i].pack_type & PACK_TYPE_ZERO_FILL))
2470 huff_counts[i].max_zero_fill=0;
2471 if (huff_counts[i].field_type == FIELD_CONSTANT ||
2472 huff_counts[i].field_type == FIELD_ZERO ||
2473 huff_counts[i].field_type == FIELD_CHECK)
2474 continue;
2475 if (huff_counts[i].field_type == FIELD_INTERVALL)
2476 max_calc_length+=huff_counts[i].tree->height;
2477 else if (huff_counts[i].field_type == FIELD_BLOB ||
2478 huff_counts[i].field_type == FIELD_VARCHAR)
2479 max_calc_length+=huff_counts[i].tree->height*huff_counts[i].max_length + huff_counts[i].length_bits +1;
2480 else
2481 max_calc_length+=
2482 (huff_counts[i].field_length - huff_counts[i].max_zero_fill)*
2483 huff_counts[i].tree->height+huff_counts[i].length_bits;
2484 }
2485 max_calc_length= (max_calc_length + 7) / 8;
2486 pack_ref_length= calc_pack_length(pack_version, max_calc_length);
2487 record_count=0;
2488 /* 'max_blob_length' is the max length of all blobs of a record. */
2489 pack_blob_length= isam_file->s->base.blobs ?
2490 calc_pack_length(pack_version, mrg->max_blob_length) : 0;
2491 max_pack_length=pack_ref_length+pack_blob_length;
2492
2493 DBUG_PRINT("fields", ("==="));
2494 mrg_reset(mrg);
2495 while ((error=mrg_rrnd(mrg,record)) != HA_ERR_END_OF_FILE)
2496 {
2497 ulong tot_blob_length=0;
2498 if (! error)
2499 {
2500 if (flush_buffer((ulong) max_calc_length + (ulong) max_pack_length))
2501 break;
2502 record_pos= (uchar*) file_buffer.pos;
2503 file_buffer.pos+=max_pack_length;
2504 for (start_pos=record, count= huff_counts; count < end_count ; count++)
2505 {
2506 end_pos=start_pos+(field_length=count->field_length);
2507 tree=count->tree;
2508
2509 DBUG_PRINT("fields", ("column: %3lu type: %2u pack: %2u zero: %4u "
2510 "lbits: %2u tree: %2u length: %4u",
2511 (ulong) (count - huff_counts + 1),
2512 count->field_type,
2513 count->pack_type, count->max_zero_fill,
2514 count->length_bits, count->tree->tree_number,
2515 count->field_length));
2516
2517 /* Check if the column contains spaces only. */
2518 if (count->pack_type & PACK_TYPE_SPACE_FIELDS)
2519 {
2520 for (pos=start_pos ; *pos == ' ' && pos < end_pos; pos++) ;
2521 if (pos == end_pos)
2522 {
2523 DBUG_PRINT("fields",
2524 ("PACK_TYPE_SPACE_FIELDS spaces only, bits: 1"));
2525 DBUG_PRINT("fields", ("---"));
2526 write_bits(1,1);
2527 start_pos=end_pos;
2528 continue;
2529 }
2530 DBUG_PRINT("fields",
2531 ("PACK_TYPE_SPACE_FIELDS not only spaces, bits: 1"));
2532 write_bits(0,1);
2533 }
2534 end_pos-=count->max_zero_fill;
2535 field_length-=count->max_zero_fill;
2536
2537 switch (count->field_type) {
2538 case FIELD_SKIP_ZERO:
2539 if (!memcmp((uchar*) start_pos,zero_string,field_length))
2540 {
2541 DBUG_PRINT("fields", ("FIELD_SKIP_ZERO zeroes only, bits: 1"));
2542 write_bits(1,1);
2543 start_pos=end_pos;
2544 break;
2545 }
2546 DBUG_PRINT("fields", ("FIELD_SKIP_ZERO not only zeroes, bits: 1"));
2547 write_bits(0,1);
2548 /* Fall through */
2549 case FIELD_NORMAL:
2550 DBUG_PRINT("fields", ("FIELD_NORMAL %lu bytes",
2551 (ulong) (end_pos - start_pos)));
2552 for ( ; start_pos < end_pos ; start_pos++)
2553 {
2554 DBUG_PRINT("fields",
2555 ("value: 0x%02x code: 0x%s bits: %2u bin: %s",
2556 (uchar) *start_pos,
2557 hexdigits(tree->code[(uchar) *start_pos]),
2558 (uint) tree->code_len[(uchar) *start_pos],
2559 bindigits(tree->code[(uchar) *start_pos],
2560 (uint) tree->code_len[(uchar) *start_pos])));
2561 write_bits(tree->code[(uchar) *start_pos],
2562 (uint) tree->code_len[(uchar) *start_pos]);
2563 }
2564 break;
2565 case FIELD_SKIP_ENDSPACE:
2566 for (pos=end_pos ; pos > start_pos && pos[-1] == ' ' ; pos--) ;
2567 length= (ulong) (end_pos - pos);
2568 if (count->pack_type & PACK_TYPE_SELECTED)
2569 {
2570 if (length > count->min_space)
2571 {
2572 DBUG_PRINT("fields",
2573 ("FIELD_SKIP_ENDSPACE more than min_space, bits: 1"));
2574 DBUG_PRINT("fields",
2575 ("FIELD_SKIP_ENDSPACE skip %lu/%u bytes, bits: %2u",
2576 length, field_length, count->length_bits));
2577 write_bits(1,1);
2578 write_bits(length,count->length_bits);
2579 }
2580 else
2581 {
2582 DBUG_PRINT("fields",
2583 ("FIELD_SKIP_ENDSPACE not more than min_space, "
2584 "bits: 1"));
2585 write_bits(0,1);
2586 pos=end_pos;
2587 }
2588 }
2589 else
2590 {
2591 DBUG_PRINT("fields",
2592 ("FIELD_SKIP_ENDSPACE skip %lu/%u bytes, bits: %2u",
2593 length, field_length, count->length_bits));
2594 write_bits(length,count->length_bits);
2595 }
2596 /* Encode all significant bytes. */
2597 DBUG_PRINT("fields", ("FIELD_SKIP_ENDSPACE %lu bytes",
2598 (ulong) (pos - start_pos)));
2599 for ( ; start_pos < pos ; start_pos++)
2600 {
2601 DBUG_PRINT("fields",
2602 ("value: 0x%02x code: 0x%s bits: %2u bin: %s",
2603 (uchar) *start_pos,
2604 hexdigits(tree->code[(uchar) *start_pos]),
2605 (uint) tree->code_len[(uchar) *start_pos],
2606 bindigits(tree->code[(uchar) *start_pos],
2607 (uint) tree->code_len[(uchar) *start_pos])));
2608 write_bits(tree->code[(uchar) *start_pos],
2609 (uint) tree->code_len[(uchar) *start_pos]);
2610 }
2611 start_pos=end_pos;
2612 break;
2613 case FIELD_SKIP_PRESPACE:
2614 for (pos=start_pos ; pos < end_pos && pos[0] == ' ' ; pos++) ;
2615 length= (ulong) (pos - start_pos);
2616 if (count->pack_type & PACK_TYPE_SELECTED)
2617 {
2618 if (length > count->min_space)
2619 {
2620 DBUG_PRINT("fields",
2621 ("FIELD_SKIP_PRESPACE more than min_space, bits: 1"));
2622 DBUG_PRINT("fields",
2623 ("FIELD_SKIP_PRESPACE skip %lu/%u bytes, bits: %2u",
2624 length, field_length, count->length_bits));
2625 write_bits(1,1);
2626 write_bits(length,count->length_bits);
2627 }
2628 else
2629 {
2630 DBUG_PRINT("fields",
2631 ("FIELD_SKIP_PRESPACE not more than min_space, "
2632 "bits: 1"));
2633 pos=start_pos;
2634 write_bits(0,1);
2635 }
2636 }
2637 else
2638 {
2639 DBUG_PRINT("fields",
2640 ("FIELD_SKIP_PRESPACE skip %lu/%u bytes, bits: %2u",
2641 length, field_length, count->length_bits));
2642 write_bits(length,count->length_bits);
2643 }
2644 /* Encode all significant bytes. */
2645 DBUG_PRINT("fields", ("FIELD_SKIP_PRESPACE %lu bytes",
2646 (ulong) (end_pos - start_pos)));
2647 for (start_pos=pos ; start_pos < end_pos ; start_pos++)
2648 {
2649 DBUG_PRINT("fields",
2650 ("value: 0x%02x code: 0x%s bits: %2u bin: %s",
2651 (uchar) *start_pos,
2652 hexdigits(tree->code[(uchar) *start_pos]),
2653 (uint) tree->code_len[(uchar) *start_pos],
2654 bindigits(tree->code[(uchar) *start_pos],
2655 (uint) tree->code_len[(uchar) *start_pos])));
2656 write_bits(tree->code[(uchar) *start_pos],
2657 (uint) tree->code_len[(uchar) *start_pos]);
2658 }
2659 break;
2660 case FIELD_CONSTANT:
2661 case FIELD_ZERO:
2662 case FIELD_CHECK:
2663 DBUG_PRINT("fields", ("FIELD_CONSTANT/ZERO/CHECK"));
2664 start_pos=end_pos;
2665 break;
2666 case FIELD_INTERVALL:
2667 global_count=count;
2668 pos=(uchar*) tree_search(&count->int_tree, start_pos,
2669 count->int_tree.custom_arg);
2670 intervall=(uint) (pos - count->tree_buff)/field_length;
2671 DBUG_PRINT("fields", ("FIELD_INTERVALL"));
2672 DBUG_PRINT("fields", ("index: %4u code: 0x%s bits: %2u",
2673 intervall, hexdigits(tree->code[intervall]),
2674 (uint) tree->code_len[intervall]));
2675 write_bits(tree->code[intervall],(uint) tree->code_len[intervall]);
2676 start_pos=end_pos;
2677 break;
2678 case FIELD_BLOB:
2679 {
2680 ulong blob_length=_mi_calc_blob_length(field_length-
2681 portable_sizeof_char_ptr,
2682 start_pos);
2683 /* Empty blobs are encoded with a single 1 bit. */
2684 if (!blob_length)
2685 {
2686 DBUG_PRINT("fields", ("FIELD_BLOB empty, bits: 1"));
2687 write_bits(1,1);
2688 }
2689 else
2690 {
2691 uchar *blob,*blob_end;
2692 DBUG_PRINT("fields", ("FIELD_BLOB not empty, bits: 1"));
2693 write_bits(0,1);
2694 /* Write the blob length. */
2695 DBUG_PRINT("fields", ("FIELD_BLOB %lu bytes, bits: %2u",
2696 blob_length, count->length_bits));
2697 write_bits(blob_length,count->length_bits);
2698 memcpy(&blob, end_pos-portable_sizeof_char_ptr, sizeof(char*));
2699 blob_end=blob+blob_length;
2700 /* Encode the blob bytes. */
2701 for ( ; blob < blob_end ; blob++)
2702 {
2703 DBUG_PRINT("fields",
2704 ("value: 0x%02x code: 0x%s bits: %2u bin: %s",
2705 (uchar) *blob, hexdigits(tree->code[(uchar) *blob]),
2706 (uint) tree->code_len[(uchar) *blob],
2707 bindigits(tree->code[(uchar) *start_pos],
2708 (uint)tree->code_len[(uchar) *start_pos])));
2709 write_bits(tree->code[(uchar) *blob],
2710 (uint) tree->code_len[(uchar) *blob]);
2711 }
2712 tot_blob_length+=blob_length;
2713 }
2714 start_pos= end_pos;
2715 break;
2716 }
2717 case FIELD_VARCHAR:
2718 {
2719 uint var_pack_length= HA_VARCHAR_PACKLENGTH(count->field_length-1);
2720 ulong col_length= (var_pack_length == 1 ?
2721 (uint) *(uchar*) start_pos :
2722 uint2korr(start_pos));
2723 /* Empty varchar are encoded with a single 1 bit. */
2724 if (!col_length)
2725 {
2726 DBUG_PRINT("fields", ("FIELD_VARCHAR empty, bits: 1"));
2727 write_bits(1,1); /* Empty varchar */
2728 }
2729 else
2730 {
2731 uchar *end= start_pos + var_pack_length + col_length;
2732 DBUG_PRINT("fields", ("FIELD_VARCHAR not empty, bits: 1"));
2733 write_bits(0,1);
2734 /* Write the varchar length. */
2735 DBUG_PRINT("fields", ("FIELD_VARCHAR %lu bytes, bits: %2u",
2736 col_length, count->length_bits));
2737 write_bits(col_length,count->length_bits);
2738 /* Encode the varchar bytes. */
2739 for (start_pos+= var_pack_length ; start_pos < end ; start_pos++)
2740 {
2741 DBUG_PRINT("fields",
2742 ("value: 0x%02x code: 0x%s bits: %2u bin: %s",
2743 (uchar) *start_pos,
2744 hexdigits(tree->code[(uchar) *start_pos]),
2745 (uint) tree->code_len[(uchar) *start_pos],
2746 bindigits(tree->code[(uchar) *start_pos],
2747 (uint)tree->code_len[(uchar) *start_pos])));
2748 write_bits(tree->code[(uchar) *start_pos],
2749 (uint) tree->code_len[(uchar) *start_pos]);
2750 }
2751 }
2752 start_pos= end_pos;
2753 break;
2754 }
2755 case FIELD_LAST:
2756 case FIELD_enum_val_count:
2757 abort(); /* Impossible */
2758 }
2759 start_pos+=count->max_zero_fill;
2760 DBUG_PRINT("fields", ("---"));
2761 }
2762 flush_bits();
2763 length=(ulong) ((uchar*) file_buffer.pos - record_pos) - max_pack_length;
2764 pack_length= save_pack_length(pack_version, record_pos, length);
2765 if (pack_blob_length)
2766 pack_length+= save_pack_length(pack_version, record_pos + pack_length,
2767 tot_blob_length);
2768 DBUG_PRINT("fields", ("record: %lu length: %lu blob-length: %lu "
2769 "length-bytes: %lu", (ulong) record_count, length,
2770 tot_blob_length, pack_length));
2771 DBUG_PRINT("fields", ("==="));
2772
2773 /* Correct file buffer if the header was smaller */
2774 if (pack_length != max_pack_length)
2775 {
2776 bmove(record_pos+pack_length,record_pos+max_pack_length,length);
2777 file_buffer.pos-= (max_pack_length-pack_length);
2778 }
2779 if (length < (ulong) min_record_length)
2780 min_record_length=(uint) length;
2781 if (length > (ulong) max_record_length)
2782 max_record_length=(uint) length;
2783 record_count++;
2784 if (write_loop && record_count % WRITE_COUNT == 0)
2785 {
2786 printf("%lu\r", (ulong) record_count);
2787 (void) fflush(stdout);
2788 }
2789 }
2790 else if (error != HA_ERR_RECORD_DELETED)
2791 break;
2792 }
2793 if (error == HA_ERR_END_OF_FILE)
2794 error=0;
2795 else
2796 {
2797 (void) fprintf(stderr, "%s: Got error %d reading records\n",
2798 my_progname, error);
2799 }
2800 if (verbose >= 2)
2801 printf("wrote %s records.\n", llstr((longlong) record_count, llbuf));
2802
2803 my_afree((uchar*) record);
2804 mrg->ref_length=max_pack_length;
2805 mrg->min_pack_length=max_record_length ? min_record_length : 0;
2806 mrg->max_pack_length=max_record_length;
2807 DBUG_RETURN(error || error_on_write || flush_buffer(~(ulong) 0));
2808}
2809
2810
2811static char *make_new_name(char *new_name, char *old_name)
2812{
2813 return fn_format(new_name,old_name,"",DATA_TMP_EXT,2+4);
2814}
2815
2816static char *make_old_name(char *new_name, char *old_name)
2817{
2818 return fn_format(new_name,old_name,"",OLD_EXT,2+4);
2819}
2820
2821 /* rutines for bit writing buffer */
2822
2823static void init_file_buffer(File file, pbool read_buffer)
2824{
2825 file_buffer.file=file;
2826 file_buffer.buffer= (uchar*) my_malloc(ALIGN_SIZE(RECORD_CACHE_SIZE),
2827 MYF(MY_WME));
2828 file_buffer.end=file_buffer.buffer+ALIGN_SIZE(RECORD_CACHE_SIZE)-8;
2829 file_buffer.pos_in_file=0;
2830 error_on_write=0;
2831 if (read_buffer)
2832 {
2833
2834 file_buffer.pos=file_buffer.end;
2835 file_buffer.bits=0;
2836 }
2837 else
2838 {
2839 file_buffer.pos=file_buffer.buffer;
2840 file_buffer.bits=BITS_SAVED;
2841 }
2842 file_buffer.bitbucket= 0;
2843}
2844
2845
2846static int flush_buffer(ulong neaded_length)
2847{
2848 ulong length;
2849
2850 /*
2851 file_buffer.end is 8 bytes lower than the real end of the buffer.
2852 This is done so that the end-of-buffer condition does not need to be
2853 checked for every byte (see write_bits()). Consequently,
2854 file_buffer.pos can become greater than file_buffer.end. The
2855 algorithms in the other functions ensure that there will never be
2856 more than 8 bytes written to the buffer without an end-of-buffer
2857 check. So the buffer cannot be overrun. But we need to check for the
2858 near-to-buffer-end condition to avoid a negative result, which is
2859 casted to unsigned and thus becomes giant.
2860 */
2861 if ((file_buffer.pos < file_buffer.end) &&
2862 ((ulong) (file_buffer.end - file_buffer.pos) > neaded_length))
2863 return 0;
2864 length=(ulong) (file_buffer.pos-file_buffer.buffer);
2865 file_buffer.pos=file_buffer.buffer;
2866 file_buffer.pos_in_file+=length;
2867 if (test_only)
2868 return 0;
2869 if (error_on_write|| my_write(file_buffer.file,
2870 (const uchar*) file_buffer.buffer,
2871 length,
2872 MYF(MY_WME | MY_NABP | MY_WAIT_IF_FULL)))
2873 {
2874 error_on_write=1;
2875 return 1;
2876 }
2877
2878 if (neaded_length != ~(ulong) 0 &&
2879 (ulong) (file_buffer.end-file_buffer.buffer) < neaded_length)
2880 {
2881 char *tmp;
2882 neaded_length+=256; /* some margin */
2883 tmp= my_realloc((char*) file_buffer.buffer, neaded_length,MYF(MY_WME));
2884 if (!tmp)
2885 return 1;
2886 file_buffer.pos= ((uchar*) tmp +
2887 (ulong) (file_buffer.pos - file_buffer.buffer));
2888 file_buffer.buffer= (uchar*) tmp;
2889 file_buffer.end= (uchar*) (tmp+neaded_length-8);
2890 }
2891 return 0;
2892}
2893
2894
2895static void end_file_buffer(void)
2896{
2897 my_free(file_buffer.buffer);
2898}
2899
2900 /* output `bits` low bits of `value' */
2901
2902static void write_bits(register ulonglong value, register uint bits)
2903{
2904 DBUG_ASSERT(((bits < 8 * sizeof(value)) && ! (value >> bits)) ||
2905 (bits == 8 * sizeof(value)));
2906
2907 if ((file_buffer.bits-= (int) bits) >= 0)
2908 {
2909 file_buffer.bitbucket|= value << file_buffer.bits;
2910 }
2911 else
2912 {
2913 reg3 ulonglong bit_buffer;
2914 bits= (uint) -file_buffer.bits;
2915 bit_buffer= (file_buffer.bitbucket |
2916 ((bits != 8 * sizeof(value)) ? (value >> bits) : 0));
2917#if BITS_SAVED == 64
2918 *file_buffer.pos++= (uchar) (bit_buffer >> 56);
2919 *file_buffer.pos++= (uchar) (bit_buffer >> 48);
2920 *file_buffer.pos++= (uchar) (bit_buffer >> 40);
2921 *file_buffer.pos++= (uchar) (bit_buffer >> 32);
2922#endif
2923 *file_buffer.pos++= (uchar) (bit_buffer >> 24);
2924 *file_buffer.pos++= (uchar) (bit_buffer >> 16);
2925 *file_buffer.pos++= (uchar) (bit_buffer >> 8);
2926 *file_buffer.pos++= (uchar) (bit_buffer);
2927
2928 if (bits != 8 * sizeof(value))
2929 value&= (((ulonglong) 1) << bits) - 1;
2930 if (file_buffer.pos >= file_buffer.end)
2931 (void) flush_buffer(~ (ulong) 0);
2932 file_buffer.bits=(int) (BITS_SAVED - bits);
2933 file_buffer.bitbucket= value << (BITS_SAVED - bits);
2934 }
2935 return;
2936}
2937
2938 /* Flush bits in bit_buffer to buffer */
2939
2940static void flush_bits(void)
2941{
2942 int bits;
2943 ulonglong bit_buffer;
2944
2945 bits= file_buffer.bits & ~7;
2946 bit_buffer= file_buffer.bitbucket >> bits;
2947 bits= BITS_SAVED - bits;
2948 while (bits > 0)
2949 {
2950 bits-= 8;
2951 *file_buffer.pos++= (uchar) (bit_buffer >> bits);
2952 }
2953 if (file_buffer.pos >= file_buffer.end)
2954 (void) flush_buffer(~ (ulong) 0);
2955 file_buffer.bits= BITS_SAVED;
2956 file_buffer.bitbucket= 0;
2957}
2958
2959
2960/****************************************************************************
2961** functions to handle the joined files
2962****************************************************************************/
2963
2964static int save_state(MI_INFO *isam_file,PACK_MRG_INFO *mrg,my_off_t new_length,
2965 ha_checksum crc)
2966{
2967 MYISAM_SHARE *share=isam_file->s;
2968 uint options=mi_uint2korr(share->state.header.options);
2969 uint key;
2970 DBUG_ENTER("save_state");
2971
2972 options|= (HA_OPTION_COMPRESS_RECORD | HA_OPTION_READ_ONLY_DATA |
2973 (share->options & HA_OPTION_NULL_FIELDS));
2974 mi_int2store(share->state.header.options,options);
2975
2976 share->state.state.data_file_length=new_length;
2977 share->state.state.del=0;
2978 share->state.state.empty=0;
2979 share->state.dellink= HA_OFFSET_ERROR;
2980 share->state.split=(ha_rows) mrg->records;
2981 share->state.version=(ulong) time((time_t*) 0);
2982 if (! mi_is_all_keys_active(share->state.key_map, share->base.keys))
2983 {
2984 /*
2985 Some indexes are disabled, cannot use current key_file_length value
2986 as an estimate of upper bound of index file size. Use packed data file
2987 size instead.
2988 */
2989 share->state.state.key_file_length= new_length;
2990 }
2991 /*
2992 If there are no disabled indexes, keep key_file_length value from
2993 original file so "myisamchk -rq" can use this value (this is necessary
2994 because index size cannot be easily calculated for fulltext keys)
2995 */
2996 mi_clear_all_keys_active(share->state.key_map);
2997 for (key=0 ; key < share->base.keys ; key++)
2998 share->state.key_root[key]= HA_OFFSET_ERROR;
2999 for (key=0 ; key < share->state.header.max_block_size_index ; key++)
3000 share->state.key_del[key]= HA_OFFSET_ERROR;
3001 isam_file->state->checksum=crc; /* Save crc here */
3002 share->changed=1; /* Force write of header */
3003 share->state.open_count=0;
3004 share->global_changed=0;
3005 (void) my_chsize(share->kfile, share->base.keystart, 0, MYF(0));
3006 if (share->base.keys)
3007 isamchk_neaded=1;
3008 DBUG_RETURN(mi_state_info_write(share->kfile,&share->state,1+2));
3009}
3010
3011
3012static int save_state_mrg(File file,PACK_MRG_INFO *mrg,my_off_t new_length,
3013 ha_checksum crc)
3014{
3015 MI_STATE_INFO state;
3016 MI_INFO *isam_file=mrg->file[0];
3017 uint options;
3018 DBUG_ENTER("save_state_mrg");
3019
3020 state= isam_file->s->state;
3021 options= (mi_uint2korr(state.header.options) | HA_OPTION_COMPRESS_RECORD |
3022 HA_OPTION_READ_ONLY_DATA |
3023 (isam_file->s->options & HA_OPTION_NULL_FIELDS));
3024 mi_int2store(state.header.options,options);
3025 state.state.data_file_length=new_length;
3026 state.state.del=0;
3027 state.state.empty=0;
3028 state.state.records=state.split=(ha_rows) mrg->records;
3029 /* See comment above in save_state about key_file_length handling. */
3030 if (mrg->src_file_has_indexes_disabled)
3031 {
3032 isam_file->s->state.state.key_file_length=
3033 MY_MAX(isam_file->s->state.state.key_file_length, new_length);
3034 }
3035 state.dellink= HA_OFFSET_ERROR;
3036 state.version=(ulong) time((time_t*) 0);
3037 mi_clear_all_keys_active(state.key_map);
3038 state.state.checksum=crc;
3039 if (isam_file->s->base.keys)
3040 isamchk_neaded=1;
3041 state.changed=STATE_CHANGED | STATE_NOT_ANALYZED; /* Force check of table */
3042 DBUG_RETURN (mi_state_info_write(file,&state,1+2));
3043}
3044
3045
3046/* reset for mrg_rrnd */
3047
3048static void mrg_reset(PACK_MRG_INFO *mrg)
3049{
3050 if (mrg->current)
3051 {
3052 mi_extra(*mrg->current, HA_EXTRA_NO_CACHE, 0);
3053 mrg->current=0;
3054 }
3055}
3056
3057static int mrg_rrnd(PACK_MRG_INFO *info,uchar *buf)
3058{
3059 int error;
3060 MI_INFO *isam_info;
3061 my_off_t filepos;
3062
3063 if (!info->current)
3064 {
3065 isam_info= *(info->current=info->file);
3066 info->end=info->current+info->count;
3067 mi_reset(isam_info);
3068 mi_extra(isam_info, HA_EXTRA_CACHE, 0);
3069 filepos=isam_info->s->pack.header_length;
3070 }
3071 else
3072 {
3073 isam_info= *info->current;
3074 filepos= isam_info->nextpos;
3075 }
3076
3077 for (;;)
3078 {
3079 isam_info->update&= HA_STATE_CHANGED;
3080 if (!(error=(*isam_info->s->read_rnd)(isam_info,(uchar*) buf,
3081 filepos, 1)) ||
3082 error != HA_ERR_END_OF_FILE)
3083 return (error);
3084 mi_extra(isam_info,HA_EXTRA_NO_CACHE, 0);
3085 if (info->current+1 == info->end)
3086 return(HA_ERR_END_OF_FILE);
3087 info->current++;
3088 isam_info= *info->current;
3089 filepos=isam_info->s->pack.header_length;
3090 mi_reset(isam_info);
3091 mi_extra(isam_info,HA_EXTRA_CACHE, 0);
3092 }
3093}
3094
3095
3096static int mrg_close(PACK_MRG_INFO *mrg)
3097{
3098 uint i;
3099 int error=0;
3100 for (i=0 ; i < mrg->count ; i++)
3101 error|=mi_close(mrg->file[i]);
3102 if (mrg->free_file)
3103 my_free(mrg->file);
3104 return error;
3105}
3106
3107
3108#if !defined(DBUG_OFF)
3109/*
3110 Fake the counts to get big Huffman codes.
3111
3112 SYNOPSIS
3113 fakebigcodes()
3114 huff_counts A pointer to the counts array.
3115 end_count A pointer past the counts array.
3116
3117 DESCRIPTION
3118
3119 Huffman coding works by removing the two least frequent values from
3120 the list of values and add a new value with the sum of their
3121 incidences in a loop until only one value is left. Every time a
3122 value is reused for a new value, it gets one more bit for its
3123 encoding. Hence, the least frequent values get the longest codes.
3124
3125 To get a maximum code length for a value, two of the values must
3126 have an incidence of 1. As their sum is 2, the next infrequent value
3127 must have at least an incidence of 2, then 4, 8, 16 and so on. This
3128 means that one needs 2**n bytes (values) for a code length of n
3129 bits. However, using more distinct values forces the use of longer
3130 codes, or reaching the code length with less total bytes (values).
3131
3132 To get 64(32)-bit codes, I sort the counts by decreasing incidence.
3133 I assign counts of 1 to the two most frequent values, a count of 2
3134 for the next one, then 4, 8, and so on until 2**64-1(2**30-1). All
3135 the remaining values get 1. That way every possible byte has an
3136 assigned code, though not all codes are used if not all byte values
3137 are present in the column.
3138
3139 This strategy would work with distinct column values too, but
3140 requires that at least 64(32) values are present. To make things
3141 easier here, I cancel all distinct column values and force byte
3142 compression for all columns.
3143
3144 RETURN
3145 void
3146*/
3147
3148static void fakebigcodes(HUFF_COUNTS *huff_counts, HUFF_COUNTS *end_count)
3149{
3150 HUFF_COUNTS *count;
3151 my_off_t *cur_count_p;
3152 my_off_t *end_count_p;
3153 my_off_t **cur_sort_p;
3154 my_off_t **end_sort_p;
3155 my_off_t *sort_counts[256];
3156 my_off_t total;
3157 DBUG_ENTER("fakebigcodes");
3158
3159 for (count= huff_counts; count < end_count; count++)
3160 {
3161 /*
3162 Remove distinct column values.
3163 */
3164 if (huff_counts->tree_buff)
3165 {
3166 my_free(huff_counts->tree_buff);
3167 delete_tree(&huff_counts->int_tree, 0);
3168 huff_counts->tree_buff= NULL;
3169 DBUG_PRINT("fakebigcodes", ("freed distinct column values"));
3170 }
3171
3172 /*
3173 Sort counts by decreasing incidence.
3174 */
3175 cur_count_p= count->counts;
3176 end_count_p= cur_count_p + 256;
3177 cur_sort_p= sort_counts;
3178 while (cur_count_p < end_count_p)
3179 *(cur_sort_p++)= cur_count_p++;
3180 (void) my_qsort(sort_counts, 256, sizeof(my_off_t*), (qsort_cmp) fakecmp);
3181
3182 /*
3183 Assign faked counts.
3184 */
3185 cur_sort_p= sort_counts;
3186#if SIZEOF_LONG_LONG > 4
3187 end_sort_p= sort_counts + 8 * sizeof(ulonglong) - 1;
3188#else
3189 end_sort_p= sort_counts + 8 * sizeof(ulonglong) - 2;
3190#endif
3191 /* Most frequent value gets a faked count of 1. */
3192 **(cur_sort_p++)= 1;
3193 total= 1;
3194 while (cur_sort_p < end_sort_p)
3195 {
3196 **(cur_sort_p++)= total;
3197 total<<= 1;
3198 }
3199 /* Set the last value. */
3200 **(cur_sort_p++)= --total;
3201 /*
3202 Set the remaining counts.
3203 */
3204 end_sort_p= sort_counts + 256;
3205 while (cur_sort_p < end_sort_p)
3206 **(cur_sort_p++)= 1;
3207 }
3208 DBUG_VOID_RETURN;
3209}
3210
3211
3212/*
3213 Compare two counts for reverse sorting.
3214
3215 SYNOPSIS
3216 fakecmp()
3217 count1 One count.
3218 count2 Another count.
3219
3220 RETURN
3221 1 count1 < count2
3222 0 count1 == count2
3223 -1 count1 > count2
3224*/
3225
3226static int fakecmp(my_off_t **count1, my_off_t **count2)
3227{
3228 return ((**count1 < **count2) ? 1 :
3229 (**count1 > **count2) ? -1 : 0);
3230}
3231#endif
3232
3233#include "mi_extrafunc.h"
3234