1/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
2
3 This program is free software; you can redistribute it and/or modify
4 it under the terms of the GNU General Public License as published by
5 the Free Software Foundation; version 2 of the License.
6
7 This program is distributed in the hope that it will be useful,
8 but WITHOUT ANY WARRANTY; without even the implied warranty of
9 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 GNU General Public License for more details.
11
12 You should have received a copy of the GNU General Public License
13 along with this program; if not, write to the Free Software
14 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111-1301 USA */
15
16/* Pack MARIA file */
17
18#ifndef USE_MY_FUNC
19#define USE_MY_FUNC /* We need at least my_malloc */
20#endif
21
22#include "maria_def.h"
23#include <queues.h>
24#include <my_tree.h>
25#include "mysys_err.h"
26#ifdef MSDOS
27#include <io.h>
28#endif
29#ifndef __GNU_LIBRARY__
30#define __GNU_LIBRARY__ /* Skip warnings in getopt.h */
31#endif
32#include <my_getopt.h>
33#include <assert.h>
34
35#if SIZEOF_LONG_LONG > 4
36#define BITS_SAVED 64
37#else
38#define BITS_SAVED 32
39#endif
40
41#define IS_OFFSET ((uint) 32768) /* Bit if offset or char in tree */
42#define HEAD_LENGTH 32
43#define ALLOWED_JOIN_DIFF 256 /* Diff allowed to join trees */
44
45#define DATA_TMP_EXT ".TMD"
46#define OLD_EXT ".OLD"
47#define WRITE_COUNT MY_HOW_OFTEN_TO_WRITE
48
49struct st_file_buffer {
50 File file;
51 uchar *buffer,*pos,*end;
52 my_off_t pos_in_file;
53 int bits;
54 ulonglong bitbucket;
55};
56
57struct st_huff_tree;
58struct st_huff_element;
59
60typedef struct st_huff_counts {
61 uint field_length,max_zero_fill;
62 uint pack_type;
63 uint max_end_space,max_pre_space,length_bits,min_space;
64 ulong max_length;
65 enum en_fieldtype field_type;
66 struct st_huff_tree *tree; /* Tree for field */
67 my_off_t counts[256];
68 my_off_t end_space[8];
69 my_off_t pre_space[8];
70 my_off_t tot_end_space,tot_pre_space,zero_fields,empty_fields,bytes_packed;
71 TREE int_tree; /* Tree for detecting distinct column values. */
72 uchar *tree_buff; /* Column values, 'field_length' each. */
73 uchar *tree_pos; /* Points to end of column values in 'tree_buff'. */
74} HUFF_COUNTS;
75
76typedef struct st_huff_element HUFF_ELEMENT;
77
78/*
79 WARNING: It is crucial for the optimizations in calc_packed_length()
80 that 'count' is the first element of 'HUFF_ELEMENT'.
81*/
82struct st_huff_element {
83 my_off_t count;
84 union un_element {
85 struct st_nod {
86 HUFF_ELEMENT *left,*right;
87 } nod;
88 struct st_leaf {
89 HUFF_ELEMENT *null;
90 uint element_nr; /* Number of element */
91 } leaf;
92 } a;
93};
94
95
96typedef struct st_huff_tree {
97 HUFF_ELEMENT *root,*element_buffer;
98 HUFF_COUNTS *counts;
99 uint tree_number;
100 uint elements;
101 my_off_t bytes_packed;
102 uint tree_pack_length;
103 uint min_chr,max_chr,char_bits,offset_bits,max_offset,height;
104 ulonglong *code;
105 uchar *code_len;
106} HUFF_TREE;
107
108
109typedef struct st_isam_mrg {
110 MARIA_HA **file,**current,**end;
111 uint free_file;
112 uint count;
113 uint min_pack_length; /* Theese is used by packed data */
114 uint max_pack_length;
115 uint ref_length;
116 uint max_blob_length;
117 my_off_t records;
118 /* true if at least one source file has at least one disabled index */
119 my_bool src_file_has_indexes_disabled;
120} PACK_MRG_INFO;
121
122
123extern int main(int argc,char * *argv);
124static void get_options(int *argc,char ***argv);
125static MARIA_HA *open_maria_file(char *name,int mode);
126static my_bool open_maria_files(PACK_MRG_INFO *mrg,char **names,uint count);
127static int compress(PACK_MRG_INFO *file,char *join_name);
128static HUFF_COUNTS *init_huff_count(MARIA_HA *info,my_off_t records);
129static void free_counts_and_tree_and_queue(HUFF_TREE *huff_trees,
130 uint trees,
131 HUFF_COUNTS *huff_counts,
132 uint fields);
133static int compare_tree(void* cmp_arg __attribute__((unused)),
134 const uchar *s,const uchar *t);
135static int get_statistic(PACK_MRG_INFO *mrg,HUFF_COUNTS *huff_counts);
136static void check_counts(HUFF_COUNTS *huff_counts,uint trees,
137 my_off_t records);
138static int test_space_compress(HUFF_COUNTS *huff_counts,my_off_t records,
139 uint max_space_length,my_off_t *space_counts,
140 my_off_t tot_space_count,
141 enum en_fieldtype field_type);
142static HUFF_TREE* make_huff_trees(HUFF_COUNTS *huff_counts,uint trees);
143static int make_huff_tree(HUFF_TREE *tree,HUFF_COUNTS *huff_counts);
144static int compare_huff_elements(void *not_used, uchar *a,uchar *b);
145static int save_counts_in_queue(uchar *key,element_count count,
146 HUFF_TREE *tree);
147static my_off_t calc_packed_length(HUFF_COUNTS *huff_counts,uint flag);
148static uint join_same_trees(HUFF_COUNTS *huff_counts,uint trees);
149static int make_huff_decode_table(HUFF_TREE *huff_tree,uint trees);
150static void make_traverse_code_tree(HUFF_TREE *huff_tree,
151 HUFF_ELEMENT *element,uint size,
152 ulonglong code);
153static int write_header(PACK_MRG_INFO *isam_file, uint header_length,uint trees,
154 my_off_t tot_elements,my_off_t filelength);
155static void write_field_info(HUFF_COUNTS *counts, uint fields,uint trees);
156static my_off_t write_huff_tree(HUFF_TREE *huff_tree,uint trees);
157static uint *make_offset_code_tree(HUFF_TREE *huff_tree,
158 HUFF_ELEMENT *element,
159 uint *offset);
160static uint max_bit(uint value);
161static int compress_maria_file(PACK_MRG_INFO *file,HUFF_COUNTS *huff_counts);
162static char *make_new_name(char *new_name,char *old_name);
163static char *make_old_name(char *new_name,char *old_name);
164static void init_file_buffer(File file,pbool read_buffer);
165static int flush_buffer(ulong neaded_length);
166static void end_file_buffer(void);
167static void write_bits(ulonglong value, uint bits);
168static void flush_bits(void);
169static int save_state(MARIA_HA *isam_file,PACK_MRG_INFO *mrg,
170 my_off_t new_length, ha_checksum crc);
171static int save_state_mrg(File file,PACK_MRG_INFO *isam_file,
172 my_off_t new_length, ha_checksum crc);
173static int mrg_close(PACK_MRG_INFO *mrg);
174static int mrg_rrnd(PACK_MRG_INFO *info,uchar *buf);
175static void mrg_reset(PACK_MRG_INFO *mrg);
176#if !defined(DBUG_OFF)
177static void fakebigcodes(HUFF_COUNTS *huff_counts, HUFF_COUNTS *end_count);
178static int fakecmp(my_off_t **count1, my_off_t **count2);
179#endif
180
181
182static int error_on_write=0,test_only=0,verbose=0,silent=0,
183 write_loop=0,force_pack=0, isamchk_neaded=0;
184static int tmpfile_createflag=O_RDWR | O_TRUNC | O_EXCL;
185static my_bool backup, opt_wait;
186/*
187 tree_buff_length is somewhat arbitrary. The bigger it is the better
188 the chance to win in terms of compression factor. On the other hand,
189 this table becomes part of the compressed file header. And its length
190 is coded with 16 bits in the header. Hence the limit is 2**16 - 1.
191*/
192static uint tree_buff_length= 65536 - MALLOC_OVERHEAD;
193static char tmp_dir[FN_REFLEN]={0},*join_table;
194static my_off_t intervall_length;
195static ha_checksum glob_crc;
196static struct st_file_buffer file_buffer;
197static QUEUE queue;
198static HUFF_COUNTS *global_count;
199static char zero_string[]={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
200static const char *load_default_groups[]= { "ariapack",0 };
201
202 /* The main program */
203
204int main(int argc, char **argv)
205{
206 int error,ok;
207 PACK_MRG_INFO merge;
208 char **default_argv;
209 MY_INIT(argv[0]);
210
211 load_defaults_or_exit("my", load_default_groups, &argc, &argv);
212 default_argv= argv;
213 get_options(&argc,&argv);
214 maria_init();
215
216 error=ok=isamchk_neaded=0;
217 if (join_table)
218 { /* Join files into one */
219 if (open_maria_files(&merge,argv,(uint) argc) ||
220 compress(&merge,join_table))
221 error=1;
222 }
223 else while (argc--)
224 {
225 MARIA_HA *isam_file;
226 if (!(isam_file=open_maria_file(*argv++,O_RDWR)))
227 error=1;
228 else
229 {
230 merge.file= &isam_file;
231 merge.current=0;
232 merge.free_file=0;
233 merge.count=1;
234 if (compress(&merge,0))
235 error=1;
236 else
237 ok=1;
238 }
239 }
240 if (ok && isamchk_neaded && !silent)
241 puts("Remember to run aria_chk -rq on compressed tables");
242 fflush(stdout);
243 fflush(stderr);
244 free_defaults(default_argv);
245 maria_end();
246 my_end(verbose ? MY_CHECK_ERROR | MY_GIVE_INFO : MY_CHECK_ERROR);
247 exit(error ? 2 : 0);
248#ifndef _lint
249 return 0; /* No compiler warning */
250#endif
251}
252
253enum options_mp {OPT_CHARSETS_DIR_MP=256, OPT_AUTO_CLOSE};
254
255static struct my_option my_long_options[] =
256{
257#ifdef __NETWARE__
258 {"autoclose", OPT_AUTO_CLOSE, "Auto close the screen on exit for Netware.",
259 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
260#endif
261 {"backup", 'b', "Make a backup of the table as table_name.OLD.",
262 &backup, &backup, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0},
263 {"character-sets-dir", OPT_CHARSETS_DIR_MP,
264 "Directory where character sets are.", (char**) &charsets_dir,
265 (char**) &charsets_dir, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0},
266 {"debug", '#', "Output debug log. Often this is 'd:t:o,filename'.",
267 0, 0, 0, GET_STR, OPT_ARG, 0, 0, 0, 0, 0, 0},
268 {"force", 'f',
269 "Force packing of table even if it gets bigger or if tempfile exists.",
270 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
271 {"join", 'j',
272 "Join all given tables into 'new_table_name'. All tables MUST have identical layouts.",
273 &join_table, &join_table, 0, GET_STR, REQUIRED_ARG, 0, 0, 0,
274 0, 0, 0},
275 {"help", '?', "Display this help and exit.",
276 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
277 {"silent", 's', "Be more silent.",
278 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
279 {"tmpdir", 'T', "Use temporary directory to store temporary table.",
280 0, 0, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0},
281 {"test", 't', "Don't pack table, only test packing it.",
282 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
283 {"verbose", 'v', "Write info about progress and packing result. Use many -v for more verbosity!",
284 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
285 {"version", 'V', "Output version information and exit.",
286 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
287 {"wait", 'w', "Wait and retry if table is in use.", &opt_wait,
288 &opt_wait, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0},
289 { 0, 0, 0, 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}
290};
291
292
293static void print_version(void)
294{
295 printf("%s Ver 1.0 for %s on %s\n", my_progname, SYSTEM_TYPE, MACHINE_TYPE);
296}
297
298
299static void usage(void)
300{
301 print_version();
302 puts("Copyright 2002-2008 MySQL AB, 2008-2009 Sun Microsystems, Inc.");
303 puts("This software comes with ABSOLUTELY NO WARRANTY. This is free software,");
304 puts("and you are welcome to modify and redistribute it under the GPL license\n");
305
306 puts("Pack a Aria-table to take much less space.");
307 puts("Keys are not updated, you must run aria_chk -rq on the index (.MAI) file");
308 puts("afterwards to update the keys.");
309 puts("You should give the .MAI file as the filename argument.");
310 puts("To unpack a packed table, run aria_chk -u on the table");
311
312 printf("\nUsage: %s [OPTIONS] filename...\n", my_progname);
313 my_print_help(my_long_options);
314 print_defaults("my", load_default_groups);
315 my_print_variables(my_long_options);
316}
317
318
319static my_bool
320get_one_option(int optid, const struct my_option *opt __attribute__((unused)),
321 char *argument)
322{
323 uint length;
324
325 switch(optid) {
326#ifdef __NETWARE__
327 case OPT_AUTO_CLOSE:
328 setscreenmode(SCR_AUTOCLOSE_ON_EXIT);
329 break;
330#endif
331 case 'f':
332 force_pack= 1;
333 tmpfile_createflag= O_RDWR | O_TRUNC;
334 break;
335 case 's':
336 write_loop= verbose= 0;
337 silent= 1;
338 break;
339 case 't':
340 test_only= 1;
341 /* Avoid to reset 'verbose' if it was already set > 1. */
342 if (! verbose)
343 verbose= 1;
344 break;
345 case 'T':
346 length= (uint) (strmov(tmp_dir, argument) - tmp_dir);
347 if (length != dirname_length(tmp_dir))
348 {
349 tmp_dir[length]=FN_LIBCHAR;
350 tmp_dir[length+1]=0;
351 }
352 break;
353 case 'v':
354 verbose++; /* Allow for selecting the level of verbosity. */
355 silent= 0;
356 break;
357 case '#':
358 DBUG_PUSH(argument ? argument : "d:t:o,/tmp/aria_pack.trace");
359 break;
360 case 'V':
361 print_version();
362 exit(0);
363 case 'I':
364 case '?':
365 usage();
366 exit(0);
367 }
368 return 0;
369}
370
371 /* reads options */
372 /* Initiates DEBUG - but no debugging here ! */
373
374static void get_options(int *argc,char ***argv)
375{
376 int ho_error;
377
378 my_progname= argv[0][0];
379 if (isatty(fileno(stdout)))
380 write_loop=1;
381
382 if ((ho_error=handle_options(argc, argv, my_long_options, get_one_option)))
383 exit(ho_error);
384
385 if (!*argc)
386 {
387 usage();
388 exit(1);
389 }
390 if (join_table)
391 {
392 backup=0; /* Not needed */
393 tmp_dir[0]=0;
394 }
395 return;
396}
397
398
399static MARIA_HA *open_maria_file(char *name,int mode)
400{
401 MARIA_HA *isam_file;
402 MARIA_SHARE *share;
403 DBUG_ENTER("open_maria_file");
404
405 if (!(isam_file=maria_open(name, mode, HA_OPEN_IGNORE_MOVED_STATE |
406 (opt_wait ? HA_OPEN_WAIT_IF_LOCKED :
407 HA_OPEN_ABORT_IF_LOCKED))))
408 {
409 fprintf(stderr, "%s gave error %d on open\n", name, my_errno);
410 DBUG_RETURN(0);
411 }
412 share=isam_file->s;
413 if (share->options & HA_OPTION_COMPRESS_RECORD && !join_table)
414 {
415 if (!force_pack)
416 {
417 fprintf(stderr, "%s is already compressed\n", name);
418 maria_close(isam_file);
419 DBUG_RETURN(0);
420 }
421 if (verbose)
422 puts("Recompressing already compressed table");
423 share->options&= ~HA_OPTION_READ_ONLY_DATA; /* We are modifing it */
424 }
425 if (! force_pack && share->state.state.records != 0 &&
426 (share->state.state.records <= 1 ||
427 share->state.state.data_file_length < 1024))
428 {
429 fprintf(stderr, "%s is too small to compress\n", name);
430 maria_close(isam_file);
431 DBUG_RETURN(0);
432 }
433 maria_lock_database(isam_file,F_WRLCK);
434 maria_ignore_trids(isam_file);
435 DBUG_RETURN(isam_file);
436}
437
438
439static my_bool open_maria_files(PACK_MRG_INFO *mrg,char **names,uint count)
440{
441 uint i,j;
442 mrg->count=0;
443 mrg->current=0;
444 mrg->file=(MARIA_HA**) my_malloc(sizeof(MARIA_HA*)*count,MYF(MY_FAE));
445 mrg->free_file=1;
446 mrg->src_file_has_indexes_disabled= 0;
447 for (i=0; i < count ; i++)
448 {
449 if (!(mrg->file[i]=open_maria_file(names[i],O_RDONLY)))
450 goto error;
451
452 mrg->src_file_has_indexes_disabled|=
453 ! maria_is_all_keys_active(mrg->file[i]->s->state.key_map,
454 mrg->file[i]->s->base.keys);
455 }
456 /* Check that files are identical */
457 for (j=0 ; j < count-1 ; j++)
458 {
459 MARIA_COLUMNDEF *m1,*m2,*end;
460 if (mrg->file[j]->s->base.reclength != mrg->file[j+1]->s->base.reclength ||
461 mrg->file[j]->s->base.fields != mrg->file[j+1]->s->base.fields)
462 goto diff_file;
463 m1=mrg->file[j]->s->columndef;
464 end=m1+mrg->file[j]->s->base.fields;
465 m2=mrg->file[j+1]->s->columndef;
466 for ( ; m1 != end ; m1++,m2++)
467 {
468 if (m1->type != m2->type || m1->length != m2->length)
469 goto diff_file;
470 }
471 }
472 mrg->count=count;
473 return 0;
474
475 diff_file:
476 fprintf(stderr, "%s: Tables '%s' and '%s' are not identical\n",
477 my_progname, names[j], names[j+1]);
478 error:
479 while (i--)
480 maria_close(mrg->file[i]);
481 my_free(mrg->file);
482 return 1;
483}
484
485
486static int compress(PACK_MRG_INFO *mrg,char *result_table)
487{
488 int error;
489 File new_file,join_maria_file;
490 MARIA_HA *isam_file;
491 MARIA_SHARE *share;
492 char org_name[FN_REFLEN],new_name[FN_REFLEN],temp_name[FN_REFLEN];
493 uint i,header_length,fields,trees,used_trees;
494 my_off_t old_length,new_length,tot_elements;
495 HUFF_COUNTS *huff_counts;
496 HUFF_TREE *huff_trees;
497 DBUG_ENTER("compress");
498
499 isam_file=mrg->file[0]; /* Take this as an example */
500 share=isam_file->s;
501 new_file=join_maria_file= -1;
502 trees=fields=0;
503 huff_trees=0;
504 huff_counts=0;
505 maria_block_size= isam_file->s->block_size;
506
507 /* Create temporary or join file */
508 if (backup)
509 fn_format(org_name,isam_file->s->open_file_name.str, "",MARIA_NAME_DEXT, 2);
510 else
511 fn_format(org_name,isam_file->s->open_file_name.str, "",MARIA_NAME_DEXT, 2+4+16);
512
513 if (init_pagecache(maria_pagecache, MARIA_MIN_PAGE_CACHE_SIZE, 0, 0,
514 maria_block_size, 0, MY_WME) == 0)
515 {
516 fprintf(stderr, "Can't initialize page cache\n");
517 goto err;
518 }
519
520 if (!test_only && result_table)
521 {
522 /* Make a new indexfile based on first file in list */
523 uint length;
524 uchar *buff;
525 strmov(org_name,result_table); /* Fix error messages */
526 fn_format(new_name,result_table,"",MARIA_NAME_IEXT,2);
527 if ((join_maria_file=my_create(new_name,0,tmpfile_createflag,MYF(MY_WME)))
528 < 0)
529 goto err;
530 length=(uint) share->base.keystart;
531 if (!(buff= (uchar*) my_malloc(length,MYF(MY_WME))))
532 goto err;
533 if (my_pread(share->kfile.file, buff, length, 0L, MYF(MY_WME | MY_NABP)) ||
534 my_write(join_maria_file,buff,length,
535 MYF(MY_WME | MY_NABP | MY_WAIT_IF_FULL)))
536 {
537 my_free(buff);
538 goto err;
539 }
540 my_free(buff);
541 fn_format(new_name,result_table,"",MARIA_NAME_DEXT,2);
542 }
543 else if (!tmp_dir[0])
544 make_new_name(new_name,org_name);
545 else
546 fn_format(new_name,org_name,tmp_dir,DATA_TMP_EXT,1+2+4);
547 if (!test_only &&
548 (new_file=my_create(new_name,0,tmpfile_createflag,MYF(MY_WME))) < 0)
549 goto err;
550
551 /* Start calculating statistics */
552
553 mrg->records=0;
554 for (i=0 ; i < mrg->count ; i++)
555 mrg->records+=mrg->file[i]->s->state.state.records;
556
557 DBUG_PRINT("info", ("Compressing %s: (%lu records)",
558 result_table ? new_name : org_name,
559 (ulong) mrg->records));
560 if (write_loop || verbose)
561 {
562 printf("Compressing %s: (%lu records)\n",
563 result_table ? new_name : org_name, (ulong) mrg->records);
564 }
565 trees=fields=share->base.fields;
566 huff_counts=init_huff_count(isam_file,mrg->records);
567
568 /*
569 Read the whole data file(s) for statistics.
570 */
571 DBUG_PRINT("info", ("- Calculating statistics"));
572 if (write_loop || verbose)
573 printf("- Calculating statistics\n");
574 if (get_statistic(mrg,huff_counts))
575 goto err;
576
577 old_length=0;
578 for (i=0; i < mrg->count ; i++)
579 old_length+= (mrg->file[i]->s->state.state.data_file_length -
580 mrg->file[i]->s->state.state.empty);
581
582 /*
583 Create a global priority queue in preparation for making
584 temporary Huffman trees.
585 */
586 if (init_queue(&queue, 256, 0, 0, compare_huff_elements, 0, 0, 0))
587 goto err;
588
589 /*
590 Check each column if we should use pre-space-compress, end-space-
591 compress, empty-field-compress or zero-field-compress.
592 */
593 check_counts(huff_counts,fields,mrg->records);
594
595 /*
596 Build a Huffman tree for each column.
597 */
598 huff_trees=make_huff_trees(huff_counts,trees);
599
600 /*
601 If the packed lengths of combined columns is less then the sum of
602 the non-combined columns, then create common Huffman trees for them.
603 We do this only for uchar compressed columns, not for distinct values
604 compressed columns.
605 */
606 if ((int) (used_trees=join_same_trees(huff_counts,trees)) < 0)
607 goto err;
608
609 /*
610 Assign codes to all uchar or column values.
611 */
612 if (make_huff_decode_table(huff_trees,fields))
613 goto err;
614
615 /* Prepare a file buffer. */
616 init_file_buffer(new_file,0);
617
618 /*
619 Reserve space in the target file for the fixed compressed file header.
620 */
621 file_buffer.pos_in_file=HEAD_LENGTH;
622 if (! test_only)
623 my_seek(new_file,file_buffer.pos_in_file,MY_SEEK_SET,MYF(0));
624
625 /*
626 Write field infos: field type, pack type, length bits, tree number.
627 */
628 write_field_info(huff_counts,fields,used_trees);
629
630 /*
631 Write decode trees.
632 */
633 if (!(tot_elements=write_huff_tree(huff_trees,trees)))
634 goto err;
635
636 /*
637 Calculate the total length of the compression info header.
638 This includes the fixed compressed file header, the column compression
639 type descriptions, and the decode trees.
640 */
641 header_length=(uint) file_buffer.pos_in_file+
642 (uint) (file_buffer.pos-file_buffer.buffer);
643
644 /*
645 Compress the source file into the target file.
646 */
647 DBUG_PRINT("info", ("- Compressing file"));
648 if (write_loop || verbose)
649 printf("- Compressing file\n");
650 error=compress_maria_file(mrg,huff_counts);
651 new_length=file_buffer.pos_in_file;
652 if (!error && !test_only)
653 {
654 uchar buff[MEMMAP_EXTRA_MARGIN]; /* End marginal for memmap */
655 bzero(buff,sizeof(buff));
656 error=my_write(file_buffer.file,buff,sizeof(buff),
657 MYF(MY_WME | MY_NABP | MY_WAIT_IF_FULL)) != 0;
658 }
659
660 /*
661 Write the fixed compressed file header.
662 */
663 if (!error)
664 error=write_header(mrg,header_length,used_trees,tot_elements,
665 new_length);
666
667 /* Flush the file buffer. */
668 end_file_buffer();
669
670 /* Display statistics. */
671 DBUG_PRINT("info", ("Min record length: %6d Max length: %6d "
672 "Mean total length: %6ld",
673 mrg->min_pack_length, mrg->max_pack_length,
674 (ulong) (mrg->records ? (new_length/mrg->records) : 0)));
675 if (verbose && mrg->records)
676 printf("Min record length: %6d Max length: %6d "
677 "Mean total length: %6ld\n", mrg->min_pack_length,
678 mrg->max_pack_length, (ulong) (new_length/mrg->records));
679
680 /* Close source and target file. */
681 if (!test_only)
682 {
683 error|=my_close(new_file,MYF(MY_WME));
684 if (!result_table)
685 {
686 (void) flush_pagecache_blocks(isam_file->s->pagecache, &isam_file->dfile,
687 FLUSH_RELEASE);
688 error|=my_close(isam_file->dfile.file, MYF(MY_WME));
689 isam_file->dfile.file= -1; /* Tell maria_close file is closed */
690 isam_file->s->bitmap.file.file= -1;
691 }
692 }
693
694 /* Cleanup. */
695 free_counts_and_tree_and_queue(huff_trees,trees,huff_counts,fields);
696 if (! test_only && ! error)
697 {
698 if (result_table)
699 {
700 error=save_state_mrg(join_maria_file,mrg,new_length,glob_crc);
701 }
702 else
703 {
704 if (backup)
705 {
706 if (my_rename(org_name,make_old_name(temp_name,
707 isam_file->s->open_file_name.str),
708 MYF(MY_WME)))
709 error=1;
710 else
711 {
712 if (tmp_dir[0])
713 error=my_copy(new_name,org_name,MYF(MY_WME));
714 else
715 error=my_rename(new_name,org_name,MYF(MY_WME));
716 if (!error)
717 {
718 my_copystat(temp_name,org_name,MYF(MY_COPYTIME));
719 if (tmp_dir[0])
720 my_delete(new_name,MYF(MY_WME));
721 }
722 }
723 }
724 else
725 {
726 if (tmp_dir[0])
727 {
728 error=my_copy(new_name,org_name,
729 MYF(MY_WME | MY_HOLD_ORIGINAL_MODES | MY_COPYTIME));
730 if (!error)
731 my_delete(new_name,MYF(MY_WME));
732 }
733 else
734 error=my_redel(org_name, new_name, 0, MYF(MY_WME | MY_COPYTIME));
735 }
736 if (! error)
737 error=save_state(isam_file,mrg,new_length,glob_crc);
738 }
739 }
740 error|=mrg_close(mrg);
741 if (join_maria_file >= 0)
742 error|=my_close(join_maria_file,MYF(MY_WME));
743 if (error)
744 {
745 fprintf(stderr, "Aborting: %s is not compressed\n", org_name);
746 my_delete(new_name,MYF(MY_WME));
747 DBUG_RETURN(-1);
748 }
749 if (write_loop || verbose)
750 {
751 if (old_length)
752 printf("%.4g%% \n",
753 (((longlong) (old_length - new_length)) * 100.0 /
754 (longlong) old_length));
755 else
756 puts("Empty file saved in compressed format");
757 }
758 DBUG_RETURN(0);
759
760 err:
761 free_counts_and_tree_and_queue(huff_trees,trees,huff_counts,fields);
762 if (new_file >= 0)
763 my_close(new_file,MYF(0));
764 if (join_maria_file >= 0)
765 my_close(join_maria_file,MYF(0));
766 mrg_close(mrg);
767 end_pagecache(maria_pagecache, 1);
768 fprintf(stderr, "Aborted: %s is not compressed\n", org_name);
769 DBUG_RETURN(-1);
770}
771
772 /* Init a huff_count-struct for each field and init it */
773
774static HUFF_COUNTS *init_huff_count(MARIA_HA *info,my_off_t records)
775{
776 reg2 uint i;
777 reg1 HUFF_COUNTS *count;
778 if ((count = (HUFF_COUNTS*) my_malloc(info->s->base.fields*
779 sizeof(HUFF_COUNTS),
780 MYF(MY_ZEROFILL | MY_WME))))
781 {
782 for (i=0 ; i < info->s->base.fields ; i++)
783 {
784 enum en_fieldtype type;
785 count[i].field_length=info->s->columndef[i].length;
786 type= count[i].field_type= (enum en_fieldtype) info->s->columndef[i].type;
787 if (type == FIELD_INTERVALL ||
788 type == FIELD_CONSTANT ||
789 type == FIELD_ZERO)
790 type = FIELD_NORMAL;
791 if (count[i].field_length <= 8 &&
792 (type == FIELD_NORMAL ||
793 type == FIELD_SKIP_ZERO))
794 count[i].max_zero_fill= count[i].field_length;
795 /*
796 For every column initialize a tree, which is used to detect distinct
797 column values. 'int_tree' works together with 'tree_buff' and
798 'tree_pos'. It's keys are implemented by pointers into 'tree_buff'.
799 This is accomplished by '-1' as the element size.
800 */
801 init_tree(&count[i].int_tree,0,0,-1,(qsort_cmp2) compare_tree, NULL,
802 NULL, MYF(0));
803 if (records && type != FIELD_BLOB && type != FIELD_VARCHAR)
804 count[i].tree_pos=count[i].tree_buff =
805 my_malloc(count[i].field_length > 1 ? tree_buff_length : 2,
806 MYF(MY_WME));
807 }
808 }
809 return count;
810}
811
812
813 /* Free memory used by counts and trees */
814
815static void free_counts_and_tree_and_queue(HUFF_TREE *huff_trees, uint trees,
816 HUFF_COUNTS *huff_counts,
817 uint fields)
818{
819 register uint i;
820
821 if (huff_trees)
822 {
823 for (i=0 ; i < trees ; i++)
824 {
825 if (huff_trees[i].element_buffer)
826 my_free(huff_trees[i].element_buffer);
827 if (huff_trees[i].code)
828 my_free(huff_trees[i].code);
829 }
830 my_free(huff_trees);
831 }
832 if (huff_counts)
833 {
834 for (i=0 ; i < fields ; i++)
835 {
836 if (huff_counts[i].tree_buff)
837 {
838 my_free(huff_counts[i].tree_buff);
839 delete_tree(&huff_counts[i].int_tree, 0);
840 }
841 }
842 my_free(huff_counts);
843 }
844 delete_queue(&queue); /* This is safe to free */
845 return;
846}
847
848 /* Read through old file and gather some statistics */
849
850static int get_statistic(PACK_MRG_INFO *mrg,HUFF_COUNTS *huff_counts)
851{
852 int error;
853 uint length, null_bytes;
854 ulong reclength,max_blob_length;
855 uchar *record,*pos,*next_pos,*end_pos,*start_pos;
856 ha_rows record_count;
857 HUFF_COUNTS *count,*end_count;
858 TREE_ELEMENT *element;
859 ha_checksum(*calc_checksum)(MARIA_HA *, const uchar *);
860 DBUG_ENTER("get_statistic");
861
862 reclength= mrg->file[0]->s->base.reclength;
863 null_bytes= mrg->file[0]->s->base.null_bytes;
864 record=(uchar*) my_safe_alloca(reclength);
865 end_count=huff_counts+mrg->file[0]->s->base.fields;
866 record_count=0; glob_crc=0;
867 max_blob_length=0;
868
869 /* Check how to calculate checksum */
870 if (mrg->file[0]->s->data_file_type == STATIC_RECORD)
871 calc_checksum= _ma_static_checksum;
872 else
873 calc_checksum= _ma_checksum;
874
875 mrg_reset(mrg);
876 while ((error=mrg_rrnd(mrg,record)) != HA_ERR_END_OF_FILE)
877 {
878 ulong tot_blob_length=0;
879 if (! error)
880 {
881 /* glob_crc is a checksum over all bytes of all records. */
882 glob_crc+= (*calc_checksum)(mrg->file[0],record);
883
884 /* Count the incidence of values separately for every column. */
885 for (pos=record + null_bytes, count=huff_counts ;
886 count < end_count ;
887 count++,
888 pos=next_pos)
889 {
890 next_pos=end_pos=(start_pos=pos)+count->field_length;
891
892 /*
893 Put the whole column value in a tree if there is room for it.
894 'int_tree' is used to quickly check for duplicate values.
895 'tree_buff' collects as many distinct column values as
896 possible. If the field length is > 1, it is tree_buff_length,
897 else 2 bytes. Each value is 'field_length' bytes big. If there
898 are more distinct column values than fit into the buffer, we
899 give up with this tree. BLOBs and VARCHARs do not have a
900 tree_buff as it can only be used with fixed length columns.
901 For the special case of field length == 1, we handle only the
902 case that there is only one distinct value in the table(s).
903 Otherwise, we can have a maximum of 256 distinct values. This
904 is then handled by the normal Huffman tree build.
905
906 Another limit for collecting distinct column values is the
907 number of values itself. Since we would need to build a
908 Huffman tree for the values, we are limited by the 'IS_OFFSET'
909 constant. This constant expresses a bit which is used to
910 determine if a tree element holds a final value or an offset
911 to a child element. Hence, all values and offsets need to be
912 smaller than 'IS_OFFSET'. A tree element is implemented with
913 two integer values, one for the left branch and one for the
914 right branch. For the extreme case that the first element
915 points to the last element, the number of integers in the tree
916 must be less or equal to IS_OFFSET. So the number of elements
917 must be less or equal to IS_OFFSET / 2.
918
919 WARNING: At first, we insert a pointer into the record buffer
920 as the key for the tree. If we got a new distinct value, which
921 is really inserted into the tree, instead of being counted
922 only, we will copy the column value from the record buffer to
923 'tree_buff' and adjust the key pointer of the tree accordingly.
924 */
925 if (count->tree_buff)
926 {
927 global_count=count;
928 if (!(element=tree_insert(&count->int_tree,pos, 0,
929 count->int_tree.custom_arg)) ||
930 (element->count == 1 &&
931 (count->tree_buff + tree_buff_length <
932 count->tree_pos + count->field_length)) ||
933 (count->int_tree.elements_in_tree > IS_OFFSET / 2) ||
934 (count->field_length == 1 &&
935 count->int_tree.elements_in_tree > 1))
936 {
937 delete_tree(&count->int_tree, 0);
938 my_free(count->tree_buff);
939 count->tree_buff=0;
940 }
941 else
942 {
943 /*
944 If tree_insert() succeeds, it either creates a new element
945 or increments the counter of an existing element.
946 */
947 if (element->count == 1)
948 {
949 /* Copy the new column value into 'tree_buff'. */
950 memcpy(count->tree_pos,pos,(size_t) count->field_length);
951 /* Adjust the key pointer in the tree. */
952 tree_set_pointer(element,count->tree_pos);
953 /* Point behind the last column value so far. */
954 count->tree_pos+=count->field_length;
955 }
956 }
957 }
958
959 /* Save character counters and space-counts and zero-field-counts */
960 if (count->field_type == FIELD_NORMAL ||
961 count->field_type == FIELD_SKIP_ENDSPACE)
962 {
963 /* Ignore trailing space. */
964 for ( ; end_pos > pos ; end_pos--)
965 if (end_pos[-1] != ' ')
966 break;
967 /* Empty fields are just counted. Go to the next record. */
968 if (end_pos == pos)
969 {
970 count->empty_fields++;
971 count->max_zero_fill=0;
972 continue;
973 }
974 /*
975 Count the total of all trailing spaces and the number of
976 short trailing spaces. Remember the longest trailing space.
977 */
978 length= (uint) (next_pos-end_pos);
979 count->tot_end_space+=length;
980 if (length < 8)
981 count->end_space[length]++;
982 if (count->max_end_space < length)
983 count->max_end_space = length;
984 }
985
986 if (count->field_type == FIELD_NORMAL ||
987 count->field_type == FIELD_SKIP_PRESPACE)
988 {
989 /* Ignore leading space. */
990 for (pos=start_pos; pos < end_pos ; pos++)
991 if (pos[0] != ' ')
992 break;
993 /* Empty fields are just counted. Go to the next record. */
994 if (end_pos == pos)
995 {
996 count->empty_fields++;
997 count->max_zero_fill=0;
998 continue;
999 }
1000 /*
1001 Count the total of all leading spaces and the number of
1002 short leading spaces. Remember the longest leading space.
1003 */
1004 length= (uint) (pos-start_pos);
1005 count->tot_pre_space+=length;
1006 if (length < 8)
1007 count->pre_space[length]++;
1008 if (count->max_pre_space < length)
1009 count->max_pre_space = length;
1010 }
1011
1012 /* Calculate pos, end_pos, and max_length for variable length fields. */
1013 if (count->field_type == FIELD_BLOB)
1014 {
1015 uint field_length=count->field_length -portable_sizeof_char_ptr;
1016 ulong blob_length= _ma_calc_blob_length(field_length, start_pos);
1017 memcpy(&pos, start_pos+field_length,sizeof(char*));
1018 end_pos=pos+blob_length;
1019 tot_blob_length+=blob_length;
1020 set_if_bigger(count->max_length,blob_length);
1021 }
1022 else if (count->field_type == FIELD_VARCHAR)
1023 {
1024 uint pack_length= HA_VARCHAR_PACKLENGTH(count->field_length-1);
1025 length= (pack_length == 1 ? (uint) *(uchar*) start_pos :
1026 uint2korr(start_pos));
1027 pos= start_pos+pack_length;
1028 end_pos= pos+length;
1029 set_if_bigger(count->max_length,length);
1030 }
1031
1032 /* Evaluate 'max_zero_fill' for short fields. */
1033 if (count->field_length <= 8 &&
1034 (count->field_type == FIELD_NORMAL ||
1035 count->field_type == FIELD_SKIP_ZERO))
1036 {
1037 uint i;
1038 /* Zero fields are just counted. Go to the next record. */
1039 if (!memcmp(start_pos, zero_string, count->field_length))
1040 {
1041 count->zero_fields++;
1042 continue;
1043 }
1044 /*
1045 max_zero_fill starts with field_length. It is decreased every
1046 time a shorter "zero trailer" is found. It is set to zero when
1047 an empty field is found (see above). This suggests that the
1048 variable should be called 'min_zero_fill'.
1049 */
1050 for (i =0 ; i < count->max_zero_fill && ! end_pos[-1 - (int) i] ;
1051 i++) ;
1052 if (i < count->max_zero_fill)
1053 count->max_zero_fill=i;
1054 }
1055
1056 /* Ignore zero fields and check fields. */
1057 if (count->field_type == FIELD_ZERO ||
1058 count->field_type == FIELD_CHECK)
1059 continue;
1060
1061 /*
1062 Count the incidence of every uchar value in the
1063 significant field value.
1064 */
1065 for ( ; pos < end_pos ; pos++)
1066 count->counts[(uchar) *pos]++;
1067
1068 /* Step to next field. */
1069 }
1070
1071 if (tot_blob_length > max_blob_length)
1072 max_blob_length=tot_blob_length;
1073 record_count++;
1074 if (write_loop && record_count % WRITE_COUNT == 0)
1075 {
1076 printf("%lu\r", (ulong) record_count);
1077 fflush(stdout);
1078 }
1079 }
1080 else if (error != HA_ERR_RECORD_DELETED)
1081 {
1082 fprintf(stderr, "Got error %d while reading rows\n", error);
1083 break;
1084 }
1085
1086 /* Step to next record. */
1087 }
1088 if (write_loop)
1089 {
1090 printf(" \r");
1091 fflush(stdout);
1092 }
1093
1094 /*
1095 If --debug=d,fakebigcodes is set, fake the counts to get big Huffman
1096 codes.
1097 */
1098 DBUG_EXECUTE_IF("fakebigcodes", fakebigcodes(huff_counts, end_count););
1099
1100 DBUG_PRINT("info", ("Found the following number of incidents "
1101 "of the uchar codes:"));
1102 if (verbose >= 2)
1103 printf("Found the following number of incidents "
1104 "of the uchar codes:\n");
1105 for (count= huff_counts ; count < end_count; count++)
1106 {
1107 uint idx;
1108 my_off_t total_count;
1109 char llbuf[32];
1110
1111 DBUG_PRINT("info", ("column: %3u", (uint) (count - huff_counts + 1)));
1112 if (verbose >= 2)
1113 printf("column: %3u\n", (uint) (count - huff_counts + 1));
1114 if (count->tree_buff)
1115 {
1116 DBUG_PRINT("info", ("number of distinct values: %u",
1117 (uint) ((count->tree_pos - count->tree_buff) /
1118 count->field_length)));
1119 if (verbose >= 2)
1120 printf("number of distinct values: %u\n",
1121 (uint) ((count->tree_pos - count->tree_buff) /
1122 count->field_length));
1123 }
1124 total_count= 0;
1125 for (idx= 0; idx < 256; idx++)
1126 {
1127 if (count->counts[idx])
1128 {
1129 total_count+= count->counts[idx];
1130 DBUG_PRINT("info", ("counts[0x%02x]: %12s", idx,
1131 llstr((longlong) count->counts[idx], llbuf)));
1132 if (verbose >= 2)
1133 printf("counts[0x%02x]: %12s\n", idx,
1134 llstr((longlong) count->counts[idx], llbuf));
1135 }
1136 }
1137 DBUG_PRINT("info", ("total: %12s", llstr((longlong) total_count,
1138 llbuf)));
1139 if ((verbose >= 2) && total_count)
1140 {
1141 printf("total: %12s\n",
1142 llstr((longlong) total_count, llbuf));
1143 }
1144 }
1145
1146 mrg->records=record_count;
1147 mrg->max_blob_length=max_blob_length;
1148 my_safe_afree(record, reclength);
1149 DBUG_RETURN(error != HA_ERR_END_OF_FILE);
1150}
1151
1152static int compare_huff_elements(void *not_used __attribute__((unused)),
1153 uchar *a, uchar *b)
1154{
1155 return *((my_off_t*) a) < *((my_off_t*) b) ? -1 :
1156 (*((my_off_t*) a) == *((my_off_t*) b) ? 0 : 1);
1157}
1158
1159 /* Check each tree if we should use pre-space-compress, end-space-
1160 compress, empty-field-compress or zero-field-compress */
1161
1162static void check_counts(HUFF_COUNTS *huff_counts, uint trees,
1163 my_off_t records)
1164{
1165 uint space_fields,fill_zero_fields,field_count[(int) FIELD_enum_val_count];
1166 my_off_t old_length,new_length,length;
1167 DBUG_ENTER("check_counts");
1168
1169 bzero((uchar*) field_count,sizeof(field_count));
1170 space_fields=fill_zero_fields=0;
1171
1172 for (; trees-- ; huff_counts++)
1173 {
1174 if (huff_counts->field_type == FIELD_BLOB)
1175 {
1176 huff_counts->length_bits=max_bit(huff_counts->max_length);
1177 goto found_pack;
1178 }
1179 else if (huff_counts->field_type == FIELD_VARCHAR)
1180 {
1181 huff_counts->length_bits=max_bit(huff_counts->max_length);
1182 goto found_pack;
1183 }
1184 else if (huff_counts->field_type == FIELD_CHECK)
1185 {
1186 huff_counts->bytes_packed=0;
1187 huff_counts->counts[0]=0;
1188 goto found_pack;
1189 }
1190
1191 huff_counts->field_type=FIELD_NORMAL;
1192 huff_counts->pack_type=0;
1193
1194 /* Check for zero-filled records (in this column), or zero records. */
1195 if (huff_counts->zero_fields || ! records)
1196 {
1197 my_off_t old_space_count;
1198 /*
1199 If there are only zero filled records (in this column),
1200 or no records at all, we are done.
1201 */
1202 if (huff_counts->zero_fields == records)
1203 {
1204 huff_counts->field_type= FIELD_ZERO;
1205 huff_counts->bytes_packed=0;
1206 huff_counts->counts[0]=0;
1207 goto found_pack;
1208 }
1209 /* Remeber the number of significant spaces. */
1210 old_space_count=huff_counts->counts[' '];
1211 /* Add all leading and trailing spaces. */
1212 huff_counts->counts[' ']+= (huff_counts->tot_end_space +
1213 huff_counts->tot_pre_space +
1214 huff_counts->empty_fields *
1215 huff_counts->field_length);
1216 /* Check, what the compressed length of this would be. */
1217 old_length=calc_packed_length(huff_counts,0)+records/8;
1218 /* Get the number of zero bytes. */
1219 length=huff_counts->zero_fields*huff_counts->field_length;
1220 /* Add it to the counts. */
1221 huff_counts->counts[0]+=length;
1222 /* Check, what the compressed length of this would be. */
1223 new_length=calc_packed_length(huff_counts,0);
1224 /* If the compression without the zeroes would be shorter, we are done. */
1225 if (old_length < new_length && huff_counts->field_length > 1)
1226 {
1227 huff_counts->field_type=FIELD_SKIP_ZERO;
1228 huff_counts->counts[0]-=length;
1229 huff_counts->bytes_packed=old_length- records/8;
1230 goto found_pack;
1231 }
1232 /* Remove the insignificant spaces, but keep the zeroes. */
1233 huff_counts->counts[' ']=old_space_count;
1234 }
1235 /* Check, what the compressed length of this column would be. */
1236 huff_counts->bytes_packed=calc_packed_length(huff_counts,0);
1237
1238 /*
1239 If there are enough empty records (in this column),
1240 treating them specially may pay off.
1241 */
1242 if (huff_counts->empty_fields)
1243 {
1244 if (huff_counts->field_length > 2 &&
1245 huff_counts->empty_fields + (records - huff_counts->empty_fields)*
1246 (1+max_bit(MY_MAX(huff_counts->max_pre_space,
1247 huff_counts->max_end_space))) <
1248 records * max_bit(huff_counts->field_length))
1249 {
1250 huff_counts->pack_type |= PACK_TYPE_SPACE_FIELDS;
1251 }
1252 else
1253 {
1254 length=huff_counts->empty_fields*huff_counts->field_length;
1255 if (huff_counts->tot_end_space || ! huff_counts->tot_pre_space)
1256 {
1257 huff_counts->tot_end_space+=length;
1258 huff_counts->max_end_space=huff_counts->field_length;
1259 if (huff_counts->field_length < 8)
1260 huff_counts->end_space[huff_counts->field_length]+=
1261 huff_counts->empty_fields;
1262 }
1263 if (huff_counts->tot_pre_space)
1264 {
1265 huff_counts->tot_pre_space+=length;
1266 huff_counts->max_pre_space=huff_counts->field_length;
1267 if (huff_counts->field_length < 8)
1268 huff_counts->pre_space[huff_counts->field_length]+=
1269 huff_counts->empty_fields;
1270 }
1271 }
1272 }
1273
1274 /*
1275 If there are enough trailing spaces (in this column),
1276 treating them specially may pay off.
1277 */
1278 if (huff_counts->tot_end_space)
1279 {
1280 huff_counts->counts[' ']+=huff_counts->tot_pre_space;
1281 if (test_space_compress(huff_counts,records,huff_counts->max_end_space,
1282 huff_counts->end_space,
1283 huff_counts->tot_end_space,FIELD_SKIP_ENDSPACE))
1284 goto found_pack;
1285 huff_counts->counts[' ']-=huff_counts->tot_pre_space;
1286 }
1287
1288 /*
1289 If there are enough leading spaces (in this column),
1290 treating them specially may pay off.
1291 */
1292 if (huff_counts->tot_pre_space)
1293 {
1294 if (test_space_compress(huff_counts,records,huff_counts->max_pre_space,
1295 huff_counts->pre_space,
1296 huff_counts->tot_pre_space,FIELD_SKIP_PRESPACE))
1297 goto found_pack;
1298 }
1299
1300 found_pack: /* Found field-packing */
1301
1302 /* Test if we can use zero-fill */
1303
1304 if (huff_counts->max_zero_fill &&
1305 (huff_counts->field_type == FIELD_NORMAL ||
1306 huff_counts->field_type == FIELD_SKIP_ZERO))
1307 {
1308 huff_counts->counts[0]-=huff_counts->max_zero_fill*
1309 (huff_counts->field_type == FIELD_SKIP_ZERO ?
1310 records - huff_counts->zero_fields : records);
1311 huff_counts->pack_type|=PACK_TYPE_ZERO_FILL;
1312 huff_counts->bytes_packed=calc_packed_length(huff_counts,0);
1313 }
1314
1315 /* Test if intervall-field is better */
1316
1317 if (huff_counts->tree_buff)
1318 {
1319 HUFF_TREE tree;
1320
1321 DBUG_EXECUTE_IF("forceintervall",
1322 huff_counts->bytes_packed= ~ (my_off_t) 0;);
1323 tree.element_buffer=0;
1324 if (!make_huff_tree(&tree,huff_counts) &&
1325 tree.bytes_packed+tree.tree_pack_length < huff_counts->bytes_packed)
1326 {
1327 if (tree.elements == 1)
1328 huff_counts->field_type=FIELD_CONSTANT;
1329 else
1330 huff_counts->field_type=FIELD_INTERVALL;
1331 huff_counts->pack_type=0;
1332 }
1333 else
1334 {
1335 my_free(huff_counts->tree_buff);
1336 delete_tree(&huff_counts->int_tree, 0);
1337 huff_counts->tree_buff=0;
1338 }
1339 if (tree.element_buffer)
1340 my_free(tree.element_buffer);
1341 }
1342 if (huff_counts->pack_type & PACK_TYPE_SPACE_FIELDS)
1343 space_fields++;
1344 if (huff_counts->pack_type & PACK_TYPE_ZERO_FILL)
1345 fill_zero_fields++;
1346 field_count[huff_counts->field_type]++;
1347 }
1348 DBUG_PRINT("info", ("normal: %3d empty-space: %3d "
1349 "empty-zero: %3d empty-fill: %3d",
1350 field_count[FIELD_NORMAL],space_fields,
1351 field_count[FIELD_SKIP_ZERO],fill_zero_fields));
1352 DBUG_PRINT("info", ("pre-space: %3d end-space: %3d "
1353 "intervall-fields: %3d zero: %3d",
1354 field_count[FIELD_SKIP_PRESPACE],
1355 field_count[FIELD_SKIP_ENDSPACE],
1356 field_count[FIELD_INTERVALL],
1357 field_count[FIELD_ZERO]));
1358 if (verbose)
1359 printf("\nnormal: %3d empty-space: %3d "
1360 "empty-zero: %3d empty-fill: %3d\n"
1361 "pre-space: %3d end-space: %3d "
1362 "intervall-fields: %3d zero: %3d\n",
1363 field_count[FIELD_NORMAL],space_fields,
1364 field_count[FIELD_SKIP_ZERO],fill_zero_fields,
1365 field_count[FIELD_SKIP_PRESPACE],
1366 field_count[FIELD_SKIP_ENDSPACE],
1367 field_count[FIELD_INTERVALL],
1368 field_count[FIELD_ZERO]);
1369 DBUG_VOID_RETURN;
1370}
1371
1372
1373/* Test if we can use space-compression and empty-field-compression */
1374
1375static int
1376test_space_compress(HUFF_COUNTS *huff_counts, my_off_t records,
1377 uint max_space_length, my_off_t *space_counts,
1378 my_off_t tot_space_count, enum en_fieldtype field_type)
1379{
1380 int min_pos;
1381 uint length_bits,i;
1382 my_off_t space_count,min_space_count,min_pack,new_length,skip;
1383
1384 length_bits=max_bit(max_space_length);
1385
1386 /* Default no end_space-packing */
1387 space_count=huff_counts->counts[(uint) ' '];
1388 min_space_count= (huff_counts->counts[(uint) ' ']+= tot_space_count);
1389 min_pack=calc_packed_length(huff_counts,0);
1390 min_pos= -2;
1391 huff_counts->counts[(uint) ' ']=space_count;
1392
1393 /* Test with allways space-count */
1394 new_length=huff_counts->bytes_packed+length_bits*records/8;
1395 if (new_length+1 < min_pack)
1396 {
1397 min_pos= -1;
1398 min_pack=new_length;
1399 min_space_count=space_count;
1400 }
1401 /* Test with length-flag */
1402 for (skip=0L, i=0 ; i < 8 ; i++)
1403 {
1404 if (space_counts[i])
1405 {
1406 if (i)
1407 huff_counts->counts[(uint) ' ']+=space_counts[i];
1408 skip+=huff_counts->pre_space[i];
1409 new_length=calc_packed_length(huff_counts,0)+
1410 (records+(records-skip)*(1+length_bits))/8;
1411 if (new_length < min_pack)
1412 {
1413 min_pos=(int) i;
1414 min_pack=new_length;
1415 min_space_count=huff_counts->counts[(uint) ' '];
1416 }
1417 }
1418 }
1419
1420 huff_counts->counts[(uint) ' ']=min_space_count;
1421 huff_counts->bytes_packed=min_pack;
1422 switch (min_pos) {
1423 case -2:
1424 return(0); /* No space-compress */
1425 case -1: /* Always space-count */
1426 huff_counts->field_type=field_type;
1427 huff_counts->min_space=0;
1428 huff_counts->length_bits=max_bit(max_space_length);
1429 break;
1430 default:
1431 huff_counts->field_type=field_type;
1432 huff_counts->min_space=(uint) min_pos;
1433 huff_counts->pack_type|=PACK_TYPE_SELECTED;
1434 huff_counts->length_bits=max_bit(max_space_length);
1435 break;
1436 }
1437 return(1); /* Using space-compress */
1438}
1439
1440
1441 /* Make a huff_tree of each huff_count */
1442
1443static HUFF_TREE* make_huff_trees(HUFF_COUNTS *huff_counts, uint trees)
1444{
1445 uint tree;
1446 HUFF_TREE *huff_tree;
1447 DBUG_ENTER("make_huff_trees");
1448
1449 if (!(huff_tree=(HUFF_TREE*) my_malloc(trees*sizeof(HUFF_TREE),
1450 MYF(MY_WME | MY_ZEROFILL))))
1451 DBUG_RETURN(0);
1452
1453 for (tree=0 ; tree < trees ; tree++)
1454 {
1455 if (make_huff_tree(huff_tree+tree,huff_counts+tree))
1456 {
1457 while (tree--)
1458 my_free(huff_tree[tree].element_buffer);
1459 my_free(huff_tree);
1460 DBUG_RETURN(0);
1461 }
1462 }
1463 DBUG_RETURN(huff_tree);
1464}
1465
1466/*
1467 Build a Huffman tree.
1468
1469 SYNOPSIS
1470 make_huff_tree()
1471 huff_tree The Huffman tree.
1472 huff_counts The counts.
1473
1474 DESCRIPTION
1475 Build a Huffman tree according to huff_counts->counts or
1476 huff_counts->tree_buff. tree_buff, if non-NULL contains up to
1477 tree_buff_length of distinct column values. In that case, whole
1478 values can be Huffman encoded instead of single bytes.
1479
1480 RETURN
1481 0 OK
1482 != 0 Error
1483*/
1484
1485static int make_huff_tree(HUFF_TREE *huff_tree, HUFF_COUNTS *huff_counts)
1486{
1487 uint i,found,bits_packed,first,last;
1488 my_off_t bytes_packed;
1489 HUFF_ELEMENT *a,*b,*new_huff_el;
1490
1491 first=last=0;
1492 if (huff_counts->tree_buff)
1493 {
1494 /* Calculate the number of distinct values in tree_buff. */
1495 found= (uint) (huff_counts->tree_pos - huff_counts->tree_buff) /
1496 huff_counts->field_length;
1497 first=0; last=found-1;
1498 }
1499 else
1500 {
1501 /* Count the number of uchar codes found in the column. */
1502 for (i=found=0 ; i < 256 ; i++)
1503 {
1504 if (huff_counts->counts[i])
1505 {
1506 if (! found++)
1507 first=i;
1508 last=i;
1509 }
1510 }
1511 if (found < 2)
1512 found=2;
1513 }
1514
1515 /* When using 'tree_buff' we can have more that 256 values. */
1516 if (queue.max_elements < found)
1517 {
1518 delete_queue(&queue);
1519 if (init_queue(&queue,found, 0, 0, compare_huff_elements, 0, 0, 0))
1520 return -1;
1521 }
1522
1523 /* Allocate or reallocate an element buffer for the Huffman tree. */
1524 if (!huff_tree->element_buffer)
1525 {
1526 if (!(huff_tree->element_buffer=
1527 (HUFF_ELEMENT*) my_malloc(found*2*sizeof(HUFF_ELEMENT),MYF(MY_WME))))
1528 return 1;
1529 }
1530 else
1531 {
1532 HUFF_ELEMENT *temp;
1533 if (!(temp=
1534 (HUFF_ELEMENT*) my_realloc((uchar*) huff_tree->element_buffer,
1535 found*2*sizeof(HUFF_ELEMENT),
1536 MYF(MY_WME))))
1537 return 1;
1538 huff_tree->element_buffer=temp;
1539 }
1540
1541 huff_counts->tree=huff_tree;
1542 huff_tree->counts=huff_counts;
1543 huff_tree->min_chr=first;
1544 huff_tree->max_chr=last;
1545 huff_tree->char_bits=max_bit(last-first);
1546 huff_tree->offset_bits=max_bit(found-1)+1;
1547
1548 if (huff_counts->tree_buff)
1549 {
1550 huff_tree->elements=0;
1551 huff_tree->tree_pack_length=(1+15+16+5+5+
1552 (huff_tree->char_bits+1)*found+
1553 (huff_tree->offset_bits+1)*
1554 (found-2)+7)/8 +
1555 (uint) (huff_tree->counts->tree_pos-
1556 huff_tree->counts->tree_buff);
1557 /*
1558 Put a HUFF_ELEMENT into the queue for every distinct column value.
1559
1560 tree_walk() calls save_counts_in_queue() for every element in
1561 'int_tree'. This takes elements from the target trees element
1562 buffer and places references to them into the buffer of the
1563 priority queue. We insert in column value order, but the order is
1564 in fact irrelevant here. We will establish the correct order
1565 later.
1566 */
1567 tree_walk(&huff_counts->int_tree,
1568 (int (*)(void*, element_count,void*)) save_counts_in_queue,
1569 (uchar*) huff_tree, left_root_right);
1570 }
1571 else
1572 {
1573 huff_tree->elements=found;
1574 huff_tree->tree_pack_length=(9+9+5+5+
1575 (huff_tree->char_bits+1)*found+
1576 (huff_tree->offset_bits+1)*
1577 (found-2)+7)/8;
1578 /*
1579 Put a HUFF_ELEMENT into the queue for every uchar code found in the column.
1580
1581 The elements are taken from the target trees element buffer.
1582 Instead of using queue_insert(), we just place references to the
1583 elements into the buffer of the priority queue. We insert in byte
1584 value order, but the order is in fact irrelevant here. We will
1585 establish the correct order later.
1586 */
1587 for (i=first, found=0 ; i <= last ; i++)
1588 {
1589 if (huff_counts->counts[i])
1590 {
1591 new_huff_el=huff_tree->element_buffer+(found++);
1592 new_huff_el->count=huff_counts->counts[i];
1593 new_huff_el->a.leaf.null=0;
1594 new_huff_el->a.leaf.element_nr=i;
1595 queue.root[found]=(uchar*) new_huff_el;
1596 }
1597 }
1598 /*
1599 If there is only a single uchar value in this field in all records,
1600 add a second element with zero incidence. This is required to enter
1601 the loop, which builds the Huffman tree.
1602 */
1603 while (found < 2)
1604 {
1605 new_huff_el=huff_tree->element_buffer+(found++);
1606 new_huff_el->count=0;
1607 new_huff_el->a.leaf.null=0;
1608 if (last)
1609 new_huff_el->a.leaf.element_nr=huff_tree->min_chr=last-1;
1610 else
1611 new_huff_el->a.leaf.element_nr=huff_tree->max_chr=last+1;
1612 queue.root[found]=(uchar*) new_huff_el;
1613 }
1614 }
1615
1616 /* Make a queue from the queue buffer. */
1617 queue.elements=found;
1618
1619 /*
1620 Make a priority queue from the queue. Construct its index so that we
1621 have a partially ordered tree.
1622 */
1623 queue_fix(&queue);
1624
1625 /* The Huffman algorithm. */
1626 bytes_packed=0; bits_packed=0;
1627 for (i=1 ; i < found ; i++)
1628 {
1629 /*
1630 Pop the top element from the queue (the one with the least incidence).
1631 Popping from a priority queue includes a re-ordering of the queue,
1632 to get the next least incidence element to the top.
1633 */
1634 a=(HUFF_ELEMENT*) queue_remove_top(&queue);
1635 /* Copy the next least incidence element */
1636 b=(HUFF_ELEMENT*) queue_top(&queue);
1637 /* Get a new element from the element buffer. */
1638 new_huff_el=huff_tree->element_buffer+found+i;
1639 /* The new element gets the sum of the two least incidence elements. */
1640 new_huff_el->count=a->count+b->count;
1641 /*
1642 The Huffman algorithm assigns another bit to the code for a byte
1643 every time that bytes incidence is combined (directly or indirectly)
1644 to a new element as one of the two least incidence elements.
1645 This means that one more bit per incidence of that uchar is required
1646 in the resulting file. So we add the new combined incidence as the
1647 number of bits by which the result grows.
1648 */
1649 bits_packed+=(uint) (new_huff_el->count & 7);
1650 bytes_packed+=new_huff_el->count/8;
1651 /* The new element points to its children, lesser in left. */
1652 new_huff_el->a.nod.left=a;
1653 new_huff_el->a.nod.right=b;
1654 /*
1655 Replace the copied top element by the new element and re-order the
1656 queue.
1657 */
1658 queue_top(&queue)= (uchar*) new_huff_el;
1659 queue_replace_top(&queue);
1660 }
1661 huff_tree->root=(HUFF_ELEMENT*) queue.root[1];
1662 huff_tree->bytes_packed=bytes_packed+(bits_packed+7)/8;
1663 return 0;
1664}
1665
1666static int compare_tree(void* cmp_arg __attribute__((unused)),
1667 register const uchar *s, register const uchar *t)
1668{
1669 uint length;
1670 for (length=global_count->field_length; length-- ;)
1671 if (*s++ != *t++)
1672 return (int) s[-1] - (int) t[-1];
1673 return 0;
1674}
1675
1676/*
1677 Organize distinct column values and their incidences into a priority queue.
1678
1679 SYNOPSIS
1680 save_counts_in_queue()
1681 key The column value.
1682 count The incidence of this value.
1683 tree The Huffman tree to be built later.
1684
1685 DESCRIPTION
1686 We use the element buffer of the targeted tree. The distinct column
1687 values are organized in a priority queue first. The Huffman
1688 algorithm will later organize the elements into a Huffman tree. For
1689 the time being, we just place references to the elements into the
1690 queue buffer. The buffer will later be organized into a priority
1691 queue.
1692
1693 RETURN
1694 0
1695 */
1696
1697static int save_counts_in_queue(uchar *key, element_count count,
1698 HUFF_TREE *tree)
1699{
1700 HUFF_ELEMENT *new_huff_el;
1701
1702 new_huff_el=tree->element_buffer+(tree->elements++);
1703 new_huff_el->count=count;
1704 new_huff_el->a.leaf.null=0;
1705 new_huff_el->a.leaf.element_nr= (uint) (key- tree->counts->tree_buff) /
1706 tree->counts->field_length;
1707 queue.root[tree->elements]=(uchar*) new_huff_el;
1708 return 0;
1709}
1710
1711
1712/*
1713 Calculate length of file if given counts should be used.
1714
1715 SYNOPSIS
1716 calc_packed_length()
1717 huff_counts The counts for a column of the table(s).
1718 add_tree_lenght If the decode tree length should be added.
1719
1720 DESCRIPTION
1721 We need to follow the Huffman algorithm until we know, how many bits
1722 are required for each uchar code. But we do not need the resulting
1723 Huffman tree. Hence, we can leave out some steps which are essential
1724 in make_huff_tree().
1725
1726 RETURN
1727 Number of bytes required to compress this table column.
1728*/
1729
1730static my_off_t calc_packed_length(HUFF_COUNTS *huff_counts,
1731 uint add_tree_lenght)
1732{
1733 uint i,found,bits_packed,first,last;
1734 my_off_t bytes_packed;
1735 HUFF_ELEMENT element_buffer[256];
1736 DBUG_ENTER("calc_packed_length");
1737
1738 /*
1739 WARNING: We use a small hack for efficiency: Instead of placing
1740 references to HUFF_ELEMENTs into the queue, we just insert
1741 references to the counts of the uchar codes which appeared in this
1742 table column. During the Huffman algorithm they are successively
1743 replaced by references to HUFF_ELEMENTs. This works, because
1744 HUFF_ELEMENTs have the incidence count at their beginning.
1745 Regardless, wether the queue array contains references to counts of
1746 type my_off_t or references to HUFF_ELEMENTs which have the count of
1747 type my_off_t at their beginning, it always points to a count of the
1748 same type.
1749
1750 Instead of using queue_insert(), we just copy the references into
1751 the buffer of the priority queue. We insert in uchar value order, but
1752 the order is in fact irrelevant here. We will establish the correct
1753 order later.
1754 */
1755 first=last=0;
1756 for (i=found=0 ; i < 256 ; i++)
1757 {
1758 if (huff_counts->counts[i])
1759 {
1760 if (! found++)
1761 first=i;
1762 last=i;
1763 /* We start with root[1], which is the queues top element. */
1764 queue.root[found]=(uchar*) &huff_counts->counts[i];
1765 }
1766 }
1767 if (!found)
1768 DBUG_RETURN(0); /* Empty tree */
1769 /*
1770 If there is only a single uchar value in this field in all records,
1771 add a second element with zero incidence. This is required to enter
1772 the loop, which follows the Huffman algorithm.
1773 */
1774 if (found < 2)
1775 queue.root[++found]=(uchar*) &huff_counts->counts[last ? 0 : 1];
1776
1777 /* Make a queue from the queue buffer. */
1778 queue.elements=found;
1779
1780 bytes_packed=0; bits_packed=0;
1781 /* Add the length of the coding table, which would become part of the file. */
1782 if (add_tree_lenght)
1783 bytes_packed=(8+9+5+5+(max_bit(last-first)+1)*found+
1784 (max_bit(found-1)+1+1)*(found-2) +7)/8;
1785
1786 /*
1787 Make a priority queue from the queue. Construct its index so that we
1788 have a partially ordered tree.
1789 */
1790 queue_fix(&queue);
1791
1792 /* The Huffman algorithm. */
1793 for (i=0 ; i < found-1 ; i++)
1794 {
1795 my_off_t *a;
1796 my_off_t *b;
1797 HUFF_ELEMENT *new_huff_el;
1798
1799 /*
1800 Pop the top element from the queue (the one with the least
1801 incidence). Popping from a priority queue includes a re-ordering
1802 of the queue, to get the next least incidence element to the top.
1803 */
1804 a= (my_off_t*) queue_remove_top(&queue);
1805 /* Copy the next least incidence element. */
1806 b= (my_off_t*) queue_top(&queue);
1807 /* Create a new element in a local (automatic) buffer. */
1808 new_huff_el= element_buffer + i;
1809 /* The new element gets the sum of the two least incidence elements. */
1810 new_huff_el->count= *a + *b;
1811 /*
1812 The Huffman algorithm assigns another bit to the code for a byte
1813 every time that bytes incidence is combined (directly or indirectly)
1814 to a new element as one of the two least incidence elements.
1815 This means that one more bit per incidence of that uchar is required
1816 in the resulting file. So we add the new combined incidence as the
1817 number of bits by which the result grows.
1818 */
1819 bits_packed+=(uint) (new_huff_el->count & 7);
1820 bytes_packed+=new_huff_el->count/8;
1821 /*
1822 Replace the copied top element by the new element and re-order the
1823 queue. This successively replaces the references to counts by
1824 references to HUFF_ELEMENTs.
1825 */
1826 queue_top(&queue)= (uchar*) new_huff_el;
1827 queue_replace_top(&queue);
1828 }
1829 DBUG_RETURN(bytes_packed+(bits_packed+7)/8);
1830}
1831
1832
1833 /* Remove trees that don't give any compression */
1834
1835static uint join_same_trees(HUFF_COUNTS *huff_counts, uint trees)
1836{
1837 uint k,tree_number;
1838 HUFF_COUNTS count,*i,*j,*last_count;
1839
1840 last_count=huff_counts+trees;
1841 for (tree_number=0, i=huff_counts ; i < last_count ; i++)
1842 {
1843 if (!i->tree->tree_number)
1844 {
1845 i->tree->tree_number= ++tree_number;
1846 if (i->tree_buff)
1847 continue; /* Don't join intervall */
1848 for (j=i+1 ; j < last_count ; j++)
1849 {
1850 if (! j->tree->tree_number && ! j->tree_buff)
1851 {
1852 for (k=0 ; k < 256 ; k++)
1853 count.counts[k]=i->counts[k]+j->counts[k];
1854 if (calc_packed_length(&count,1) <=
1855 i->tree->bytes_packed + j->tree->bytes_packed+
1856 i->tree->tree_pack_length+j->tree->tree_pack_length+
1857 ALLOWED_JOIN_DIFF)
1858 {
1859 memcpy(i->counts,(uchar*) count.counts, sizeof(count.counts[0])*256);
1860 my_free(j->tree->element_buffer);
1861 j->tree->element_buffer=0;
1862 j->tree=i->tree;
1863 bmove((uchar*) i->counts,(uchar*) count.counts,
1864 sizeof(count.counts[0])*256);
1865 if (make_huff_tree(i->tree,i))
1866 return (uint) -1;
1867 }
1868 }
1869 }
1870 }
1871 }
1872 DBUG_PRINT("info", ("Original trees: %d After join: %d",
1873 trees, tree_number));
1874 if (verbose)
1875 printf("Original trees: %d After join: %d\n", trees, tree_number);
1876 return tree_number; /* Return trees left */
1877}
1878
1879
1880/*
1881 Fill in huff_tree encode tables.
1882
1883 SYNOPSIS
1884 make_huff_decode_table()
1885 huff_tree An array of HUFF_TREE which are to be encoded.
1886 trees The number of HUFF_TREE in the array.
1887
1888 RETURN
1889 0 success
1890 != 0 error
1891*/
1892
1893static int make_huff_decode_table(HUFF_TREE *huff_tree, uint trees)
1894{
1895 uint elements;
1896 for ( ; trees-- ; huff_tree++)
1897 {
1898 if (huff_tree->tree_number > 0)
1899 {
1900 elements=huff_tree->counts->tree_buff ? huff_tree->elements : 256;
1901 if (!(huff_tree->code =
1902 (ulonglong*) my_malloc(elements*
1903 (sizeof(ulonglong) + sizeof(uchar)),
1904 MYF(MY_WME | MY_ZEROFILL))))
1905 return 1;
1906 huff_tree->code_len=(uchar*) (huff_tree->code+elements);
1907 make_traverse_code_tree(huff_tree, huff_tree->root,
1908 8 * sizeof(ulonglong), 0);
1909 }
1910 }
1911 return 0;
1912}
1913
1914
1915static void make_traverse_code_tree(HUFF_TREE *huff_tree,
1916 HUFF_ELEMENT *element,
1917 uint size, ulonglong code)
1918{
1919 uint chr;
1920 if (!element->a.leaf.null)
1921 {
1922 chr=element->a.leaf.element_nr;
1923 huff_tree->code_len[chr]= (uchar) (8 * sizeof(ulonglong) - size);
1924 huff_tree->code[chr]= (code >> size);
1925 if (huff_tree->height < 8 * sizeof(ulonglong) - size)
1926 huff_tree->height= 8 * sizeof(ulonglong) - size;
1927 }
1928 else
1929 {
1930 size--;
1931 make_traverse_code_tree(huff_tree,element->a.nod.left,size,code);
1932 make_traverse_code_tree(huff_tree, element->a.nod.right, size,
1933 code + (((ulonglong) 1) << size));
1934 }
1935 return;
1936}
1937
1938
1939/*
1940 Convert a value into binary digits.
1941
1942 SYNOPSIS
1943 bindigits()
1944 value The value.
1945 length The number of low order bits to convert.
1946
1947 NOTE
1948 The result string is in static storage. It is reused on every call.
1949 So you cannot use it twice in one expression.
1950
1951 RETURN
1952 A pointer to a static NUL-terminated string.
1953 */
1954
1955static char *bindigits(ulonglong value, uint bits)
1956{
1957 static char digits[72];
1958 char *ptr= digits;
1959 uint idx= bits;
1960
1961 DBUG_ASSERT(idx < sizeof(digits));
1962 while (idx)
1963 *(ptr++)= '0' + ((char) (value >> (--idx)) & (char) 1);
1964 *ptr= '\0';
1965 return digits;
1966}
1967
1968
1969/*
1970 Convert a value into hexadecimal digits.
1971
1972 SYNOPSIS
1973 hexdigits()
1974 value The value.
1975
1976 NOTE
1977 The result string is in static storage. It is reused on every call.
1978 So you cannot use it twice in one expression.
1979
1980 RETURN
1981 A pointer to a static NUL-terminated string.
1982 */
1983
1984static char *hexdigits(ulonglong value)
1985{
1986 static char digits[20];
1987 char *ptr= digits;
1988 uint idx= 2 * sizeof(value); /* Two hex digits per byte. */
1989
1990 DBUG_ASSERT(idx < sizeof(digits));
1991 while (idx)
1992 {
1993 if ((*(ptr++)= '0' + ((char) (value >> (4 * (--idx))) & (char) 0xf)) > '9')
1994 *(ptr - 1)+= 'a' - '9' - 1;
1995 }
1996 *ptr= '\0';
1997 return digits;
1998}
1999
2000
2001 /* Write header to new packed data file */
2002
2003static int write_header(PACK_MRG_INFO *mrg,uint head_length,uint trees,
2004 my_off_t tot_elements,my_off_t filelength)
2005{
2006 uchar *buff= (uchar*) file_buffer.pos;
2007
2008 bzero(buff,HEAD_LENGTH);
2009 memcpy(buff,maria_pack_file_magic,4);
2010 int4store(buff+4,head_length);
2011 int4store(buff+8, mrg->min_pack_length);
2012 int4store(buff+12,mrg->max_pack_length);
2013 int4store(buff+16,tot_elements);
2014 int4store(buff+20,intervall_length);
2015 int2store(buff+24,trees);
2016 buff[26]=(char) mrg->ref_length;
2017 /* Save record pointer length */
2018 buff[27]= (uchar) maria_get_pointer_length((ulonglong) filelength,2);
2019 if (test_only)
2020 return 0;
2021 my_seek(file_buffer.file,0L,MY_SEEK_SET,MYF(0));
2022 return my_write(file_buffer.file,(const uchar *) file_buffer.pos,HEAD_LENGTH,
2023 MYF(MY_WME | MY_NABP | MY_WAIT_IF_FULL)) != 0;
2024}
2025
2026 /* Write fieldinfo to new packed file */
2027
2028static void write_field_info(HUFF_COUNTS *counts, uint fields, uint trees)
2029{
2030 reg1 uint i;
2031 uint huff_tree_bits;
2032 huff_tree_bits=max_bit(trees ? trees-1 : 0);
2033
2034 DBUG_PRINT("info", (" "));
2035 DBUG_PRINT("info", ("column types:"));
2036 DBUG_PRINT("info", ("FIELD_NORMAL 0"));
2037 DBUG_PRINT("info", ("FIELD_SKIP_ENDSPACE 1"));
2038 DBUG_PRINT("info", ("FIELD_SKIP_PRESPACE 2"));
2039 DBUG_PRINT("info", ("FIELD_SKIP_ZERO 3"));
2040 DBUG_PRINT("info", ("FIELD_BLOB 4"));
2041 DBUG_PRINT("info", ("FIELD_CONSTANT 5"));
2042 DBUG_PRINT("info", ("FIELD_INTERVALL 6"));
2043 DBUG_PRINT("info", ("FIELD_ZERO 7"));
2044 DBUG_PRINT("info", ("FIELD_VARCHAR 8"));
2045 DBUG_PRINT("info", ("FIELD_CHECK 9"));
2046 DBUG_PRINT("info", (" "));
2047 DBUG_PRINT("info", ("pack type as a set of flags:"));
2048 DBUG_PRINT("info", ("PACK_TYPE_SELECTED 1"));
2049 DBUG_PRINT("info", ("PACK_TYPE_SPACE_FIELDS 2"));
2050 DBUG_PRINT("info", ("PACK_TYPE_ZERO_FILL 4"));
2051 DBUG_PRINT("info", (" "));
2052 if (verbose >= 2)
2053 {
2054 printf("\n");
2055 printf("column types:\n");
2056 printf("FIELD_NORMAL 0\n");
2057 printf("FIELD_SKIP_ENDSPACE 1\n");
2058 printf("FIELD_SKIP_PRESPACE 2\n");
2059 printf("FIELD_SKIP_ZERO 3\n");
2060 printf("FIELD_BLOB 4\n");
2061 printf("FIELD_CONSTANT 5\n");
2062 printf("FIELD_INTERVALL 6\n");
2063 printf("FIELD_ZERO 7\n");
2064 printf("FIELD_VARCHAR 8\n");
2065 printf("FIELD_CHECK 9\n");
2066 printf("\n");
2067 printf("pack type as a set of flags:\n");
2068 printf("PACK_TYPE_SELECTED 1\n");
2069 printf("PACK_TYPE_SPACE_FIELDS 2\n");
2070 printf("PACK_TYPE_ZERO_FILL 4\n");
2071 printf("\n");
2072 }
2073 for (i=0 ; i++ < fields ; counts++)
2074 {
2075 write_bits((ulonglong) (int) counts->field_type, 5);
2076 write_bits(counts->pack_type,6);
2077 if (counts->pack_type & PACK_TYPE_ZERO_FILL)
2078 write_bits(counts->max_zero_fill,5);
2079 else
2080 write_bits(counts->length_bits,5);
2081 write_bits((ulonglong) counts->tree->tree_number - 1, huff_tree_bits);
2082 DBUG_PRINT("info", ("column: %3u type: %2u pack: %2u zero: %4u "
2083 "lbits: %2u tree: %2u length: %4u",
2084 i , counts->field_type, counts->pack_type,
2085 counts->max_zero_fill, counts->length_bits,
2086 counts->tree->tree_number, counts->field_length));
2087 if (verbose >= 2)
2088 printf("column: %3u type: %2u pack: %2u zero: %4u lbits: %2u "
2089 "tree: %2u length: %4u\n", i , counts->field_type,
2090 counts->pack_type, counts->max_zero_fill, counts->length_bits,
2091 counts->tree->tree_number, counts->field_length);
2092 }
2093 flush_bits();
2094 return;
2095}
2096
2097 /* Write all huff_trees to new datafile. Return tot count of
2098 elements in all trees
2099 Returns 0 on error */
2100
2101static my_off_t write_huff_tree(HUFF_TREE *huff_tree, uint trees)
2102{
2103 uint i,int_length;
2104 uint tree_no;
2105 uint codes;
2106 uint errors= 0;
2107 uint *packed_tree,*offset,length;
2108 my_off_t elements;
2109
2110 /* Find the highest number of elements in the trees. */
2111 for (i=length=0 ; i < trees ; i++)
2112 if (huff_tree[i].tree_number > 0 && huff_tree[i].elements > length)
2113 length=huff_tree[i].elements;
2114 /*
2115 Allocate a buffer for packing a decode tree. Two numbers per element
2116 (left child and right child).
2117 */
2118 if (!(packed_tree=(uint*) my_alloca(sizeof(uint)*length*2)))
2119 {
2120 my_error(EE_OUTOFMEMORY,MYF(ME_BELL),sizeof(uint)*length*2);
2121 return 0;
2122 }
2123
2124 DBUG_PRINT("info", (" "));
2125 if (verbose >= 2)
2126 printf("\n");
2127 tree_no= 0;
2128 intervall_length=0;
2129 for (elements=0; trees-- ; huff_tree++)
2130 {
2131 /* Skip columns that have been joined with other columns. */
2132 if (huff_tree->tree_number == 0)
2133 continue; /* Deleted tree */
2134 tree_no++;
2135 DBUG_PRINT("info", (" "));
2136 if (verbose >= 3)
2137 printf("\n");
2138 /* Count the total number of elements (byte codes or column values). */
2139 elements+=huff_tree->elements;
2140 huff_tree->max_offset=2;
2141 /* Build a tree of offsets and codes for decoding in 'packed_tree'. */
2142 if (huff_tree->elements <= 1)
2143 offset=packed_tree;
2144 else
2145 offset=make_offset_code_tree(huff_tree,huff_tree->root,packed_tree);
2146
2147 /* This should be the same as 'length' above. */
2148 huff_tree->offset_bits=max_bit(huff_tree->max_offset);
2149
2150 /*
2151 Since we check this during collecting the distinct column values,
2152 this should never happen.
2153 */
2154 if (huff_tree->max_offset >= IS_OFFSET)
2155 { /* This should be impossible */
2156 fprintf(stderr, "Tree offset got too big: %d, aborted\n",
2157 huff_tree->max_offset);
2158 my_afree(packed_tree);
2159 return 0;
2160 }
2161
2162 DBUG_PRINT("info", ("pos: %lu elements: %u tree-elements: %lu "
2163 "char_bits: %u\n",
2164 (ulong) (file_buffer.pos - file_buffer.buffer),
2165 huff_tree->elements, (ulong) (offset - packed_tree),
2166 huff_tree->char_bits));
2167 if (!huff_tree->counts->tree_buff)
2168 {
2169 /* We do a uchar compression on this column. Mark with bit 0. */
2170 write_bits(0,1);
2171 write_bits(huff_tree->min_chr,8);
2172 write_bits(huff_tree->elements,9);
2173 write_bits(huff_tree->char_bits,5);
2174 write_bits(huff_tree->offset_bits,5);
2175 int_length=0;
2176 }
2177 else
2178 {
2179 int_length=(uint) (huff_tree->counts->tree_pos -
2180 huff_tree->counts->tree_buff);
2181 /* We have distinct column values for this column. Mark with bit 1. */
2182 write_bits(1,1);
2183 write_bits(huff_tree->elements,15);
2184 write_bits(int_length,16);
2185 write_bits(huff_tree->char_bits,5);
2186 write_bits(huff_tree->offset_bits,5);
2187 intervall_length+=int_length;
2188 }
2189 DBUG_PRINT("info", ("tree: %2u elements: %4u char_bits: %2u "
2190 "offset_bits: %2u %s: %5u codelen: %2u",
2191 tree_no, huff_tree->elements, huff_tree->char_bits,
2192 huff_tree->offset_bits, huff_tree->counts->tree_buff ?
2193 "bufflen" : "min_chr", huff_tree->counts->tree_buff ?
2194 int_length : huff_tree->min_chr, huff_tree->height));
2195 if (verbose >= 2)
2196 printf("tree: %2u elements: %4u char_bits: %2u offset_bits: %2u "
2197 "%s: %5u codelen: %2u\n", tree_no, huff_tree->elements,
2198 huff_tree->char_bits, huff_tree->offset_bits,
2199 huff_tree->counts->tree_buff ? "bufflen" : "min_chr",
2200 huff_tree->counts->tree_buff ? int_length :
2201 huff_tree->min_chr, huff_tree->height);
2202
2203 /* Check that the code tree length matches the element count. */
2204 length=(uint) (offset-packed_tree);
2205 if (length != huff_tree->elements*2-2)
2206 {
2207 fprintf(stderr, "error: Huff-tree-length: %d != calc_length: %d\n",
2208 length, huff_tree->elements * 2 - 2);
2209 errors++;
2210 break;
2211 }
2212
2213 for (i=0 ; i < length ; i++)
2214 {
2215 if (packed_tree[i] & IS_OFFSET)
2216 write_bits(packed_tree[i] - IS_OFFSET+ (1 << huff_tree->offset_bits),
2217 huff_tree->offset_bits+1);
2218 else
2219 write_bits(packed_tree[i]-huff_tree->min_chr,huff_tree->char_bits+1);
2220 DBUG_PRINT("info", ("tree[0x%04x]: %s0x%04x",
2221 i, (packed_tree[i] & IS_OFFSET) ?
2222 " -> " : "", (packed_tree[i] & IS_OFFSET) ?
2223 packed_tree[i] - IS_OFFSET + i : packed_tree[i]));
2224 if (verbose >= 3)
2225 printf("tree[0x%04x]: %s0x%04x\n",
2226 i, (packed_tree[i] & IS_OFFSET) ? " -> " : "",
2227 (packed_tree[i] & IS_OFFSET) ?
2228 packed_tree[i] - IS_OFFSET + i : packed_tree[i]);
2229 }
2230 flush_bits();
2231
2232 /*
2233 Display coding tables and check their correctness.
2234 */
2235 codes= huff_tree->counts->tree_buff ? huff_tree->elements : 256;
2236 for (i= 0; i < codes; i++)
2237 {
2238 ulonglong code;
2239 uint bits;
2240 uint len;
2241 uint idx;
2242
2243 if (! (len= huff_tree->code_len[i]))
2244 continue;
2245 DBUG_PRINT("info", ("code[0x%04x]: 0x%s bits: %2u bin: %s", i,
2246 hexdigits(huff_tree->code[i]), huff_tree->code_len[i],
2247 bindigits(huff_tree->code[i],
2248 huff_tree->code_len[i])));
2249 if (verbose >= 3)
2250 printf("code[0x%04x]: 0x%s bits: %2u bin: %s\n", i,
2251 hexdigits(huff_tree->code[i]), huff_tree->code_len[i],
2252 bindigits(huff_tree->code[i], huff_tree->code_len[i]));
2253
2254 /* Check that the encode table decodes correctly. */
2255 code= 0;
2256 bits= 0;
2257 idx= 0;
2258 DBUG_EXECUTE_IF("forcechkerr1", len--;);
2259 DBUG_EXECUTE_IF("forcechkerr2", bits= 8 * sizeof(code););
2260 DBUG_EXECUTE_IF("forcechkerr3", idx= length;);
2261 for (;;)
2262 {
2263 if (! len)
2264 {
2265 fflush(stdout);
2266 fprintf(stderr, "error: code 0x%s with %u bits not found\n",
2267 hexdigits(huff_tree->code[i]), huff_tree->code_len[i]);
2268 errors++;
2269 break;
2270 }
2271 code<<= 1;
2272 code|= (huff_tree->code[i] >> (--len)) & 1;
2273 bits++;
2274 if (bits > 8 * sizeof(code))
2275 {
2276 fflush(stdout);
2277 fprintf(stderr, "error: Huffman code too long: %u/%u\n",
2278 bits, (uint) (8 * sizeof(code)));
2279 errors++;
2280 break;
2281 }
2282 idx+= (uint) code & 1;
2283 if (idx >= length)
2284 {
2285 fflush(stdout);
2286 fprintf(stderr, "error: illegal tree offset: %u/%u\n", idx, length);
2287 errors++;
2288 break;
2289 }
2290 if (packed_tree[idx] & IS_OFFSET)
2291 idx+= packed_tree[idx] & ~IS_OFFSET;
2292 else
2293 break; /* Hit a leaf. This contains the result value. */
2294 }
2295 if (errors)
2296 break;
2297
2298 DBUG_EXECUTE_IF("forcechkerr4", packed_tree[idx]++;);
2299 if (packed_tree[idx] != i)
2300 {
2301 fflush(stdout);
2302 fprintf(stderr, "error: decoded value 0x%04x should be: 0x%04x\n",
2303 packed_tree[idx], i);
2304 errors++;
2305 break;
2306 }
2307 } /*end for (codes)*/
2308 if (errors)
2309 break;
2310
2311 /* Write column values in case of distinct column value compression. */
2312 if (huff_tree->counts->tree_buff)
2313 {
2314 for (i=0 ; i < int_length ; i++)
2315 {
2316 write_bits((ulonglong) (uchar) huff_tree->counts->tree_buff[i], 8);
2317 DBUG_PRINT("info", ("column_values[0x%04x]: 0x%02x",
2318 i, (uchar) huff_tree->counts->tree_buff[i]));
2319 if (verbose >= 3)
2320 printf("column_values[0x%04x]: 0x%02x\n",
2321 i, (uchar) huff_tree->counts->tree_buff[i]);
2322 }
2323 }
2324 flush_bits();
2325 }
2326 DBUG_PRINT("info", (" "));
2327 if (verbose >= 2)
2328 printf("\n");
2329 my_afree(packed_tree);
2330 if (errors)
2331 {
2332 fprintf(stderr, "Error: Generated decode trees are corrupt. Stop.\n");
2333 return 0;
2334 }
2335 return elements;
2336}
2337
2338
2339static uint *make_offset_code_tree(HUFF_TREE *huff_tree, HUFF_ELEMENT *element,
2340 uint *offset)
2341{
2342 uint *prev_offset;
2343
2344 prev_offset= offset;
2345 /*
2346 'a.leaf.null' takes the same place as 'a.nod.left'. If this is null,
2347 then there is no left child and, hence no right child either. This
2348 is a property of a binary tree. An element is either a node with two
2349 childs, or a leaf without childs.
2350
2351 The current element is always a node with two childs. Go left first.
2352 */
2353 if (!element->a.nod.left->a.leaf.null)
2354 {
2355 /* Store the uchar code or the index of the column value. */
2356 prev_offset[0] =(uint) element->a.nod.left->a.leaf.element_nr;
2357 offset+=2;
2358 }
2359 else
2360 {
2361 /*
2362 Recursively traverse the tree to the left. Mark it as an offset to
2363 another tree node (in contrast to a uchar code or column value index).
2364 */
2365 prev_offset[0]= IS_OFFSET+2;
2366 offset=make_offset_code_tree(huff_tree,element->a.nod.left,offset+2);
2367 }
2368
2369 /* Now, check the right child. */
2370 if (!element->a.nod.right->a.leaf.null)
2371 {
2372 /* Store the uchar code or the index of the column value. */
2373 prev_offset[1]=element->a.nod.right->a.leaf.element_nr;
2374 return offset;
2375 }
2376 else
2377 {
2378 /*
2379 Recursively traverse the tree to the right. Mark it as an offset to
2380 another tree node (in contrast to a uchar code or column value index).
2381 */
2382 uint temp=(uint) (offset-prev_offset-1);
2383 prev_offset[1]= IS_OFFSET+ temp;
2384 if (huff_tree->max_offset < temp)
2385 huff_tree->max_offset = temp;
2386 return make_offset_code_tree(huff_tree,element->a.nod.right,offset);
2387 }
2388}
2389
2390 /* Get number of bits neaded to represent value */
2391
2392static uint max_bit(register uint value)
2393{
2394 reg2 uint power=1;
2395
2396 while ((value>>=1))
2397 power++;
2398 return (power);
2399}
2400
2401
2402static int compress_maria_file(PACK_MRG_INFO *mrg, HUFF_COUNTS *huff_counts)
2403{
2404 int error;
2405 uint i,max_calc_length,pack_ref_length,min_record_length,max_record_length;
2406 uint intervall,field_length,max_pack_length,pack_blob_length, null_bytes;
2407 my_off_t record_count;
2408 char llbuf[32];
2409 ulong length,pack_length;
2410 uchar *record,*pos,*end_pos,*record_pos,*start_pos;
2411 HUFF_COUNTS *count,*end_count;
2412 HUFF_TREE *tree;
2413 MARIA_HA *isam_file=mrg->file[0];
2414 uint pack_version= (uint) isam_file->s->pack.version;
2415 DBUG_ENTER("compress_maria_file");
2416
2417 /* Allocate a buffer for the records (excluding blobs). */
2418 if (!(record=(uchar*) my_safe_alloca(isam_file->s->base.reclength)))
2419 return -1;
2420
2421 end_count=huff_counts+isam_file->s->base.fields;
2422 min_record_length= (uint) ~0;
2423 max_record_length=0;
2424 null_bytes= isam_file->s->base.null_bytes;
2425
2426 /*
2427 Calculate the maximum number of bits required to pack the records.
2428 Remember to understand 'max_zero_fill' as 'min_zero_fill'.
2429 The tree height determines the maximum number of bits per value.
2430 Some fields skip leading or trailing spaces or zeroes. The skipped
2431 number of bytes is encoded by 'length_bits' bits.
2432 Empty blobs and varchar are encoded with a single 1 bit. Other blobs
2433 and varchar get a leading 0 bit.
2434 */
2435 max_calc_length= null_bytes;
2436 for (i= 0 ; i < isam_file->s->base.fields ; i++)
2437 {
2438 if (!(huff_counts[i].pack_type & PACK_TYPE_ZERO_FILL))
2439 huff_counts[i].max_zero_fill=0;
2440 if (huff_counts[i].field_type == FIELD_CONSTANT ||
2441 huff_counts[i].field_type == FIELD_ZERO ||
2442 huff_counts[i].field_type == FIELD_CHECK)
2443 continue;
2444 if (huff_counts[i].field_type == FIELD_INTERVALL)
2445 max_calc_length+=huff_counts[i].tree->height;
2446 else if (huff_counts[i].field_type == FIELD_BLOB ||
2447 huff_counts[i].field_type == FIELD_VARCHAR)
2448 max_calc_length+=huff_counts[i].tree->height*huff_counts[i].max_length + huff_counts[i].length_bits +1;
2449 else
2450 max_calc_length+=
2451 (huff_counts[i].field_length - huff_counts[i].max_zero_fill)*
2452 huff_counts[i].tree->height+huff_counts[i].length_bits;
2453 }
2454 max_calc_length= (max_calc_length + 7) / 8;
2455 pack_ref_length= _ma_calc_pack_length(pack_version, max_calc_length);
2456 record_count=0;
2457 /* 'max_blob_length' is the max length of all blobs of a record. */
2458 pack_blob_length= isam_file->s->base.blobs ?
2459 _ma_calc_pack_length(pack_version, mrg->max_blob_length) : 0;
2460 max_pack_length=pack_ref_length+pack_blob_length;
2461
2462 DBUG_PRINT("fields", ("==="));
2463 mrg_reset(mrg);
2464 while ((error=mrg_rrnd(mrg,record)) != HA_ERR_END_OF_FILE)
2465 {
2466 ulong tot_blob_length=0;
2467 if (! error)
2468 {
2469 if (flush_buffer((ulong) max_calc_length + (ulong) max_pack_length +
2470 null_bytes))
2471 break;
2472 record_pos= file_buffer.pos;
2473 file_buffer.pos+= max_pack_length;
2474 if (null_bytes)
2475 {
2476 /* Copy null bits 'as is' */
2477 memcpy(file_buffer.pos, record, null_bytes);
2478 file_buffer.pos+= null_bytes;
2479 }
2480 for (start_pos=record+null_bytes, count= huff_counts;
2481 count < end_count ;
2482 count++)
2483 {
2484 end_pos=start_pos+(field_length=count->field_length);
2485 tree=count->tree;
2486
2487 DBUG_PRINT("fields", ("column: %3lu type: %2u pack: %2u zero: %4u "
2488 "lbits: %2u tree: %2u length: %4u",
2489 (ulong) (count - huff_counts + 1),
2490 count->field_type,
2491 count->pack_type, count->max_zero_fill,
2492 count->length_bits, count->tree->tree_number,
2493 count->field_length));
2494
2495 /* Check if the column contains spaces only. */
2496 if (count->pack_type & PACK_TYPE_SPACE_FIELDS)
2497 {
2498 for (pos=start_pos ; *pos == ' ' && pos < end_pos; pos++) ;
2499 if (pos == end_pos)
2500 {
2501 DBUG_PRINT("fields",
2502 ("PACK_TYPE_SPACE_FIELDS spaces only, bits: 1"));
2503 DBUG_PRINT("fields", ("---"));
2504 write_bits(1,1);
2505 start_pos=end_pos;
2506 continue;
2507 }
2508 DBUG_PRINT("fields",
2509 ("PACK_TYPE_SPACE_FIELDS not only spaces, bits: 1"));
2510 write_bits(0,1);
2511 }
2512 end_pos-=count->max_zero_fill;
2513 field_length-=count->max_zero_fill;
2514
2515 switch (count->field_type) {
2516 case FIELD_SKIP_ZERO:
2517 if (!memcmp(start_pos, zero_string, field_length))
2518 {
2519 DBUG_PRINT("fields", ("FIELD_SKIP_ZERO zeroes only, bits: 1"));
2520 write_bits(1,1);
2521 start_pos=end_pos;
2522 break;
2523 }
2524 DBUG_PRINT("fields", ("FIELD_SKIP_ZERO not only zeroes, bits: 1"));
2525 write_bits(0,1);
2526 /* Fall through */
2527 case FIELD_NORMAL:
2528 DBUG_PRINT("fields", ("FIELD_NORMAL %lu bytes",
2529 (ulong) (end_pos - start_pos)));
2530 for ( ; start_pos < end_pos ; start_pos++)
2531 {
2532 DBUG_PRINT("fields",
2533 ("value: 0x%02x code: 0x%s bits: %2u bin: %s",
2534 (uchar) *start_pos,
2535 hexdigits(tree->code[(uchar) *start_pos]),
2536 (uint) tree->code_len[(uchar) *start_pos],
2537 bindigits(tree->code[(uchar) *start_pos],
2538 (uint) tree->code_len[(uchar) *start_pos])));
2539 write_bits(tree->code[(uchar) *start_pos],
2540 (uint) tree->code_len[(uchar) *start_pos]);
2541 }
2542 break;
2543 case FIELD_SKIP_ENDSPACE:
2544 for (pos=end_pos ; pos > start_pos && pos[-1] == ' ' ; pos--) ;
2545 length= (ulong) (end_pos - pos);
2546 if (count->pack_type & PACK_TYPE_SELECTED)
2547 {
2548 if (length > count->min_space)
2549 {
2550 DBUG_PRINT("fields",
2551 ("FIELD_SKIP_ENDSPACE more than min_space, bits: 1"));
2552 DBUG_PRINT("fields",
2553 ("FIELD_SKIP_ENDSPACE skip %lu/%u bytes, bits: %2u",
2554 length, field_length, count->length_bits));
2555 write_bits(1,1);
2556 write_bits(length,count->length_bits);
2557 }
2558 else
2559 {
2560 DBUG_PRINT("fields",
2561 ("FIELD_SKIP_ENDSPACE not more than min_space, "
2562 "bits: 1"));
2563 write_bits(0,1);
2564 pos=end_pos;
2565 }
2566 }
2567 else
2568 {
2569 DBUG_PRINT("fields",
2570 ("FIELD_SKIP_ENDSPACE skip %lu/%u bytes, bits: %2u",
2571 length, field_length, count->length_bits));
2572 write_bits(length,count->length_bits);
2573 }
2574 /* Encode all significant bytes. */
2575 DBUG_PRINT("fields", ("FIELD_SKIP_ENDSPACE %lu bytes",
2576 (ulong) (pos - start_pos)));
2577 for ( ; start_pos < pos ; start_pos++)
2578 {
2579 DBUG_PRINT("fields",
2580 ("value: 0x%02x code: 0x%s bits: %2u bin: %s",
2581 (uchar) *start_pos,
2582 hexdigits(tree->code[(uchar) *start_pos]),
2583 (uint) tree->code_len[(uchar) *start_pos],
2584 bindigits(tree->code[(uchar) *start_pos],
2585 (uint) tree->code_len[(uchar) *start_pos])));
2586 write_bits(tree->code[(uchar) *start_pos],
2587 (uint) tree->code_len[(uchar) *start_pos]);
2588 }
2589 start_pos=end_pos;
2590 break;
2591 case FIELD_SKIP_PRESPACE:
2592 for (pos=start_pos ; pos < end_pos && pos[0] == ' ' ; pos++) ;
2593 length= (ulong) (pos - start_pos);
2594 if (count->pack_type & PACK_TYPE_SELECTED)
2595 {
2596 if (length > count->min_space)
2597 {
2598 DBUG_PRINT("fields",
2599 ("FIELD_SKIP_PRESPACE more than min_space, bits: 1"));
2600 DBUG_PRINT("fields",
2601 ("FIELD_SKIP_PRESPACE skip %lu/%u bytes, bits: %2u",
2602 length, field_length, count->length_bits));
2603 write_bits(1,1);
2604 write_bits(length,count->length_bits);
2605 }
2606 else
2607 {
2608 DBUG_PRINT("fields",
2609 ("FIELD_SKIP_PRESPACE not more than min_space, "
2610 "bits: 1"));
2611 pos=start_pos;
2612 write_bits(0,1);
2613 }
2614 }
2615 else
2616 {
2617 DBUG_PRINT("fields",
2618 ("FIELD_SKIP_PRESPACE skip %lu/%u bytes, bits: %2u",
2619 length, field_length, count->length_bits));
2620 write_bits(length,count->length_bits);
2621 }
2622 /* Encode all significant bytes. */
2623 DBUG_PRINT("fields", ("FIELD_SKIP_PRESPACE %lu bytes",
2624 (ulong) (end_pos - start_pos)));
2625 for (start_pos=pos ; start_pos < end_pos ; start_pos++)
2626 {
2627 DBUG_PRINT("fields",
2628 ("value: 0x%02x code: 0x%s bits: %2u bin: %s",
2629 (uchar) *start_pos,
2630 hexdigits(tree->code[(uchar) *start_pos]),
2631 (uint) tree->code_len[(uchar) *start_pos],
2632 bindigits(tree->code[(uchar) *start_pos],
2633 (uint) tree->code_len[(uchar) *start_pos])));
2634 write_bits(tree->code[(uchar) *start_pos],
2635 (uint) tree->code_len[(uchar) *start_pos]);
2636 }
2637 break;
2638 case FIELD_CONSTANT:
2639 case FIELD_ZERO:
2640 case FIELD_CHECK:
2641 DBUG_PRINT("fields", ("FIELD_CONSTANT/ZERO/CHECK"));
2642 start_pos=end_pos;
2643 break;
2644 case FIELD_INTERVALL:
2645 global_count=count;
2646 pos=(uchar*) tree_search(&count->int_tree, start_pos,
2647 count->int_tree.custom_arg);
2648 intervall=(uint) (pos - count->tree_buff)/field_length;
2649 DBUG_PRINT("fields", ("FIELD_INTERVALL"));
2650 DBUG_PRINT("fields", ("index: %4u code: 0x%s bits: %2u",
2651 intervall, hexdigits(tree->code[intervall]),
2652 (uint) tree->code_len[intervall]));
2653 write_bits(tree->code[intervall],(uint) tree->code_len[intervall]);
2654 start_pos=end_pos;
2655 break;
2656 case FIELD_BLOB:
2657 {
2658 ulong blob_length= _ma_calc_blob_length(field_length-
2659 portable_sizeof_char_ptr,
2660 start_pos);
2661 /* Empty blobs are encoded with a single 1 bit. */
2662 if (!blob_length)
2663 {
2664 DBUG_PRINT("fields", ("FIELD_BLOB empty, bits: 1"));
2665 write_bits(1,1);
2666 }
2667 else
2668 {
2669 uchar *blob,*blob_end;
2670 DBUG_PRINT("fields", ("FIELD_BLOB not empty, bits: 1"));
2671 write_bits(0,1);
2672 /* Write the blob length. */
2673 DBUG_PRINT("fields", ("FIELD_BLOB %lu bytes, bits: %2u",
2674 blob_length, count->length_bits));
2675 write_bits(blob_length,count->length_bits);
2676 memcpy(&blob,end_pos-portable_sizeof_char_ptr, sizeof(char*));
2677 blob_end=blob+blob_length;
2678 /* Encode the blob bytes. */
2679 for ( ; blob < blob_end ; blob++)
2680 {
2681 DBUG_PRINT("fields",
2682 ("value: 0x%02x code: 0x%s bits: %2u bin: %s",
2683 (uchar) *blob, hexdigits(tree->code[(uchar) *blob]),
2684 (uint) tree->code_len[(uchar) *blob],
2685 bindigits(tree->code[(uchar) *start_pos],
2686 (uint)tree->code_len[(uchar) *start_pos])));
2687 write_bits(tree->code[(uchar) *blob],
2688 (uint) tree->code_len[(uchar) *blob]);
2689 }
2690 tot_blob_length+=blob_length;
2691 }
2692 start_pos= end_pos;
2693 break;
2694 }
2695 case FIELD_VARCHAR:
2696 {
2697 uint var_pack_length= HA_VARCHAR_PACKLENGTH(count->field_length-1);
2698 ulong col_length= (var_pack_length == 1 ?
2699 (uint) *(uchar*) start_pos :
2700 uint2korr(start_pos));
2701 /* Empty varchar are encoded with a single 1 bit. */
2702 if (!col_length)
2703 {
2704 DBUG_PRINT("fields", ("FIELD_VARCHAR empty, bits: 1"));
2705 write_bits(1,1); /* Empty varchar */
2706 }
2707 else
2708 {
2709 uchar *end= start_pos + var_pack_length + col_length;
2710 DBUG_PRINT("fields", ("FIELD_VARCHAR not empty, bits: 1"));
2711 write_bits(0,1);
2712 /* Write the varchar length. */
2713 DBUG_PRINT("fields", ("FIELD_VARCHAR %lu bytes, bits: %2u",
2714 col_length, count->length_bits));
2715 write_bits(col_length,count->length_bits);
2716 /* Encode the varchar bytes. */
2717 for (start_pos+= var_pack_length ; start_pos < end ; start_pos++)
2718 {
2719 DBUG_PRINT("fields",
2720 ("value: 0x%02x code: 0x%s bits: %2u bin: %s",
2721 (uchar) *start_pos,
2722 hexdigits(tree->code[(uchar) *start_pos]),
2723 (uint) tree->code_len[(uchar) *start_pos],
2724 bindigits(tree->code[(uchar) *start_pos],
2725 (uint)tree->code_len[(uchar) *start_pos])));
2726 write_bits(tree->code[(uchar) *start_pos],
2727 (uint) tree->code_len[(uchar) *start_pos]);
2728 }
2729 }
2730 start_pos= end_pos;
2731 break;
2732 }
2733 case FIELD_LAST:
2734 case FIELD_enum_val_count:
2735 abort(); /* Impossible */
2736 }
2737 start_pos+=count->max_zero_fill;
2738 DBUG_PRINT("fields", ("---"));
2739 }
2740 flush_bits();
2741 length=(ulong) (file_buffer.pos - record_pos) - max_pack_length;
2742 pack_length= _ma_save_pack_length(pack_version, record_pos, length);
2743 if (pack_blob_length)
2744 pack_length+= _ma_save_pack_length(pack_version,
2745 record_pos + pack_length,
2746 tot_blob_length);
2747 DBUG_PRINT("fields", ("record: %lu length: %lu blob-length: %lu "
2748 "length-bytes: %lu", (ulong) record_count, length,
2749 tot_blob_length, pack_length));
2750 DBUG_PRINT("fields", ("==="));
2751
2752 /* Correct file buffer if the header was smaller */
2753 if (pack_length != max_pack_length)
2754 {
2755 bmove(record_pos+pack_length,record_pos+max_pack_length,length);
2756 file_buffer.pos-= (max_pack_length-pack_length);
2757 }
2758 if (length < (ulong) min_record_length)
2759 min_record_length=(uint) length;
2760 if (length > (ulong) max_record_length)
2761 max_record_length=(uint) length;
2762 record_count++;
2763 if (write_loop && record_count % WRITE_COUNT == 0)
2764 {
2765 printf("%lu\r", (ulong) record_count);
2766 fflush(stdout);
2767 }
2768 }
2769 else if (error != HA_ERR_RECORD_DELETED)
2770 break;
2771 }
2772 if (error == HA_ERR_END_OF_FILE)
2773 error=0;
2774 else
2775 {
2776 fprintf(stderr, "%s: Got error %d reading records\n", my_progname, error);
2777 }
2778 if (verbose >= 2)
2779 printf("wrote %s records.\n", llstr((longlong) record_count, llbuf));
2780
2781 my_safe_afree(record, isam_file->s->base.reclength);
2782 mrg->ref_length=max_pack_length;
2783 mrg->min_pack_length=max_record_length ? min_record_length : 0;
2784 mrg->max_pack_length=max_record_length;
2785 DBUG_RETURN(error || error_on_write || flush_buffer(~(ulong) 0));
2786}
2787
2788
2789static char *make_new_name(char *new_name, char *old_name)
2790{
2791 return fn_format(new_name,old_name,"",DATA_TMP_EXT,2+4);
2792}
2793
2794static char *make_old_name(char *new_name, char *old_name)
2795{
2796 return fn_format(new_name,old_name,"",OLD_EXT,2+4);
2797}
2798
2799 /* rutines for bit writing buffer */
2800
2801static void init_file_buffer(File file, pbool read_buffer)
2802{
2803 file_buffer.file=file;
2804 file_buffer.buffer= (uchar*) my_malloc(ALIGN_SIZE(RECORD_CACHE_SIZE),
2805 MYF(MY_WME));
2806 file_buffer.end=file_buffer.buffer+ALIGN_SIZE(RECORD_CACHE_SIZE)-8;
2807 file_buffer.pos_in_file=0;
2808 error_on_write=0;
2809 if (read_buffer)
2810 {
2811
2812 file_buffer.pos=file_buffer.end;
2813 file_buffer.bits=0;
2814 }
2815 else
2816 {
2817 file_buffer.pos=file_buffer.buffer;
2818 file_buffer.bits=BITS_SAVED;
2819 }
2820 file_buffer.bitbucket= 0;
2821}
2822
2823
2824static int flush_buffer(ulong neaded_length)
2825{
2826 ulong length;
2827
2828 /*
2829 file_buffer.end is 8 bytes lower than the real end of the buffer.
2830 This is done so that the end-of-buffer condition does not need to be
2831 checked for every uchar (see write_bits()). Consequently,
2832 file_buffer.pos can become greater than file_buffer.end. The
2833 algorithms in the other functions ensure that there will never be
2834 more than 8 bytes written to the buffer without an end-of-buffer
2835 check. So the buffer cannot be overrun. But we need to check for the
2836 near-to-buffer-end condition to avoid a negative result, which is
2837 casted to unsigned and thus becomes giant.
2838 */
2839 if ((file_buffer.pos < file_buffer.end) &&
2840 ((ulong) (file_buffer.end - file_buffer.pos) > neaded_length))
2841 return 0;
2842 length=(ulong) (file_buffer.pos-file_buffer.buffer);
2843 file_buffer.pos=file_buffer.buffer;
2844 file_buffer.pos_in_file+=length;
2845 if (test_only)
2846 return 0;
2847 if (error_on_write|| my_write(file_buffer.file,
2848 (const uchar*) file_buffer.buffer,
2849 length,
2850 MYF(MY_WME | MY_NABP | MY_WAIT_IF_FULL)))
2851 {
2852 error_on_write=1;
2853 return 1;
2854 }
2855
2856 if (neaded_length != ~(ulong) 0 &&
2857 (ulong) (file_buffer.end-file_buffer.buffer) < neaded_length)
2858 {
2859 uchar *tmp;
2860 neaded_length+=256; /* some margin */
2861 tmp= (uchar*) my_realloc(file_buffer.buffer, neaded_length,MYF(MY_WME));
2862 if (!tmp)
2863 return 1;
2864 file_buffer.pos= (tmp + (ulong) (file_buffer.pos - file_buffer.buffer));
2865 file_buffer.buffer= tmp;
2866 file_buffer.end= (tmp+neaded_length-8);
2867 }
2868 return 0;
2869}
2870
2871
2872static void end_file_buffer(void)
2873{
2874 my_free(file_buffer.buffer);
2875}
2876
2877 /* output `bits` low bits of `value' */
2878
2879static void write_bits(register ulonglong value, register uint bits)
2880{
2881 DBUG_ASSERT(((bits < 8 * sizeof(value)) && ! (value >> bits)) ||
2882 (bits == 8 * sizeof(value)));
2883
2884 if ((file_buffer.bits-= (int) bits) >= 0)
2885 {
2886 file_buffer.bitbucket|= value << file_buffer.bits;
2887 }
2888 else
2889 {
2890 reg3 ulonglong bit_buffer;
2891 bits= (uint) -file_buffer.bits;
2892 bit_buffer= (file_buffer.bitbucket |
2893 ((bits != 8 * sizeof(value)) ? (value >> bits) : 0));
2894#if BITS_SAVED == 64
2895 *file_buffer.pos++= (uchar) (bit_buffer >> 56);
2896 *file_buffer.pos++= (uchar) (bit_buffer >> 48);
2897 *file_buffer.pos++= (uchar) (bit_buffer >> 40);
2898 *file_buffer.pos++= (uchar) (bit_buffer >> 32);
2899#endif
2900 *file_buffer.pos++= (uchar) (bit_buffer >> 24);
2901 *file_buffer.pos++= (uchar) (bit_buffer >> 16);
2902 *file_buffer.pos++= (uchar) (bit_buffer >> 8);
2903 *file_buffer.pos++= (uchar) (bit_buffer);
2904
2905 if (bits != 8 * sizeof(value))
2906 value&= (((ulonglong) 1) << bits) - 1;
2907 if (file_buffer.pos >= file_buffer.end)
2908 flush_buffer(~ (ulong) 0);
2909 file_buffer.bits=(int) (BITS_SAVED - bits);
2910 file_buffer.bitbucket= value << (BITS_SAVED - bits);
2911 }
2912 return;
2913}
2914
2915 /* Flush bits in bit_buffer to buffer */
2916
2917static void flush_bits(void)
2918{
2919 int bits;
2920 ulonglong bit_buffer;
2921
2922 bits= file_buffer.bits & ~7;
2923 bit_buffer= file_buffer.bitbucket >> bits;
2924 bits= BITS_SAVED - bits;
2925 while (bits > 0)
2926 {
2927 bits-= 8;
2928 *file_buffer.pos++= (uchar) (bit_buffer >> bits);
2929 }
2930 if (file_buffer.pos >= file_buffer.end)
2931 flush_buffer(~ (ulong) 0);
2932 file_buffer.bits= BITS_SAVED;
2933 file_buffer.bitbucket= 0;
2934}
2935
2936
2937/****************************************************************************
2938** functions to handle the joined files
2939****************************************************************************/
2940
2941static int save_state(MARIA_HA *isam_file,PACK_MRG_INFO *mrg,
2942 my_off_t new_length,
2943 ha_checksum crc)
2944{
2945 MARIA_SHARE *share=isam_file->s;
2946 uint options=mi_uint2korr(share->state.header.options);
2947 uint key;
2948 DBUG_ENTER("save_state");
2949
2950 options|= HA_OPTION_COMPRESS_RECORD | HA_OPTION_READ_ONLY_DATA;
2951 mi_int2store(share->state.header.options,options);
2952 /* Save the original file type of we have to undo the packing later */
2953 share->state.header.org_data_file_type= share->state.header.data_file_type;
2954 share->state.header.data_file_type= COMPRESSED_RECORD;
2955
2956 share->state.state.data_file_length=new_length;
2957 share->state.state.del=0;
2958 share->state.state.empty=0;
2959 share->state.dellink= HA_OFFSET_ERROR;
2960 share->state.split=(ha_rows) mrg->records;
2961 share->state.version=(ulong) time((time_t*) 0);
2962 if (share->base.born_transactional)
2963 share->state.create_rename_lsn= share->state.is_of_horizon=
2964 share->state.skip_redo_lsn= LSN_NEEDS_NEW_STATE_LSNS;
2965 if (! maria_is_all_keys_active(share->state.key_map, share->base.keys))
2966 {
2967 /*
2968 Some indexes are disabled, cannot use current key_file_length value
2969 as an estimate of upper bound of index file size. Use packed data file
2970 size instead.
2971 */
2972 share->state.state.key_file_length= new_length;
2973 }
2974 /*
2975 If there are no disabled indexes, keep key_file_length value from
2976 original file so "aria_chk -rq" can use this value (this is necessary
2977 because index size cannot be easily calculated for fulltext keys)
2978 */
2979 maria_clear_all_keys_active(share->state.key_map);
2980 for (key=0 ; key < share->base.keys ; key++)
2981 share->state.key_root[key]= HA_OFFSET_ERROR;
2982 share->state.key_del= HA_OFFSET_ERROR;
2983 share->state.state.checksum= crc; /* Save crc in file */
2984 share->changed=1; /* Force write of header */
2985 share->state.open_count=0;
2986 share->global_changed=0;
2987 my_chsize(share->kfile.file, share->base.keystart, 0, MYF(0));
2988 if (share->base.keys)
2989 isamchk_neaded=1;
2990 DBUG_RETURN(_ma_state_info_write_sub(share->kfile.file,
2991 &share->state,
2992 MA_STATE_INFO_WRITE_DONT_MOVE_OFFSET |
2993 MA_STATE_INFO_WRITE_FULL_INFO));
2994}
2995
2996
2997static int save_state_mrg(File file,PACK_MRG_INFO *mrg,my_off_t new_length,
2998 ha_checksum crc)
2999{
3000 MARIA_STATE_INFO state;
3001 MARIA_HA *isam_file=mrg->file[0];
3002 uint options;
3003 DBUG_ENTER("save_state_mrg");
3004
3005 state= isam_file->s->state;
3006 options= (mi_uint2korr(state.header.options) | HA_OPTION_COMPRESS_RECORD |
3007 HA_OPTION_READ_ONLY_DATA);
3008 mi_int2store(state.header.options,options);
3009 /* Save the original file type of we have to undo the packing later */
3010 state.header.org_data_file_type= state.header.data_file_type;
3011 state.header.data_file_type= COMPRESSED_RECORD;
3012
3013 state.state.data_file_length=new_length;
3014 state.state.del=0;
3015 state.state.empty=0;
3016 state.state.records=state.split=(ha_rows) mrg->records;
3017 state.create_rename_lsn= state.is_of_horizon= state.skip_redo_lsn=
3018 LSN_NEEDS_NEW_STATE_LSNS;
3019
3020 /* See comment above in save_state about key_file_length handling. */
3021 if (mrg->src_file_has_indexes_disabled)
3022 {
3023 isam_file->s->state.state.key_file_length=
3024 MY_MAX(isam_file->s->state.state.key_file_length, new_length);
3025 }
3026 state.dellink= HA_OFFSET_ERROR;
3027 state.version=(ulong) time((time_t*) 0);
3028 maria_clear_all_keys_active(state.key_map);
3029 state.state.checksum=crc;
3030 if (isam_file->s->base.keys)
3031 isamchk_neaded=1;
3032 state.changed=STATE_CHANGED | STATE_NOT_ANALYZED; /* Force check of table */
3033 DBUG_RETURN (_ma_state_info_write_sub(file, &state,
3034 MA_STATE_INFO_WRITE_DONT_MOVE_OFFSET |
3035 MA_STATE_INFO_WRITE_FULL_INFO));
3036}
3037
3038
3039/* reset for mrg_rrnd */
3040
3041static void mrg_reset(PACK_MRG_INFO *mrg)
3042{
3043 if (mrg->current)
3044 {
3045 maria_extra(*mrg->current, HA_EXTRA_NO_CACHE, 0);
3046 mrg->current=0;
3047 }
3048}
3049
3050static int mrg_rrnd(PACK_MRG_INFO *info,uchar *buf)
3051{
3052 int error;
3053 MARIA_HA *isam_info;
3054
3055 if (!info->current)
3056 {
3057 isam_info= *(info->current=info->file);
3058 info->end=info->current+info->count;
3059 maria_reset(isam_info);
3060 maria_extra(isam_info, HA_EXTRA_CACHE, 0);
3061 if ((error= maria_scan_init(isam_info)))
3062 return(error);
3063 }
3064 else
3065 isam_info= *info->current;
3066
3067 for (;;)
3068 {
3069 if (!(error= maria_scan(isam_info, buf)) ||
3070 error != HA_ERR_END_OF_FILE)
3071 return (error);
3072 maria_scan_end(isam_info);
3073 maria_extra(isam_info,HA_EXTRA_NO_CACHE, 0);
3074 if (info->current+1 == info->end)
3075 return(HA_ERR_END_OF_FILE);
3076 info->current++;
3077 isam_info= *info->current;
3078 maria_reset(isam_info);
3079 maria_extra(isam_info,HA_EXTRA_CACHE, 0);
3080 if ((error= maria_scan_init(isam_info)))
3081 return(error);
3082 }
3083}
3084
3085
3086static int mrg_close(PACK_MRG_INFO *mrg)
3087{
3088 uint i;
3089 int error=0;
3090 DBUG_ENTER("mrg_close");
3091
3092 for (i=0 ; i < mrg->count ; i++)
3093 error|=maria_close(mrg->file[i]);
3094 if (mrg->free_file)
3095 my_free(mrg->file);
3096 DBUG_RETURN(error);
3097}
3098
3099
3100#if !defined(DBUG_OFF)
3101/*
3102 Fake the counts to get big Huffman codes.
3103
3104 SYNOPSIS
3105 fakebigcodes()
3106 huff_counts A pointer to the counts array.
3107 end_count A pointer past the counts array.
3108
3109 DESCRIPTION
3110
3111 Huffman coding works by removing the two least frequent values from
3112 the list of values and add a new value with the sum of their
3113 incidences in a loop until only one value is left. Every time a
3114 value is reused for a new value, it gets one more bit for its
3115 encoding. Hence, the least frequent values get the longest codes.
3116
3117 To get a maximum code length for a value, two of the values must
3118 have an incidence of 1. As their sum is 2, the next infrequent value
3119 must have at least an incidence of 2, then 4, 8, 16 and so on. This
3120 means that one needs 2**n bytes (values) for a code length of n
3121 bits. However, using more distinct values forces the use of longer
3122 codes, or reaching the code length with less total bytes (values).
3123
3124 To get 64(32)-bit codes, I sort the counts by decreasing incidence.
3125 I assign counts of 1 to the two most frequent values, a count of 2
3126 for the next one, then 4, 8, and so on until 2**64-1(2**30-1). All
3127 the remaining values get 1. That way every possible uchar has an
3128 assigned code, though not all codes are used if not all uchar values
3129 are present in the column.
3130
3131 This strategy would work with distinct column values too, but
3132 requires that at least 64(32) values are present. To make things
3133 easier here, I cancel all distinct column values and force byte
3134 compression for all columns.
3135
3136 RETURN
3137 void
3138*/
3139
3140static void fakebigcodes(HUFF_COUNTS *huff_counts, HUFF_COUNTS *end_count)
3141{
3142 HUFF_COUNTS *count;
3143 my_off_t *cur_count_p;
3144 my_off_t *end_count_p;
3145 my_off_t **cur_sort_p;
3146 my_off_t **end_sort_p;
3147 my_off_t *sort_counts[256];
3148 my_off_t total;
3149 DBUG_ENTER("fakebigcodes");
3150
3151 for (count= huff_counts; count < end_count; count++)
3152 {
3153 /*
3154 Remove distinct column values.
3155 */
3156 if (huff_counts->tree_buff)
3157 {
3158 my_free(huff_counts->tree_buff);
3159 delete_tree(&huff_counts->int_tree, 0);
3160 huff_counts->tree_buff= NULL;
3161 DBUG_PRINT("fakebigcodes", ("freed distinct column values"));
3162 }
3163
3164 /*
3165 Sort counts by decreasing incidence.
3166 */
3167 cur_count_p= count->counts;
3168 end_count_p= cur_count_p + 256;
3169 cur_sort_p= sort_counts;
3170 while (cur_count_p < end_count_p)
3171 *(cur_sort_p++)= cur_count_p++;
3172 (void) my_qsort(sort_counts, 256, sizeof(my_off_t*), (qsort_cmp) fakecmp);
3173
3174 /*
3175 Assign faked counts.
3176 */
3177 cur_sort_p= sort_counts;
3178#if SIZEOF_LONG_LONG > 4
3179 end_sort_p= sort_counts + 8 * sizeof(ulonglong) - 1;
3180#else
3181 end_sort_p= sort_counts + 8 * sizeof(ulonglong) - 2;
3182#endif
3183 /* Most frequent value gets a faked count of 1. */
3184 **(cur_sort_p++)= 1;
3185 total= 1;
3186 while (cur_sort_p < end_sort_p)
3187 {
3188 **(cur_sort_p++)= total;
3189 total<<= 1;
3190 }
3191 /* Set the last value. */
3192 **(cur_sort_p++)= --total;
3193 /*
3194 Set the remaining counts.
3195 */
3196 end_sort_p= sort_counts + 256;
3197 while (cur_sort_p < end_sort_p)
3198 **(cur_sort_p++)= 1;
3199 }
3200 DBUG_VOID_RETURN;
3201}
3202
3203
3204/*
3205 Compare two counts for reverse sorting.
3206
3207 SYNOPSIS
3208 fakecmp()
3209 count1 One count.
3210 count2 Another count.
3211
3212 RETURN
3213 1 count1 < count2
3214 0 count1 == count2
3215 -1 count1 > count2
3216*/
3217
3218static int fakecmp(my_off_t **count1, my_off_t **count2)
3219{
3220 return ((**count1 < **count2) ? 1 :
3221 (**count1 > **count2) ? -1 : 0);
3222}
3223#endif
3224
3225#include "ma_check_standalone.h"
3226
3227