1/* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
2
3 This program is free software; you can redistribute it and/or modify
4 it under the terms of the GNU General Public License as published by
5 the Free Software Foundation; version 2 of the License.
6
7 This program is distributed in the hope that it will be useful,
8 but WITHOUT ANY WARRANTY; without even the implied warranty of
9 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 GNU General Public License for more details.
11
12 You should have received a copy of the GNU General Public License
13 along with this program; if not, write to the Free Software
14 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111-1301 USA */
15
16/* Describe, check and repair of MARIA tables */
17
18/*
19 About checksum calculation.
20
21 There are two types of checksums. Table checksum and row checksum.
22
23 Row checksum is an additional uchar at the end of dynamic length
24 records. It must be calculated if the table is configured for them.
25 Otherwise they must not be used. The variable
26 MYISAM_SHARE::calc_checksum determines if row checksums are used.
27 MI_INFO::checksum is used as temporary storage during row handling.
28 For parallel repair we must assure that only one thread can use this
29 variable. There is no problem on the write side as this is done by one
30 thread only. But when checking a record after read this could go
31 wrong. But since all threads read through a common read buffer, it is
32 sufficient if only one thread checks it.
33
34 Table checksum is an eight uchar value in the header of the index file.
35 It can be calculated even if row checksums are not used. The variable
36 MI_CHECK::glob_crc is calculated over all records.
37 MI_SORT_PARAM::calc_checksum determines if this should be done. This
38 variable is not part of MI_CHECK because it must be set per thread for
39 parallel repair. The global glob_crc must be changed by one thread
40 only. And it is sufficient to calculate the checksum once only.
41*/
42
43#include "ma_ftdefs.h"
44#include "ma_rt_index.h"
45#include "ma_blockrec.h"
46#include "trnman.h"
47#include "ma_key_recover.h"
48#include <my_check_opt.h>
49
50#include <stdarg.h>
51#include <my_getopt.h>
52#ifdef HAVE_SYS_VADVISE_H
53#include <sys/vadvise.h>
54#endif
55#ifdef HAVE_SYS_MMAN_H
56#include <sys/mman.h>
57#endif
58
59/* Functions defined in this file */
60
61static int check_k_link(HA_CHECK *param, MARIA_HA *info, my_off_t next_link);
62static int chk_index(HA_CHECK *param, MARIA_HA *info, MARIA_KEYDEF *keyinfo,
63 MARIA_PAGE *page, ha_rows *keys,
64 ha_checksum *key_checksum, uint level);
65static uint isam_key_length(MARIA_HA *info,MARIA_KEYDEF *keyinfo);
66static ha_checksum calc_checksum(ha_rows count);
67static int writekeys(MARIA_SORT_PARAM *sort_param);
68static int sort_one_index(HA_CHECK *param, MARIA_HA *info,
69 MARIA_KEYDEF *keyinfo,
70 my_off_t pagepos, File new_file);
71static int sort_key_read(MARIA_SORT_PARAM *sort_param, uchar *key);
72static int sort_maria_ft_key_read(MARIA_SORT_PARAM *sort_param, uchar *key);
73static int sort_get_next_record(MARIA_SORT_PARAM *sort_param);
74static int sort_key_cmp(MARIA_SORT_PARAM *sort_param, const void *a,
75 const void *b);
76static int sort_maria_ft_key_write(MARIA_SORT_PARAM *sort_param,
77 const uchar *a);
78static int sort_key_write(MARIA_SORT_PARAM *sort_param, const uchar *a);
79static my_off_t get_record_for_key(MARIA_KEYDEF *keyinfo, const uchar *key);
80static int sort_insert_key(MARIA_SORT_PARAM *sort_param,
81 reg1 SORT_KEY_BLOCKS *key_block,
82 const uchar *key, my_off_t prev_block);
83static int sort_delete_record(MARIA_SORT_PARAM *sort_param);
84/*static int _ma_flush_pending_blocks(HA_CHECK *param);*/
85static SORT_KEY_BLOCKS *alloc_key_blocks(HA_CHECK *param, uint blocks,
86 uint buffer_length);
87static ha_checksum maria_byte_checksum(const uchar *buf, uint length);
88static void set_data_file_type(MARIA_SORT_INFO *sort_info, MARIA_SHARE *share);
89static void restore_data_file_type(MARIA_SHARE *share);
90static void change_data_file_descriptor(MARIA_HA *info, File new_file);
91static void unuse_data_file_descriptor(MARIA_HA *info);
92static int _ma_safe_scan_block_record(MARIA_SORT_INFO *sort_info,
93 MARIA_HA *info, uchar *record);
94static void copy_data_file_state(MARIA_STATE_INFO *to,
95 MARIA_STATE_INFO *from);
96static void report_keypage_fault(HA_CHECK *param, MARIA_HA *info,
97 my_off_t position);
98static my_bool create_new_data_handle(MARIA_SORT_PARAM *param, File new_file);
99static my_bool _ma_flush_table_files_before_swap(HA_CHECK *param,
100 MARIA_HA *info);
101static TrID max_trid_in_system(void);
102static void _ma_check_print_not_visible_error(HA_CHECK *param, TrID used_trid);
103void retry_if_quick(MARIA_SORT_PARAM *param, int error);
104static void print_bitmap_description(MARIA_SHARE *share,
105 pgcache_page_no_t page,
106 uchar *buff);
107
108
109/* Initialize check param with default values */
110
111void maria_chk_init(HA_CHECK *param)
112{
113 bzero((uchar*) param,sizeof(*param));
114 param->opt_follow_links=1;
115 param->keys_in_use= ~(ulonglong) 0;
116 param->search_after_block=HA_OFFSET_ERROR;
117 param->auto_increment_value= 0;
118 param->use_buffers= PAGE_BUFFER_INIT;
119 param->read_buffer_length=READ_BUFFER_INIT;
120 param->write_buffer_length=READ_BUFFER_INIT;
121 param->sort_buffer_length=SORT_BUFFER_INIT;
122 param->sort_key_blocks=BUFFERS_WHEN_SORTING;
123 param->tmpfile_createflag=O_RDWR | O_TRUNC | O_EXCL;
124 param->myf_rw=MYF(MY_NABP | MY_WME | MY_WAIT_IF_FULL);
125 param->start_check_pos=0;
126 param->max_record_length= LONGLONG_MAX;
127 param->pagecache_block_size= KEY_CACHE_BLOCK_SIZE;
128 param->stats_method= MI_STATS_METHOD_NULLS_NOT_EQUAL;
129 param->max_stage= 1;
130}
131
132
133/* Initialize check param and maria handler for check of table */
134
135void maria_chk_init_for_check(HA_CHECK *param, MARIA_HA *info)
136{
137 param->not_visible_rows_found= 0;
138 param->max_found_trid= 0;
139
140 /*
141 Set up transaction handler so that we can see all rows. When rows is read
142 we will check the found id against param->max_tried
143 */
144 if (!info->s->base.born_transactional)
145 {
146 /*
147 There are no trids. Howver we want to set max_trid to make test of
148 create_trid simpler.
149 */
150 param->max_trid= ~(TrID) 0;
151 }
152 else if (param->max_trid == 0)
153 {
154 if (!ma_control_file_inited())
155 param->max_trid= 0; /* Give warning for first trid found */
156 else
157 param->max_trid= max_trid_in_system();
158 }
159
160 maria_ignore_trids(info);
161}
162
163
164 /* Check the status flags for the table */
165
166int maria_chk_status(HA_CHECK *param, MARIA_HA *info)
167{
168 MARIA_SHARE *share= info->s;
169
170 if (maria_is_crashed_on_repair(info))
171 _ma_check_print_warning(param,
172 "Table is marked as crashed and last repair failed");
173 else if (maria_in_repair(info))
174 _ma_check_print_warning(param,
175 "Last repair was aborted before finishing");
176 else if (maria_is_crashed(info))
177 _ma_check_print_warning(param,
178 "Table is marked as crashed");
179 if (share->state.open_count != (uint) (share->global_changed ? 1 : 0))
180 {
181 /* Don't count this as a real warning, as check can correct this ! */
182 uint save=param->warning_printed;
183 _ma_check_print_warning(param,
184 share->state.open_count==1 ?
185 "%d client is using or hasn't closed the table properly" :
186 "%d clients are using or haven't closed the table properly",
187 share->state.open_count);
188 /* If this will be fixed by the check, forget the warning */
189 if (param->testflag & T_UPDATE_STATE)
190 param->warning_printed=save;
191 }
192 if (share->state.create_trid > param->max_trid)
193 {
194 _ma_check_print_warning(param,
195 "Table create_trd (%llu) > current max_transaction id (%llu). Table needs to be repaired or zerofilled to be usable",
196 share->state.create_trid, param->max_trid);
197 return 1;
198 }
199 return 0;
200}
201
202/*
203 Check delete links in row data
204*/
205
206int maria_chk_del(HA_CHECK *param, register MARIA_HA *info,
207 ulonglong test_flag)
208{
209 MARIA_SHARE *share= info->s;
210 reg2 ha_rows i;
211 uint delete_link_length;
212 my_off_t empty,next_link,UNINIT_VAR(old_link);
213 char buff[22],buff2[22];
214 DBUG_ENTER("maria_chk_del");
215
216 param->record_checksum=0;
217
218 if (share->data_file_type == BLOCK_RECORD)
219 DBUG_RETURN(0); /* No delete links here */
220
221 delete_link_length=((share->options & HA_OPTION_PACK_RECORD) ? 20 :
222 share->rec_reflength+1);
223
224 if (!(test_flag & T_SILENT))
225 puts("- check record delete-chain");
226
227 next_link=share->state.dellink;
228 if (share->state.state.del == 0)
229 {
230 if (test_flag & T_VERBOSE)
231 {
232 puts("No recordlinks");
233 }
234 }
235 else
236 {
237 if (test_flag & T_VERBOSE)
238 printf("Recordlinks: ");
239 empty=0;
240 for (i= share->state.state.del ; i > 0L && next_link != HA_OFFSET_ERROR ; i--)
241 {
242 if (_ma_killed_ptr(param))
243 DBUG_RETURN(1);
244 if (test_flag & T_VERBOSE)
245 printf(" %9s",llstr(next_link,buff));
246 if (next_link >= share->state.state.data_file_length)
247 goto wrong;
248 if (mysql_file_pread(info->dfile.file, (uchar*) buff, delete_link_length,
249 next_link,MYF(MY_NABP)))
250 {
251 if (test_flag & T_VERBOSE) puts("");
252 _ma_check_print_error(param,"Can't read delete-link at filepos: %s",
253 llstr(next_link,buff));
254 DBUG_RETURN(1);
255 }
256 if (*buff != '\0')
257 {
258 if (test_flag & T_VERBOSE) puts("");
259 _ma_check_print_error(param,"Record at pos: %s is not remove-marked",
260 llstr(next_link,buff));
261 goto wrong;
262 }
263 if (share->options & HA_OPTION_PACK_RECORD)
264 {
265 my_off_t prev_link=mi_sizekorr(buff+12);
266 if (empty && prev_link != old_link)
267 {
268 if (test_flag & T_VERBOSE) puts("");
269 _ma_check_print_error(param,
270 "Deleted block at %s doesn't point back at previous delete link",
271 llstr(next_link,buff2));
272 goto wrong;
273 }
274 old_link=next_link;
275 next_link=mi_sizekorr(buff+4);
276 empty+=mi_uint3korr(buff+1);
277 }
278 else
279 {
280 param->record_checksum+=(ha_checksum) next_link;
281 next_link= _ma_rec_pos(share, (uchar *) buff + 1);
282 empty+=share->base.pack_reclength;
283 }
284 }
285 if (share->state.state.del && (test_flag & T_VERBOSE))
286 puts("\n");
287 if (empty != share->state.state.empty)
288 {
289 _ma_check_print_warning(param,
290 "Found %s deleted space in delete link chain. Should be %s",
291 llstr(empty,buff2),
292 llstr(share->state.state.empty,buff));
293 }
294 if (next_link != HA_OFFSET_ERROR)
295 {
296 _ma_check_print_error(param,
297 "Found more than the expected %s deleted rows in delete link chain",
298 llstr(share->state.state.del, buff));
299 goto wrong;
300 }
301 if (i != 0)
302 {
303 _ma_check_print_error(param,
304 "Found %s deleted rows in delete link chain. Should be %s",
305 llstr(share->state.state.del - i, buff2),
306 llstr(share->state.state.del, buff));
307 goto wrong;
308 }
309 }
310 DBUG_RETURN(0);
311
312wrong:
313 param->testflag|=T_RETRY_WITHOUT_QUICK;
314 if (test_flag & T_VERBOSE)
315 puts("");
316 _ma_check_print_error(param,"record delete-link-chain corrupted");
317 DBUG_RETURN(1);
318} /* maria_chk_del */
319
320
321/* Check delete links in index file */
322
323static int check_k_link(HA_CHECK *param, register MARIA_HA *info,
324 my_off_t next_link)
325{
326 MARIA_SHARE *share= info->s;
327 uint block_size= share->block_size;
328 ha_rows records;
329 char llbuff[21], llbuff2[21];
330 uchar *buff;
331 DBUG_ENTER("check_k_link");
332
333 if (next_link == HA_OFFSET_ERROR)
334 DBUG_RETURN(0); /* Avoid printing empty line */
335
336 records= (ha_rows) (share->state.state.key_file_length / block_size);
337 while (next_link != HA_OFFSET_ERROR && records > 0)
338 {
339 if (_ma_killed_ptr(param))
340 DBUG_RETURN(1);
341 if (param->testflag & T_VERBOSE)
342 printf("%16s",llstr(next_link,llbuff));
343
344 /* Key blocks must lay within the key file length entirely. */
345 if (next_link + block_size > share->state.state.key_file_length)
346 {
347 /* purecov: begin tested */
348 _ma_check_print_error(param, "Invalid key block position: %s "
349 "key block size: %u file_length: %s",
350 llstr(next_link, llbuff), block_size,
351 llstr(share->state.state.key_file_length, llbuff2));
352 DBUG_RETURN(1);
353 /* purecov: end */
354 }
355
356 /* Key blocks must be aligned at block_size */
357 if (next_link & (block_size -1))
358 {
359 /* purecov: begin tested */
360 _ma_check_print_error(param, "Mis-aligned key block: %s "
361 "minimum key block length: %u",
362 llstr(next_link, llbuff),
363 block_size);
364 DBUG_RETURN(1);
365 /* purecov: end */
366 }
367
368 DBUG_ASSERT(share->pagecache->block_size == block_size);
369 if (!(buff= pagecache_read(share->pagecache,
370 &share->kfile,
371 (pgcache_page_no_t) (next_link / block_size),
372 DFLT_INIT_HITS,
373 info->buff, PAGECACHE_READ_UNKNOWN_PAGE,
374 PAGECACHE_LOCK_LEFT_UNLOCKED, 0)))
375 {
376 /* purecov: begin tested */
377 _ma_check_print_error(param, "key cache read error for block: %s",
378 llstr(next_link,llbuff));
379 DBUG_RETURN(1);
380 /* purecov: end */
381 }
382 if (_ma_get_keynr(info->s, buff) != MARIA_DELETE_KEY_NR)
383 _ma_check_print_error(param, "Page at %s is not delete marked",
384 llstr(next_link, llbuff));
385
386 next_link= mi_sizekorr(buff + share->keypage_header);
387 records--;
388 param->key_file_blocks+=block_size;
389 }
390 if (param->testflag & T_VERBOSE)
391 {
392 if (next_link != HA_OFFSET_ERROR)
393 printf("%16s\n",llstr(next_link,llbuff));
394 else
395 puts("");
396 }
397 DBUG_RETURN (next_link != HA_OFFSET_ERROR);
398} /* check_k_link */
399
400
401 /* Check sizes of files */
402
403int maria_chk_size(HA_CHECK *param, register MARIA_HA *info)
404{
405 MARIA_SHARE *share= info->s;
406 int error;
407 register my_off_t skr,size;
408 char buff[22],buff2[22];
409 DBUG_ENTER("maria_chk_size");
410
411 if (!(param->testflag & T_SILENT))
412 puts("- check file-size");
413
414 /*
415 The following is needed if called externally (not from maria_chk).
416 To get a correct physical size we need to flush them.
417 */
418 if ((error= _ma_flush_table_files(info,
419 MARIA_FLUSH_DATA | MARIA_FLUSH_INDEX,
420 FLUSH_FORCE_WRITE, FLUSH_FORCE_WRITE)))
421 _ma_check_print_error(param, "Failed to flush data or index file");
422
423 size= mysql_file_seek(share->kfile.file, 0L, MY_SEEK_END, MYF(MY_THREADSAFE));
424 if ((skr=(my_off_t) share->state.state.key_file_length) != size)
425 {
426 /* Don't give error if file generated by maria_pack */
427 if (skr > size && maria_is_any_key_active(share->state.key_map))
428 {
429 error=1;
430 _ma_check_print_error(param,
431 "Size of indexfile is: %-8s Expected: %s",
432 llstr(size,buff), llstr(skr,buff2));
433 share->state.state.key_file_length= size;
434 }
435 else if (!(param->testflag & T_VERY_SILENT))
436 _ma_check_print_warning(param,
437 "Size of indexfile is: %-8s Expected: %s",
438 llstr(size,buff), llstr(skr,buff2));
439 }
440 if (size > share->base.max_key_file_length)
441 {
442 _ma_check_print_warning(param,
443 "Size of indexfile is: %-8s which is bigger than max indexfile size: %s",
444 ullstr(size,buff),
445 ullstr(share->base.max_key_file_length, buff2));
446 }
447 else if (!(param->testflag & T_VERY_SILENT) &&
448 ! (share->options & HA_OPTION_COMPRESS_RECORD) &&
449 ulonglong2double(share->state.state.key_file_length) >
450 ulonglong2double(share->base.margin_key_file_length)*0.9)
451 _ma_check_print_warning(param,"Keyfile is almost full, %10s of %10s used",
452 llstr(share->state.state.key_file_length,buff),
453 llstr(share->base.max_key_file_length,buff));
454
455 size= mysql_file_seek(info->dfile.file, 0L, MY_SEEK_END, MYF(0));
456 skr=(my_off_t) share->state.state.data_file_length;
457 if (share->options & HA_OPTION_COMPRESS_RECORD)
458 skr+= MEMMAP_EXTRA_MARGIN;
459#ifdef USE_RELOC
460 if (share->data_file_type == STATIC_RECORD &&
461 skr < (my_off_t) share->base.reloc*share->base.min_pack_length)
462 skr=(my_off_t) share->base.reloc*share->base.min_pack_length;
463#endif
464 if (skr != size)
465 {
466 share->state.state.data_file_length=size; /* Skip other errors */
467 if (skr > size && skr != size + MEMMAP_EXTRA_MARGIN)
468 {
469 error=1;
470 _ma_check_print_error(param,"Size of datafile is: %-9s Expected: %s",
471 llstr(size,buff), llstr(skr,buff2));
472 param->testflag|=T_RETRY_WITHOUT_QUICK;
473 }
474 else
475 {
476 _ma_check_print_warning(param,
477 "Size of datafile is: %-9s Expected: %s",
478 llstr(size,buff), llstr(skr,buff2));
479 }
480 }
481 if (size > share->base.max_data_file_length)
482 {
483 _ma_check_print_warning(param,
484 "Size of datafile is: %-8s which is bigger than max datafile size: %s",
485 ullstr(size,buff),
486 ullstr(share->base.max_data_file_length, buff2));
487 } else if (!(param->testflag & T_VERY_SILENT) &&
488 !(share->options & HA_OPTION_COMPRESS_RECORD) &&
489 ulonglong2double(share->state.state.data_file_length) >
490 (ulonglong2double(share->base.max_data_file_length)*0.9))
491 _ma_check_print_warning(param, "Datafile is almost full, %10s of %10s used",
492 llstr(share->state.state.data_file_length,buff),
493 llstr(share->base.max_data_file_length,buff2));
494 DBUG_RETURN(error);
495} /* maria_chk_size */
496
497
498/* Check keys */
499
500int maria_chk_key(HA_CHECK *param, register MARIA_HA *info)
501{
502 uint key,found_keys=0,full_text_keys=0,result=0;
503 ha_rows keys;
504 ha_checksum old_record_checksum,init_checksum;
505 my_off_t all_keydata,all_totaldata,key_totlength,length;
506 double *rec_per_key_part;
507 MARIA_SHARE *share= info->s;
508 MARIA_KEYDEF *keyinfo;
509 char buff[22],buff2[22];
510 MARIA_PAGE page;
511 DBUG_ENTER("maria_chk_key");
512
513 if (!(param->testflag & T_SILENT))
514 puts("- check key delete-chain");
515
516 param->key_file_blocks=share->base.keystart;
517 if (check_k_link(param, info, share->state.key_del))
518 {
519 if (param->testflag & T_VERBOSE) puts("");
520 _ma_check_print_error(param,"key delete-link-chain corrupted");
521 DBUG_RETURN(-1);
522 }
523
524 if (!(param->testflag & T_SILENT))
525 puts("- check index reference");
526
527 all_keydata=all_totaldata=key_totlength=0;
528 init_checksum=param->record_checksum;
529 old_record_checksum=0;
530 if (share->data_file_type == STATIC_RECORD)
531 old_record_checksum= (calc_checksum(share->state.state.records +
532 share->state.state.del-1) *
533 share->base.pack_reclength);
534 rec_per_key_part= param->new_rec_per_key_part;
535 for (key= 0,keyinfo= &share->keyinfo[0]; key < share->base.keys ;
536 rec_per_key_part+=keyinfo->keysegs, key++, keyinfo++)
537 {
538 param->key_crc[key]=0;
539 if (! maria_is_key_active(share->state.key_map, key))
540 {
541 /* Remember old statistics for key */
542 memcpy((char*) rec_per_key_part,
543 (char*) (share->state.rec_per_key_part +
544 (uint) (rec_per_key_part - param->new_rec_per_key_part)),
545 keyinfo->keysegs*sizeof(*rec_per_key_part));
546 continue;
547 }
548 found_keys++;
549 _ma_report_progress(param, key, share->base.keys);
550
551 param->record_checksum=init_checksum;
552
553 bzero((char*) &param->unique_count,sizeof(param->unique_count));
554 bzero((char*) &param->notnull_count,sizeof(param->notnull_count));
555
556 if ((!(param->testflag & T_SILENT)))
557 printf ("- check data record references index: %d\n",key+1);
558 if (keyinfo->flag & (HA_FULLTEXT | HA_SPATIAL))
559 full_text_keys++;
560 if (share->state.key_root[key] == HA_OFFSET_ERROR)
561 {
562 if (share->state.state.records != 0 && !(keyinfo->flag & HA_FULLTEXT))
563 _ma_check_print_error(param, "Key tree %u is empty", key + 1);
564 goto do_stat;
565 }
566 if (_ma_fetch_keypage(&page, info, keyinfo, share->state.key_root[key],
567 PAGECACHE_LOCK_LEFT_UNLOCKED, DFLT_INIT_HITS,
568 info->buff, 0))
569 {
570 report_keypage_fault(param, info, share->state.key_root[key]);
571 if (!(param->testflag & T_INFO))
572 DBUG_RETURN(-1);
573 result= -1;
574 continue;
575 }
576 param->key_file_blocks+=keyinfo->block_length;
577 keys=0;
578 param->keydata=param->totaldata=0;
579 param->key_blocks=0;
580 param->max_level=0;
581 if (chk_index(param, info,keyinfo, &page, &keys, param->key_crc+key,1))
582 DBUG_RETURN(-1);
583 if (!(keyinfo->flag & (HA_FULLTEXT | HA_SPATIAL | HA_RTREE_INDEX)))
584 {
585 if (keys != share->state.state.records)
586 {
587 _ma_check_print_error(param,"Found %s keys of %s",llstr(keys,buff),
588 llstr(share->state.state.records,buff2));
589 if (!(param->testflag & T_INFO))
590 DBUG_RETURN(-1);
591 result= -1;
592 continue;
593 }
594 if ((found_keys - full_text_keys == 1 &&
595 !(share->data_file_type == STATIC_RECORD)) ||
596 (param->testflag & T_DONT_CHECK_CHECKSUM))
597 old_record_checksum= param->record_checksum;
598 else if (old_record_checksum != param->record_checksum)
599 {
600 if (key)
601 _ma_check_print_error(param,
602 "Key %u doesn't point at same records as "
603 "key 1",
604 key+1);
605 else
606 _ma_check_print_error(param,"Key 1 doesn't point at all records");
607 if (!(param->testflag & T_INFO))
608 DBUG_RETURN(-1);
609 result= -1;
610 continue;
611 }
612 }
613 if ((uint) share->base.auto_key -1 == key)
614 {
615 /* Check that auto_increment key is bigger than max key value */
616 ulonglong auto_increment;
617 const HA_KEYSEG *keyseg= share->keyinfo[share->base.auto_key-1].seg;
618 info->lastinx=key;
619 _ma_read_key_record(info, info->rec_buff, 0);
620 auto_increment=
621 ma_retrieve_auto_increment(info->rec_buff + keyseg->start,
622 keyseg->type);
623 if (auto_increment > share->state.auto_increment)
624 {
625 _ma_check_print_warning(param, "Auto-increment value: %s is smaller "
626 "than max used value: %s",
627 llstr(share->state.auto_increment,buff2),
628 llstr(auto_increment, buff));
629 }
630 if (param->testflag & T_AUTO_INC)
631 {
632 set_if_bigger(share->state.auto_increment,
633 auto_increment);
634 set_if_bigger(share->state.auto_increment,
635 param->auto_increment_value);
636 }
637
638 /* Check that there isn't a row with auto_increment = 0 in the table */
639 maria_extra(info,HA_EXTRA_KEYREAD,0);
640 bzero(info->lastkey_buff, keyinfo->seg->length);
641 if (!maria_rkey(info, info->rec_buff, key,
642 info->lastkey_buff,
643 (key_part_map) 1, HA_READ_KEY_EXACT))
644 {
645 /* Don't count this as a real warning, as maria_chk can't correct it */
646 uint save=param->warning_printed;
647 _ma_check_print_warning(param, "Found row where the auto_increment "
648 "column has the value 0");
649 param->warning_printed=save;
650 }
651 maria_extra(info,HA_EXTRA_NO_KEYREAD,0);
652 }
653
654 length=(my_off_t) isam_key_length(info,keyinfo)*keys + param->key_blocks*2;
655 if (param->testflag & T_INFO && param->totaldata != 0L && keys != 0L)
656 printf("Key: %2d: Keyblocks used: %3d%% Packed: %4d%% Max levels: %2d\n",
657 key+1,
658 (int) (my_off_t2double(param->keydata)*100.0/my_off_t2double(param->totaldata)),
659 (int) ((my_off_t2double(length) - my_off_t2double(param->keydata))*100.0/
660 my_off_t2double(length)),
661 param->max_level);
662 all_keydata+=param->keydata; all_totaldata+=param->totaldata; key_totlength+=length;
663
664do_stat:
665 if (param->testflag & T_STATISTICS)
666 maria_update_key_parts(keyinfo, rec_per_key_part, param->unique_count,
667 param->stats_method == MI_STATS_METHOD_IGNORE_NULLS?
668 param->notnull_count: NULL,
669 (ulonglong)share->state.state.records);
670 }
671 if (param->testflag & T_INFO)
672 {
673 if (all_totaldata != 0L && found_keys > 0)
674 printf("Total: Keyblocks used: %3d%% Packed: %4d%%\n\n",
675 (int) (my_off_t2double(all_keydata)*100.0/
676 my_off_t2double(all_totaldata)),
677 (int) ((my_off_t2double(key_totlength) -
678 my_off_t2double(all_keydata))*100.0/
679 my_off_t2double(key_totlength)));
680 else if (all_totaldata != 0L && maria_is_any_key_active(share->state.key_map))
681 puts("");
682 }
683 if (param->key_file_blocks != share->state.state.key_file_length &&
684 share->state.key_map == ~(ulonglong) 0)
685 _ma_check_print_warning(param, "Some data are unreferenced in keyfile");
686 if (found_keys != full_text_keys)
687 param->record_checksum=old_record_checksum-init_checksum; /* Remove delete links */
688 else
689 param->record_checksum=0;
690 DBUG_RETURN(result);
691} /* maria_chk_key */
692
693
694
695static int chk_index_down(HA_CHECK *param, MARIA_HA *info,
696 MARIA_KEYDEF *keyinfo,
697 my_off_t page, uchar *buff, ha_rows *keys,
698 ha_checksum *key_checksum, uint level)
699{
700 char llbuff[22],llbuff2[22];
701 MARIA_SHARE *share= info->s;
702 MARIA_PAGE ma_page;
703 DBUG_ENTER("chk_index_down");
704
705 /* Key blocks must lay within the key file length entirely. */
706 if (page + keyinfo->block_length > share->state.state.key_file_length)
707 {
708 /* purecov: begin tested */
709 /* Give it a chance to fit in the real file size. */
710 my_off_t max_length= mysql_file_seek(info->s->kfile.file, 0L, MY_SEEK_END,
711 MYF(MY_THREADSAFE));
712 _ma_check_print_error(param, "Invalid key block position: %s "
713 "key block size: %u file_length: %s",
714 llstr(page, llbuff), keyinfo->block_length,
715 llstr(share->state.state.key_file_length, llbuff2));
716 if (page + keyinfo->block_length > max_length)
717 goto err;
718 /* Fix the remembered key file length. */
719 share->state.state.key_file_length= (max_length &
720 ~ (my_off_t) (keyinfo->block_length -
721 1));
722 /* purecov: end */
723 }
724
725 /* Key blocks must be aligned at block length */
726 if (page & (info->s->block_size -1))
727 {
728 /* purecov: begin tested */
729 _ma_check_print_error(param, "Mis-aligned key block: %s "
730 "key block length: %u",
731 llstr(page, llbuff), info->s->block_size);
732 goto err;
733 /* purecov: end */
734 }
735
736 if (_ma_fetch_keypage(&ma_page, info, keyinfo, page,
737 PAGECACHE_LOCK_LEFT_UNLOCKED,
738 DFLT_INIT_HITS, buff, 0))
739 {
740 report_keypage_fault(param, info, page);
741 goto err;
742 }
743 param->key_file_blocks+=keyinfo->block_length;
744 if (chk_index(param, info, keyinfo, &ma_page, keys, key_checksum,level))
745 goto err;
746
747 DBUG_RETURN(0);
748
749 /* purecov: begin tested */
750err:
751 DBUG_RETURN(1);
752 /* purecov: end */
753}
754
755
756/*
757 "Ignore NULLs" statistics collection method: process first index tuple.
758
759 SYNOPSIS
760 maria_collect_stats_nonulls_first()
761 keyseg IN Array of key part descriptions
762 notnull INOUT Array, notnull[i] = (number of {keypart1...keypart_i}
763 tuples that don't contain NULLs)
764 key IN Key values tuple
765
766 DESCRIPTION
767 Process the first index tuple - find out which prefix tuples don't
768 contain NULLs, and update the array of notnull counters accordingly.
769*/
770
771static
772void maria_collect_stats_nonulls_first(HA_KEYSEG *keyseg, ulonglong *notnull,
773 const uchar *key)
774{
775 size_t first_null, kp;
776 first_null= ha_find_null(keyseg, key) - keyseg;
777 /*
778 All prefix tuples that don't include keypart_{first_null} are not-null
779 tuples (and all others aren't), increment counters for them.
780 */
781 for (kp= 0; kp < first_null; kp++)
782 notnull[kp]++;
783}
784
785
786/*
787 "Ignore NULLs" statistics collection method: process next index tuple.
788
789 SYNOPSIS
790 maria_collect_stats_nonulls_next()
791 keyseg IN Array of key part descriptions
792 notnull INOUT Array, notnull[i] = (number of {keypart1...keypart_i}
793 tuples that don't contain NULLs)
794 prev_key IN Previous key values tuple
795 last_key IN Next key values tuple
796
797 DESCRIPTION
798 Process the next index tuple:
799 1. Find out which prefix tuples of last_key don't contain NULLs, and
800 update the array of notnull counters accordingly.
801 2. Find the first keypart number where the prev_key and last_key tuples
802 are different(A), or last_key has NULL value(B), and return it, so the
803 caller can count number of unique tuples for each key prefix. We don't
804 need (B) to be counted, and that is compensated back in
805 maria_update_key_parts().
806
807 RETURN
808 1 + number of first keypart where values differ or last_key tuple has NULL
809*/
810
811static
812int maria_collect_stats_nonulls_next(HA_KEYSEG *keyseg, ulonglong *notnull,
813 const uchar *prev_key,
814 const uchar *last_key)
815{
816 uint diffs[2];
817 size_t first_null_seg, kp;
818 HA_KEYSEG *seg;
819
820 /*
821 Find the first keypart where values are different or either of them is
822 NULL. We get results in diffs array:
823 diffs[0]= 1 + number of first different keypart
824 diffs[1]=offset: (last_key + diffs[1]) points to first value in
825 last_key that is NULL or different from corresponding
826 value in prev_key.
827 */
828 ha_key_cmp(keyseg, prev_key, last_key, USE_WHOLE_KEY,
829 SEARCH_FIND | SEARCH_NULL_ARE_NOT_EQUAL, diffs);
830 seg= keyseg + diffs[0] - 1;
831
832 /* Find first NULL in last_key */
833 first_null_seg= ha_find_null(seg, last_key + diffs[1]) - keyseg;
834 for (kp= 0; kp < first_null_seg; kp++)
835 notnull[kp]++;
836
837 /*
838 Return 1+ number of first key part where values differ. Don't care if
839 these were NULLs and not .... We compensate for that in
840 maria_update_key_parts.
841 */
842 return diffs[0];
843}
844
845
846/* Check if index is ok */
847
848static int chk_index(HA_CHECK *param, MARIA_HA *info, MARIA_KEYDEF *keyinfo,
849 MARIA_PAGE *anc_page, ha_rows *keys,
850 ha_checksum *key_checksum, uint level)
851{
852 int flag;
853 uint comp_flag, page_flag, nod_flag;
854 uchar *temp_buff, *keypos, *old_keypos, *endpos;
855 my_off_t next_page,record;
856 MARIA_SHARE *share= info->s;
857 char llbuff[22];
858 uint diff_pos[2];
859 uchar tmp_key_buff[MARIA_MAX_KEY_BUFF];
860 MARIA_KEY tmp_key;
861 DBUG_ENTER("chk_index");
862 DBUG_DUMP("buff", anc_page->buff, anc_page->size);
863
864 /* TODO: implement appropriate check for RTree keys */
865 if (keyinfo->flag & (HA_SPATIAL | HA_RTREE_INDEX))
866 DBUG_RETURN(0);
867
868 if (!(temp_buff=(uchar*) my_alloca((uint) keyinfo->block_length)))
869 {
870 _ma_check_print_error(param,"Not enough memory for keyblock");
871 DBUG_RETURN(-1);
872 }
873
874 if (keyinfo->flag & HA_NOSAME)
875 {
876 /* Not real duplicates */
877 comp_flag=SEARCH_FIND | SEARCH_UPDATE | SEARCH_INSERT;
878 }
879 else
880 comp_flag=SEARCH_SAME; /* Keys in positionorder */
881
882 page_flag= anc_page->flag;
883 nod_flag= anc_page->node;
884 old_keypos= anc_page->buff + share->keypage_header;
885 keypos= old_keypos + nod_flag;
886 endpos= anc_page->buff + anc_page->size;
887
888 param->keydata+= anc_page->size;
889 param->totaldata+= keyinfo->block_length; /* INFO */
890 param->key_blocks++;
891 if (level > param->max_level)
892 param->max_level=level;
893
894 if (_ma_get_keynr(share, anc_page->buff) !=
895 (uint) (keyinfo - share->keyinfo))
896 _ma_check_print_error(param, "Page at %s is not marked for index %u",
897 llstr(anc_page->pos, llbuff),
898 (uint) (keyinfo - share->keyinfo));
899 if ((page_flag & KEYPAGE_FLAG_HAS_TRANSID) &&
900 !share->base.born_transactional)
901 {
902 _ma_check_print_error(param,
903 "Page at %s is marked with HAS_TRANSID even if "
904 "table is not transactional",
905 llstr(anc_page->pos, llbuff));
906 }
907
908 if (anc_page->size > share->max_index_block_size)
909 {
910 _ma_check_print_error(param,
911 "Page at %s has impossible (too big) pagelength",
912 llstr(anc_page->pos, llbuff));
913 goto err;
914 }
915
916 info->last_key.keyinfo= tmp_key.keyinfo= keyinfo;
917 info->lastinx= ~0; /* Safety */
918 tmp_key.data= tmp_key_buff;
919 for ( ;; )
920 {
921 if (nod_flag)
922 {
923 if (_ma_killed_ptr(param))
924 goto err;
925 next_page= _ma_kpos(nod_flag,keypos);
926 if (chk_index_down(param,info,keyinfo,next_page,
927 temp_buff,keys,key_checksum,level+1))
928 {
929 DBUG_DUMP("page_data", old_keypos, (uint) (keypos - old_keypos));
930 goto err;
931 }
932 }
933 old_keypos=keypos;
934 if (keypos >= endpos ||
935 !(*keyinfo->get_key)(&tmp_key, page_flag, nod_flag, &keypos))
936 break;
937 if (keypos > endpos)
938 {
939 _ma_check_print_error(param,
940 "Page length and length of keys don't match at "
941 "page: %s",
942 llstr(anc_page->pos,llbuff));
943 goto err;
944 }
945 if (share->data_file_type == BLOCK_RECORD &&
946 !(page_flag & KEYPAGE_FLAG_HAS_TRANSID) &&
947 key_has_transid(tmp_key.data + tmp_key.data_length +
948 share->rec_reflength-1))
949 {
950 _ma_check_print_error(param,
951 "Found key marked for transid on page that is not "
952 "marked for transid at: %s",
953 llstr(anc_page->pos,llbuff));
954 goto err;
955 }
956
957 if ((*keys)++ &&
958 (flag=ha_key_cmp(keyinfo->seg, info->last_key.data, tmp_key.data,
959 tmp_key.data_length + tmp_key.ref_length,
960 (comp_flag | SEARCH_INSERT | (tmp_key.flag >> 1) |
961 info->last_key.flag), diff_pos)) >=0)
962 {
963 DBUG_DUMP_KEY("old", &info->last_key);
964 DBUG_DUMP_KEY("new", &tmp_key);
965 DBUG_DUMP("new_in_page", old_keypos, (uint) (keypos-old_keypos));
966
967 if ((comp_flag & SEARCH_FIND) && flag == 0)
968 _ma_check_print_error(param,"Found duplicated key at page %s",
969 llstr(anc_page->pos,llbuff));
970 else
971 _ma_check_print_error(param,"Key in wrong position at page %s",
972 llstr(anc_page->pos,llbuff));
973 goto err;
974 }
975
976 if (param->testflag & T_STATISTICS)
977 {
978 if (*keys != 1L) /* not first_key */
979 {
980 if (param->stats_method == MI_STATS_METHOD_NULLS_NOT_EQUAL)
981 ha_key_cmp(keyinfo->seg, info->last_key.data,
982 tmp_key.data, tmp_key.data_length,
983 SEARCH_FIND | SEARCH_NULL_ARE_NOT_EQUAL,
984 diff_pos);
985 else if (param->stats_method == MI_STATS_METHOD_IGNORE_NULLS)
986 {
987 diff_pos[0]= maria_collect_stats_nonulls_next(keyinfo->seg,
988 param->notnull_count,
989 info->last_key.data,
990 tmp_key.data);
991 }
992 param->unique_count[diff_pos[0]-1]++;
993 }
994 else
995 {
996 if (param->stats_method == MI_STATS_METHOD_IGNORE_NULLS)
997 maria_collect_stats_nonulls_first(keyinfo->seg, param->notnull_count,
998 tmp_key.data);
999 }
1000 }
1001 _ma_copy_key(&info->last_key, &tmp_key);
1002 (*key_checksum)+= maria_byte_checksum(tmp_key.data, tmp_key.data_length);
1003 record= _ma_row_pos_from_key(&tmp_key);
1004
1005 if (keyinfo->flag & HA_FULLTEXT) /* special handling for ft2 */
1006 {
1007 uint off;
1008 int subkeys;
1009 get_key_full_length_rdonly(off, tmp_key.data);
1010 subkeys= ft_sintXkorr(tmp_key.data + off);
1011 if (subkeys < 0)
1012 {
1013 ha_rows tmp_keys=0;
1014 if (chk_index_down(param,info,&share->ft2_keyinfo,record,
1015 temp_buff,&tmp_keys,key_checksum,1))
1016 goto err;
1017 if (tmp_keys + subkeys)
1018 {
1019 _ma_check_print_error(param,
1020 "Number of words in the 2nd level tree "
1021 "does not match the number in the header. "
1022 "Parent word in on the page %s, offset %u",
1023 llstr(anc_page->pos,llbuff),
1024 (uint) (old_keypos - anc_page->buff));
1025 goto err;
1026 }
1027 (*keys)+=tmp_keys-1;
1028 continue;
1029 }
1030 /* fall through */
1031 }
1032 if ((share->data_file_type != BLOCK_RECORD &&
1033 share->data_file_type != NO_RECORD &&
1034 record >= share->state.state.data_file_length) ||
1035 (share->data_file_type == BLOCK_RECORD &&
1036 ma_recordpos_to_page(record) * share->base.min_block_length >=
1037 share->state.state.data_file_length) ||
1038 (share->data_file_type == NO_RECORD && record != 0))
1039 {
1040#ifndef DBUG_OFF
1041 char llbuff2[22], llbuff3[22];
1042#endif
1043 _ma_check_print_error(param,
1044 "Found key at page %s that points to record "
1045 "outside datafile",
1046 llstr(anc_page->pos,llbuff));
1047 DBUG_PRINT("test",("page: %s record: %s filelength: %s",
1048 llstr(anc_page->pos,llbuff),llstr(record,llbuff2),
1049 llstr(share->state.state.data_file_length,llbuff3)));
1050 DBUG_DUMP_KEY("key", &tmp_key);
1051 DBUG_DUMP("new_in_page", old_keypos, (uint) (keypos-old_keypos));
1052 goto err;
1053 }
1054 param->record_checksum+= (ha_checksum) record;
1055 }
1056 if (keypos != endpos)
1057 {
1058 _ma_check_print_error(param,
1059 "Keyblock size at page %s is not correct. "
1060 "Block length: %u key length: %u",
1061 llstr(anc_page->pos, llbuff), anc_page->size,
1062 (uint) (keypos - anc_page->buff));
1063 goto err;
1064 }
1065 my_afree(temp_buff);
1066 DBUG_RETURN(0);
1067 err:
1068 my_afree(temp_buff);
1069 DBUG_RETURN(1);
1070} /* chk_index */
1071
1072
1073 /* Calculate a checksum of 1+2+3+4...N = N*(N+1)/2 without overflow */
1074
1075static ha_checksum calc_checksum(ha_rows count)
1076{
1077 ulonglong sum,a,b;
1078 DBUG_ENTER("calc_checksum");
1079
1080 sum=0;
1081 a=count; b=count+1;
1082 if (a & 1)
1083 b>>=1;
1084 else
1085 a>>=1;
1086 while (b)
1087 {
1088 if (b & 1)
1089 sum+=a;
1090 a<<=1; b>>=1;
1091 }
1092 DBUG_PRINT("exit",("sum: %lx",(ulong) sum));
1093 DBUG_RETURN((ha_checksum) sum);
1094} /* calc_checksum */
1095
1096
1097 /* Calc length of key in normal isam */
1098
1099static uint isam_key_length(MARIA_HA *info, register MARIA_KEYDEF *keyinfo)
1100{
1101 uint length;
1102 HA_KEYSEG *keyseg;
1103 DBUG_ENTER("isam_key_length");
1104
1105 length= info->s->rec_reflength;
1106 for (keyseg=keyinfo->seg ; keyseg->type ; keyseg++)
1107 length+= keyseg->length;
1108
1109 DBUG_PRINT("exit",("length: %d",length));
1110 DBUG_RETURN(length);
1111} /* key_length */
1112
1113
1114
1115static void record_pos_to_txt(MARIA_HA *info, my_off_t recpos,
1116 char *buff)
1117{
1118 if (info->s->data_file_type != BLOCK_RECORD)
1119 llstr(recpos, buff);
1120 else
1121 {
1122 my_off_t page= ma_recordpos_to_page(recpos);
1123 uint row= ma_recordpos_to_dir_entry(recpos);
1124 char *end= longlong10_to_str(page, buff, 10);
1125 *(end++)= ':';
1126 longlong10_to_str(row, end, 10);
1127 }
1128}
1129
1130
1131/*
1132 Check that keys in records exist in index tree
1133
1134 SYNOPSIS
1135 check_keys_in_record()
1136 param Check paramenter
1137 info Maria handler
1138 extend Type of check (extended or normal)
1139 start_recpos Position to row
1140 record Record buffer
1141
1142 NOTES
1143 This function also calculates record checksum & number of rows
1144*/
1145
1146static int check_keys_in_record(HA_CHECK *param, MARIA_HA *info, int extend,
1147 my_off_t start_recpos, uchar *record)
1148{
1149 MARIA_SHARE *share= info->s;
1150 MARIA_KEYDEF *keyinfo;
1151 char llbuff[22+4];
1152 uint keynr;
1153
1154 param->tmp_record_checksum+= (ha_checksum) start_recpos;
1155 param->records++;
1156 if (param->records % WRITE_COUNT == 0)
1157 {
1158 if (param->testflag & T_WRITE_LOOP)
1159 {
1160 printf("%s\r", llstr(param->records, llbuff));
1161 fflush(stdout);
1162 }
1163 _ma_report_progress(param, param->records, share->state.state.records);
1164 }
1165
1166 /* Check if keys match the record */
1167 for (keynr=0, keyinfo= share->keyinfo; keynr < share->base.keys;
1168 keynr++, keyinfo++)
1169 {
1170 if (maria_is_key_active(share->state.key_map, keynr))
1171 {
1172 MARIA_KEY key;
1173 if (!(keyinfo->flag & HA_FULLTEXT))
1174 {
1175 (*keyinfo->make_key)(info, &key, keynr, info->lastkey_buff, record,
1176 start_recpos, 0);
1177 info->last_key.keyinfo= key.keyinfo;
1178 if (extend)
1179 {
1180 /* We don't need to lock the key tree here as we don't allow
1181 concurrent threads when running maria_chk
1182 */
1183 int search_result=
1184#ifdef HAVE_RTREE_KEYS
1185 (keyinfo->flag & (HA_SPATIAL | HA_RTREE_INDEX)) ?
1186 maria_rtree_find_first(info, &key, MBR_EQUAL | MBR_DATA) :
1187#endif
1188 _ma_search(info, &key, SEARCH_SAME, share->state.key_root[keynr]);
1189 if (search_result)
1190 {
1191 record_pos_to_txt(info, start_recpos, llbuff);
1192 _ma_check_print_error(param,
1193 "Record at: %14s "
1194 "Can't find key for index: %2d",
1195 llbuff, keynr+1);
1196 if (param->err_count++ > MAXERR || !(param->testflag & T_VERBOSE))
1197 return -1;
1198 }
1199 }
1200 else
1201 param->tmp_key_crc[keynr]+=
1202 maria_byte_checksum(key.data, key.data_length);
1203 }
1204 }
1205 }
1206 return 0;
1207}
1208
1209
1210/*
1211 Functions to loop through all rows and check if they are ok
1212
1213 NOTES
1214 One function for each record format
1215
1216 RESULT
1217 0 ok
1218 -1 Interrupted by user
1219 1 Error
1220*/
1221
1222static int check_static_record(HA_CHECK *param, MARIA_HA *info, int extend,
1223 uchar *record)
1224{
1225 MARIA_SHARE *share= info->s;
1226 my_off_t start_recpos, pos;
1227 char llbuff[22];
1228
1229 pos= 0;
1230 while (pos < share->state.state.data_file_length)
1231 {
1232 if (_ma_killed_ptr(param))
1233 return -1;
1234 if (my_b_read(&param->read_cache, record,
1235 share->base.pack_reclength))
1236 {
1237 _ma_check_print_error(param,
1238 "got error: %d when reading datafile at position: "
1239 "%s",
1240 my_errno, llstr(pos, llbuff));
1241 return 1;
1242 }
1243 start_recpos= pos;
1244 pos+= share->base.pack_reclength;
1245 param->splits++;
1246 if (*record == '\0')
1247 {
1248 param->del_blocks++;
1249 param->del_length+= share->base.pack_reclength;
1250 continue; /* Record removed */
1251 }
1252 param->glob_crc+= _ma_static_checksum(info,record);
1253 param->used+= share->base.pack_reclength;
1254 if (check_keys_in_record(param, info, extend, start_recpos, record))
1255 return 1;
1256 }
1257 return 0;
1258}
1259
1260
/*
  Row checker for dynamic-length rows

  Scans the data file from position 0 up to data_file_length. For each
  row the chain of blocks is followed through block_info.next_filepos
  and the parts are collected in info->rec_buff. The assembled row is
  then unpacked, optionally verified with _ma_rec_check(), added to
  param->glob_crc and passed to check_keys_in_record().

  Deleted blocks are validated, counted and skipped. A block reported as
  BLOCK_SYNC_ERROR is skipped when it is the first block of a row and is
  a fatal error when it is found in the middle of a chain.

  RETURN
    0   Scan reached end of file
    -1  Interrupted (_ma_killed_ptr() returned true)
    1   Error
*/

static int check_dynamic_record(HA_CHECK *param, MARIA_HA *info, int extend,
                                uchar *record)
{
  MARIA_BLOCK_INFO block_info;
  MARIA_SHARE *share= info->s;
  my_off_t UNINIT_VAR(start_recpos), start_block, pos;
  uchar *UNINIT_VAR(to);
  ulong UNINIT_VAR(left_length);
  uint b_type;
  char llbuff[22],llbuff2[22],llbuff3[22];
  DBUG_ENTER("check_dynamic_record");

  /* pos is the file position where the next row (or free block) starts */
  pos= 0;
  while (pos < share->state.state.data_file_length)
  {
    my_bool got_error= 0;
    /* Number of blocks accepted so far for the current row */
    int flag;
    if (_ma_killed_ptr(param))
      DBUG_RETURN(-1);

    flag= block_info.second_read=0;
    block_info.next_filepos=pos;
    /* One iteration per block in the row's block chain */
    do
    {
      /* READING_NEXT is only passed for the first block of a row */
      if (_ma_read_cache(info, &param->read_cache, block_info.header,
                         (start_block=block_info.next_filepos),
                         sizeof(block_info.header),
                         (flag ? 0 : READING_NEXT) | READING_HEADER))
      {
        _ma_check_print_error(param,
                              "got error: %d when reading datafile at "
                              "position: %s",
                              my_errno, llstr(start_block, llbuff));
        DBUG_RETURN(1);
      }

      /* Block start must be a multiple of MARIA_DYN_ALIGN_SIZE */
      if (start_block & (MARIA_DYN_ALIGN_SIZE-1))
      {
        _ma_check_print_error(param,"Wrong aligned block at %s",
                              llstr(start_block,llbuff));
        DBUG_RETURN(1);
      }
      b_type= _ma_get_block_info(info, &block_info,-1,start_block);
      if (b_type & (BLOCK_DELETED | BLOCK_ERROR | BLOCK_SYNC_ERROR |
                    BLOCK_FATAL_ERROR))
      {
        if (b_type & BLOCK_SYNC_ERROR)
        {
          if (flag)
          {
            /* Not allowed inside a chain of blocks */
            _ma_check_print_error(param,"Unexpected byte: %d at link: %s",
                                  (int) block_info.header[0],
                                  llstr(start_block,llbuff));
            DBUG_RETURN(1);
          }
          /* At the start of a row: step over the block */
          pos=block_info.filepos+block_info.block_len;
          goto next;
        }
        if (b_type & BLOCK_DELETED)
        {
          if (block_info.block_len < share->base.min_block_length)
          {
            _ma_check_print_error(param,
                                  "Deleted block with impossible length %lu "
                                  "at %s",
                                  block_info.block_len,llstr(pos,llbuff));
            DBUG_RETURN(1);
          }
          /* Both links of the deleted block must be inside the file */
          if ((block_info.next_filepos != HA_OFFSET_ERROR &&
               block_info.next_filepos >= share->state.state.data_file_length) ||
              (block_info.prev_filepos != HA_OFFSET_ERROR &&
               block_info.prev_filepos >= share->state.state.data_file_length))
          {
            _ma_check_print_error(param,"Delete link points outside datafile "
                                  "at %s",
                                  llstr(pos,llbuff));
            DBUG_RETURN(1);
          }
          param->del_blocks++;
          param->del_length+= block_info.block_len;
          param->splits++;
          pos= block_info.filepos+block_info.block_len;
          goto next;
        }
        /* BLOCK_ERROR or BLOCK_FATAL_ERROR */
        _ma_check_print_error(param,"Wrong bytesec: %d-%d-%d at linkstart: %s",
                              block_info.header[0],block_info.header[1],
                              block_info.header[2],
                              llstr(start_block,llbuff));
        DBUG_RETURN(1);
      }
      if (share->state.state.data_file_length < block_info.filepos+
          block_info.block_len)
      {
        _ma_check_print_error(param,
                              "Recordlink that points outside datafile at %s",
                              llstr(pos,llbuff));
        got_error=1;
        break;
      }
      param->splits++;
      if (!flag++)                              /* First block */
      {
        /*
          Remember where the row starts and let pos point past the first
          block; the scan continues from there after this row.
        */
        start_recpos=pos;
        pos=block_info.filepos+block_info.block_len;
        if (block_info.rec_len > (uint) share->base.max_pack_length)
        {
          my_errno= HA_ERR_WRONG_IN_RECORD;
          _ma_check_print_error(param,"Found too long record (%lu) at %s",
                                (ulong) block_info.rec_len,
                                llstr(start_recpos,llbuff));
          got_error=1;
          break;
        }
        if (share->base.blobs)
        {
          /* With blobs, make sure rec_buff can hold the whole row */
          if (_ma_alloc_buffer(&info->rec_buff, &info->rec_buff_size,
                               block_info.rec_len +
                               share->base.extra_rec_buff_size))

          {
            _ma_check_print_error(param,
                                  "Not enough memory (%lu) for blob at %s",
                                  (ulong) block_info.rec_len,
                                  llstr(start_recpos,llbuff));
            got_error=1;
            break;
          }
        }
        /* 'to' is the write position in rec_buff, left_length what is missing */
        to= info->rec_buff;
        left_length= block_info.rec_len;
      }
      if (left_length < block_info.data_len)
      {
        _ma_check_print_error(param,"Found too long record (%lu) at %s",
                              (ulong) block_info.data_len,
                              llstr(start_recpos,llbuff));
        got_error=1;
        break;
      }
      /* Read the data part of this block to the end of what we have so far */
      if (_ma_read_cache(info, &param->read_cache, to, block_info.filepos,
                         (uint) block_info.data_len,
                         flag == 1 ? READING_NEXT : 0))
      {
        _ma_check_print_error(param,
                              "got error: %d when reading datafile at "
                              "position: %s", my_errno,
                              llstr(block_info.filepos, llbuff));

        DBUG_RETURN(1);
      }
      to+=block_info.data_len;
      /* Space statistics: header bytes, used bytes and unused block bytes */
      param->link_used+= block_info.filepos-start_block;
      param->used+= block_info.filepos - start_block + block_info.data_len;
      param->empty+= block_info.block_len-block_info.data_len;
      left_length-= block_info.data_len;
      if (left_length)
      {
        /* More data is expected, so this must not be the last block */
        if (b_type & BLOCK_LAST)
        {
          _ma_check_print_error(param,
                                "Wrong record length %s of %s at %s",
                                llstr(block_info.rec_len-left_length,llbuff),
                                llstr(block_info.rec_len, llbuff2),
                                llstr(start_recpos,llbuff3));
          got_error=1;
          break;
        }
        if (share->state.state.data_file_length < block_info.next_filepos)
        {
          _ma_check_print_error(param,
                                "Found next-recordlink that points outside "
                                "datafile at %s",
                                llstr(block_info.filepos,llbuff));
          got_error=1;
          break;
        }
      }
    } while (left_length);

    if (! got_error)
    {
      /* The whole packed row is in rec_buff; unpack it into 'record' */
      if (_ma_rec_unpack(info,record,info->rec_buff,block_info.rec_len) ==
          MY_FILE_ERROR)
      {
        _ma_check_print_error(param,"Found wrong record at %s",
                              llstr(start_recpos,llbuff));
        got_error=1;
      }
      else
      {
        /* checksum stays 0 when the table has no row checksum function */
        ha_checksum checksum= 0;
        if (share->calc_checksum)
          checksum= (*share->calc_checksum)(info, record);

        if (param->testflag & (T_EXTEND | T_MEDIUM | T_VERBOSE))
        {
          if (_ma_rec_check(info,record, info->rec_buff,block_info.rec_len,
                            MY_TEST(share->calc_checksum), checksum))
          {
            _ma_check_print_error(param,"Found wrong packed record at %s",
                                  llstr(start_recpos,llbuff));
            got_error= 1;
          }
        }
        param->glob_crc+= checksum;
      }

      if (! got_error)
      {
        if (check_keys_in_record(param, info, extend, start_recpos, record))
          DBUG_RETURN(1);
      }
      else
      {
        /* Continue after a bad row only in verbose mode and below MAXERR */
        if (param->err_count++ > MAXERR || !(param->testflag & T_VERBOSE))
          DBUG_RETURN(1);
      }
    }
    else if (!flag)
      /*
        The error was found before the first block was accepted, so pos
        has not been moved yet; step over the block.
      */
      pos= block_info.filepos+block_info.block_len;
next:;
  }
  DBUG_RETURN(0);
}
1485
1486
1487static int check_compressed_record(HA_CHECK *param, MARIA_HA *info, int extend,
1488 uchar *record)
1489{
1490 MARIA_BLOCK_INFO block_info;
1491 MARIA_SHARE *share= info->s;
1492 my_off_t start_recpos, pos;
1493 char llbuff[22];
1494 my_bool got_error= 0;
1495 DBUG_ENTER("check_compressed_record");
1496
1497 pos= share->pack.header_length; /* Skip header */
1498 while (pos < share->state.state.data_file_length)
1499 {
1500 if (_ma_killed_ptr(param))
1501 DBUG_RETURN(-1);
1502
1503 if (_ma_read_cache(info, &param->read_cache, block_info.header, pos,
1504 share->pack.ref_length, READING_NEXT))
1505 {
1506 _ma_check_print_error(param,
1507 "got error: %d when reading datafile at position: "
1508 "%s",
1509 my_errno, llstr(pos, llbuff));
1510 DBUG_RETURN(1);
1511 }
1512
1513 start_recpos= pos;
1514 param->splits++;
1515 _ma_pack_get_block_info(info, &info->bit_buff, &block_info,
1516 &info->rec_buff, &info->rec_buff_size, -1,
1517 start_recpos);
1518 pos=block_info.filepos+block_info.rec_len;
1519 if (block_info.rec_len < (uint) share->min_pack_length ||
1520 block_info.rec_len > (uint) share->max_pack_length)
1521 {
1522 _ma_check_print_error(param,
1523 "Found block with wrong recordlength: %lu at %s",
1524 block_info.rec_len, llstr(start_recpos,llbuff));
1525 got_error=1;
1526 goto end;
1527 }
1528 if (_ma_read_cache(info, &param->read_cache, info->rec_buff,
1529 block_info.filepos, block_info.rec_len, READING_NEXT))
1530 {
1531 _ma_check_print_error(param,
1532 "got error: %d when reading datafile at position: "
1533 "%s",
1534 my_errno, llstr(block_info.filepos, llbuff));
1535 DBUG_RETURN(1);
1536 }
1537 if (_ma_pack_rec_unpack(info, &info->bit_buff, record,
1538 info->rec_buff, block_info.rec_len))
1539 {
1540 _ma_check_print_error(param,"Found wrong record at %s",
1541 llstr(start_recpos,llbuff));
1542 got_error=1;
1543 goto end;
1544 }
1545 param->glob_crc+= (*share->calc_checksum)(info,record);
1546 param->link_used+= (block_info.filepos - start_recpos);
1547 param->used+= (pos-start_recpos);
1548
1549end:
1550 if (! got_error)
1551 {
1552 if (check_keys_in_record(param, info, extend, start_recpos, record))
1553 DBUG_RETURN(1);
1554 }
1555 else
1556 {
1557 got_error= 0; /* Reset for next loop */
1558 if (param->err_count++ > MAXERR || !(param->testflag & T_VERBOSE))
1559 DBUG_RETURN(1);
1560 }
1561 }
1562 DBUG_RETURN(0);
1563}
1564
1565
1566/*
1567 Check if layout on head or tail page is ok
1568
1569 NOTES
1570 This is for rows-in-block format.
1571*/
1572
1573static int check_page_layout(HA_CHECK *param, MARIA_HA *info,
1574 my_off_t page_pos, uchar *page,
1575 uint row_count, uint head_empty,
1576 uint *real_rows_found, uint *free_slots_found)
1577{
1578 uint empty, last_row_end, row, first_dir_entry, free_entry, block_size;
1579 uint free_entries, prev_free_entry;
1580 uchar *dir_entry;
1581 char llbuff[22];
1582 my_bool error_in_free_list= 0;
1583 DBUG_ENTER("check_page_layout");
1584
1585 block_size= info->s->block_size;
1586 empty= 0;
1587 last_row_end= PAGE_HEADER_SIZE(info->s);
1588 *real_rows_found= 0;
1589
1590 /* Check free directory list */
1591 free_entry= (uint) page[DIR_FREE_OFFSET];
1592 free_entries= 0;
1593 prev_free_entry= END_OF_DIR_FREE_LIST;
1594 while (free_entry != END_OF_DIR_FREE_LIST)
1595 {
1596 uchar *dir;
1597 if (free_entry > row_count)
1598 {
1599 _ma_check_print_error(param,
1600 "Page %9s: Directory free entry points outside "
1601 "directory",
1602 llstr(page_pos, llbuff));
1603 error_in_free_list= 1;
1604 break;
1605 }
1606 dir= dir_entry_pos(page, block_size, free_entry);
1607 if (uint2korr(dir) != 0)
1608 {
1609 _ma_check_print_error(param,
1610 "Page %9s: Directory free entry points to "
1611 "not deleted entry",
1612 llstr(page_pos, llbuff));
1613 error_in_free_list= 1;
1614 break;
1615 }
1616 if (dir[2] != prev_free_entry)
1617 {
1618 _ma_check_print_error(param,
1619 "Page %9s: Directory free list back pointer "
1620 "points to wrong entry",
1621 llstr(page_pos, llbuff));
1622 error_in_free_list= 1;
1623 break;
1624 }
1625 prev_free_entry= free_entry;
1626 free_entry= dir[3];
1627 free_entries++;
1628 }
1629 *free_slots_found= free_entries;
1630
1631 /* Check directry */
1632 dir_entry= page+ block_size - PAGE_SUFFIX_SIZE;
1633 first_dir_entry= (block_size - row_count * DIR_ENTRY_SIZE -
1634 PAGE_SUFFIX_SIZE);
1635 for (row= 0 ; row < row_count ; row++)
1636 {
1637 uint pos, length;
1638 dir_entry-= DIR_ENTRY_SIZE;
1639 pos= uint2korr(dir_entry);
1640 if (!pos)
1641 {
1642 free_entries--;
1643 if (row == row_count -1)
1644 {
1645 _ma_check_print_error(param,
1646 "Page %9s: First entry in directory is 0",
1647 llstr(page_pos, llbuff));
1648 if (param->err_count++ > MAXERR || !(param->testflag & T_VERBOSE))
1649 DBUG_RETURN(1);
1650 }
1651 continue; /* Deleted row */
1652 }
1653 (*real_rows_found)++;
1654 length= uint2korr(dir_entry+2);
1655 param->used+= length;
1656 if (pos < last_row_end)
1657 {
1658 _ma_check_print_error(param,
1659 "Page %9s: Row %3u overlapps with previous row",
1660 llstr(page_pos, llbuff), row);
1661 DBUG_RETURN(1);
1662 }
1663 empty+= (pos - last_row_end);
1664 last_row_end= pos + length;
1665 if (last_row_end > first_dir_entry)
1666 {
1667 _ma_check_print_error(param,
1668 "Page %9s: Row %3u overlapps with directory",
1669 llstr(page_pos, llbuff), row);
1670 DBUG_RETURN(1);
1671 }
1672 }
1673 empty+= (first_dir_entry - last_row_end);
1674
1675 if (empty != head_empty)
1676 {
1677 _ma_check_print_error(param,
1678 "Page %9s: Wrong empty size. Stored: %5u "
1679 "Actual: %5u",
1680 llstr(page_pos, llbuff), head_empty, empty);
1681 param->err_count++;
1682 }
1683 if (free_entries != 0 && !error_in_free_list)
1684 {
1685 _ma_check_print_error(param,
1686 "Page %9s: Directory free link don't include "
1687 "all free entries",
1688 llstr(page_pos, llbuff));
1689 param->err_count++;
1690 }
1691 DBUG_RETURN(param->err_count &&
1692 (param->err_count >= MAXERR || !(param->testflag & T_VERBOSE)));
1693}
1694
1695
1696/*
1697 Check all rows on head page
1698
1699 NOTES
1700 This is for rows-in-block format.
1701
1702 Before this, we have already called check_page_layout(), so
1703 we know the block is logicaly correct (even if the rows may not be that)
1704
1705 RETURN
1706 0 ok
1707 1 error
1708*/
1709
1710
1711static my_bool check_head_page(HA_CHECK *param, MARIA_HA *info, uchar *record,
1712 int extend, my_off_t page_pos, uchar *page_buff,
1713 uint row_count)
1714{
1715 MARIA_SHARE *share= info->s;
1716 uchar *dir_entry;
1717 uint row;
1718 char llbuff[22], llbuff2[22];
1719 ulonglong page= page_pos / share->block_size;
1720 DBUG_ENTER("check_head_page");
1721
1722 dir_entry= page_buff+ share->block_size - PAGE_SUFFIX_SIZE;
1723 for (row= 0 ; row < row_count ; row++)
1724 {
1725 uint pos, length, flag;
1726 dir_entry-= DIR_ENTRY_SIZE;
1727 pos= uint2korr(dir_entry);
1728 if (!pos)
1729 continue;
1730 length= uint2korr(dir_entry+2);
1731 if (length < share->base.min_block_length)
1732 {
1733 _ma_check_print_error(param,
1734 "Page %9s: Row %3u is too short "
1735 "(%d of min %d bytes)",
1736 llstr(page, llbuff), row, length,
1737 (uint) share->base.min_block_length);
1738 DBUG_RETURN(1);
1739 }
1740 flag= (uint) (uchar) page_buff[pos];
1741 if (flag & ~(ROW_FLAG_ALL))
1742 _ma_check_print_error(param,
1743 "Page %9s: Row %3u has wrong flag: %u",
1744 llstr(page, llbuff), row, flag);
1745
1746 DBUG_PRINT("info", ("rowid: %s page: %lu row: %u",
1747 llstr(ma_recordpos(page, row), llbuff),
1748 (ulong) page, row));
1749 info->cur_row.trid= 0;
1750 if (_ma_read_block_record2(info, record, page_buff+pos,
1751 page_buff+pos+length))
1752 {
1753 _ma_check_print_error(param,
1754 "Page %9s: Row %3d is crashed",
1755 llstr(page, llbuff), row);
1756 if (param->err_count++ > MAXERR || !(param->testflag & T_VERBOSE))
1757 DBUG_RETURN(1);
1758 continue;
1759 }
1760 set_if_bigger(param->max_found_trid, info->cur_row.trid);
1761 if (info->cur_row.trid > param->max_trid)
1762 _ma_check_print_not_visible_error(param, info->cur_row.trid);
1763
1764 if (share->calc_checksum)
1765 {
1766 ha_checksum checksum= (*share->calc_checksum)(info, record);
1767 if (info->cur_row.checksum != (checksum & 255))
1768 _ma_check_print_error(param, "Page %9s: Row %3d has wrong checksum",
1769 llstr(page, llbuff), row);
1770 param->glob_crc+= checksum;
1771 }
1772 if (info->cur_row.extents_count)
1773 {
1774 uchar *extents= info->cur_row.extents;
1775 uint i;
1776 /* Check that bitmap has the right marker for the found extents */
1777 for (i= 0 ; i < info->cur_row.extents_count ; i++)
1778 {
1779 pgcache_page_no_t extent_page;
1780 uint page_count, page_type;
1781 extent_page= uint5korr(extents);
1782 page_count= uint2korr(extents+5) & ~START_EXTENT_BIT;
1783 extents+= ROW_EXTENT_SIZE;
1784 page_type= BLOB_PAGE;
1785 if (page_count & TAIL_BIT)
1786 {
1787 page_count= 1;
1788 page_type= TAIL_PAGE;
1789 }
1790 /*
1791 TODO OPTIMIZE:
1792 Check the whole extent with one test and only do the loop if
1793 something is wrong (for exact error reporting)
1794 */
1795 for ( ; page_count--; extent_page++)
1796 {
1797 uint bitmap_pattern;
1798 if (_ma_check_if_right_bitmap_type(info, page_type, extent_page,
1799 &bitmap_pattern))
1800 {
1801 _ma_check_print_error(param,
1802 "Page %9s: Row: %3d has an extent with "
1803 "wrong information in bitmap: "
1804 "Page: %9s Page_type: %d Bitmap: %d",
1805 llstr(page, llbuff), row,
1806 llstr(extent_page, llbuff2),
1807 page_type, bitmap_pattern);
1808 if (param->err_count++ > MAXERR || !(param->testflag & T_VERBOSE))
1809 DBUG_RETURN(1);
1810 }
1811 }
1812 }
1813 }
1814 param->full_page_count+= info->cur_row.full_page_count;
1815 param->tail_count+= info->cur_row.tail_count;
1816 if (check_keys_in_record(param, info, extend,
1817 ma_recordpos(page, row), record))
1818 DBUG_RETURN(1);
1819 }
1820 DBUG_RETURN(0);
1821}
1822
1823
/*
  Check if rows-in-block data file is consistent

  SYNOPSIS
    check_block_record()
    param    Check parameter; space and row statistics are stored here
    info     Maria handler
    extend   Type of check, passed on to check_head_page()
    record   Record buffer used when reading rows

  NOTES
    The data file is scanned one block at a time. Every
    share->bitmap.pages_covered'th page is a bitmap page, which is read
    into the scan bitmap buffer. All other pages are looked up in the
    current bitmap (3 bits per page); pages marked as empty are skipped,
    the rest are read and checked depending on their page type.

    After the scan the unused part of the last bitmap is verified to be
    zero and the number of blob ("full") pages and tails seen while
    scanning is compared with the numbers collected from the rows.

    share->now_transactional is set to 0 during the check and restored
    on all return paths after the scan has been initialized.

  RETURN
    0   ok
    -1  Interrupted (_ma_killed_ptr() returned true)
    1   Error
*/

static int check_block_record(HA_CHECK *param, MARIA_HA *info, int extend,
                              uchar *record)
{
  MARIA_SHARE *share= info->s;
  my_off_t pos;
  pgcache_page_no_t page;
  uchar *page_buff, *bitmap_buff, *data;
  char llbuff[22], llbuff2[22];
  uint block_size= share->block_size;
  /* Counted while scanning; compared with param->... at the end */
  ha_rows full_page_count, tail_count;
  my_bool UNINIT_VAR(full_dir), now_transactional;
  uint offset_page, offset, free_count;

  if (_ma_scan_init_block_record(info))
  {
    _ma_check_print_error(param, "got error %d when initializing scan",
                          my_errno);
    return 1;
  }

  now_transactional= info->s->now_transactional;
  info->s->now_transactional= 0;                /* Don't log changes */

  bitmap_buff= info->scan.bitmap_buff;
  page_buff= info->scan.page_buff;
  full_page_count= tail_count= 0;
  param->full_page_count= param->tail_count= 0;
  param->used= param->link_used= 0;
  param->splits= share->state.state.data_file_length / block_size;

  /* pos is the byte position and page the page number of the same block */
  for (pos= 0, page= 0;
       pos < share->state.state.data_file_length;
       pos+= block_size, page++)
  {
    uint UNINIT_VAR(row_count), real_row_count, UNINIT_VAR(empty_space),
      page_type, bitmap_pattern;
    uint bitmap_for_page;

    if (_ma_killed_ptr(param))
    {
      _ma_scan_end_block_record(info);
      info->s->now_transactional= now_transactional;
      return -1;                                /* Interrupted */
    }
    if ((page % share->bitmap.pages_covered) == 0)
    {
      /* Bitmap page */
      if (pagecache_read(share->pagecache,
                         &info->s->bitmap.file,
                         page, 1,
                         bitmap_buff,
                         PAGECACHE_PLAIN_PAGE,
                         PAGECACHE_LOCK_LEFT_UNLOCKED, 0) == 0)
      {
        _ma_check_print_error(param,
                              "Page %9s: Got error: %d when reading datafile",
                              llstr(page, llbuff), my_errno);
        goto err;
      }
      /* A bitmap page counts as used space that holds no row data */
      param->used+= block_size;
      param->link_used+= block_size;
      if (param->verbose > 2)
        print_bitmap_description(share, page, bitmap_buff);
      continue;
    }
    /* Skip pages marked as empty in bitmap */
    /* Each page has 3 bits in the bitmap; find the bit offset for this page */
    offset_page= (uint) ((page % share->bitmap.pages_covered) -1) * 3;
    offset= offset_page & 7;
    data= bitmap_buff + offset_page / 8;
    bitmap_pattern= uint2korr(data);
    if (!(bitmap_for_page= ((bitmap_pattern >> offset) & 7)))
    {
      param->empty+= block_size;
      param->del_blocks++;
      continue;
    }

    if (pagecache_read(share->pagecache,
                       &info->dfile,
                       page, 1,
                       page_buff,
                       share->page_type,
                       PAGECACHE_LOCK_LEFT_UNLOCKED, 0) == 0)
    {
      _ma_check_print_error(param,
                            "Page %9s: Got error: %d when reading datafile",
                            llstr(page, llbuff), my_errno);
      goto err;
    }
    page_type= page_buff[PAGE_TYPE_OFFSET] & PAGE_TYPE_MASK;
    if (page_type == UNALLOCATED_PAGE || page_type >= MAX_PAGE_TYPE)
    {
      _ma_check_print_error(param,
                            "Page: %9s Found wrong page type %d. Bitmap: %d '%s'",
                            llstr(page, llbuff), page_type,
                            bitmap_for_page, bits_to_txt[bitmap_for_page]);
      if (param->err_count++ > MAXERR || !(param->testflag & T_VERBOSE))
        goto err;
      continue;
    }
    /*
      Collect space statistics for the page and decide if the page should
      be regarded as having a full directory (full_dir) when comparing
      with the bitmap below.
    */
    switch ((enum en_page_type) page_type) {
    case UNALLOCATED_PAGE:
    case MAX_PAGE_TYPE:
    default:
      DBUG_ASSERT(0);                           /* Impossible */
      break;
    case HEAD_PAGE:
      row_count= page_buff[DIR_COUNT_OFFSET];
      empty_space= uint2korr(page_buff + EMPTY_SPACE_OFFSET);
      param->used+= block_size - empty_space;
      param->link_used+= (PAGE_HEADER_SIZE(info->s) + PAGE_SUFFIX_SIZE +
                          row_count * DIR_ENTRY_SIZE);
      /* Empty space below bitmap.sizes[3] is counted as lost */
      if (empty_space < share->bitmap.sizes[3])
        param->lost+= empty_space;
      if (check_page_layout(param, info, pos, page_buff, row_count,
                            empty_space, &real_row_count, &free_count))
        goto err;
      full_dir= (row_count == MAX_ROWS_PER_PAGE &&
                 page_buff[DIR_FREE_OFFSET] == END_OF_DIR_FREE_LIST);
      break;
    case TAIL_PAGE:
      row_count= page_buff[DIR_COUNT_OFFSET];
      empty_space= uint2korr(page_buff + EMPTY_SPACE_OFFSET);
      param->used+= block_size - empty_space;
      param->link_used+= (PAGE_HEADER_SIZE(info->s) + PAGE_SUFFIX_SIZE +
                          row_count * DIR_ENTRY_SIZE);
      /* Empty space below bitmap.sizes[6] is counted as lost */
      if (empty_space < share->bitmap.sizes[6])
        param->lost+= empty_space;
      if (check_page_layout(param, info, pos, page_buff, row_count,
                            empty_space, &real_row_count, &free_count))
        goto err;
      full_dir= (row_count - free_count >= MAX_ROWS_PER_PAGE -
                 share->base.blobs);
      break;
    case BLOB_PAGE:
      full_page_count++;
      full_dir= 0;
      empty_space= block_size;                  /* for error reporting */
      param->link_used+= FULL_PAGE_HEADER_SIZE(info->s);
      param->used+= block_size;
      break;
    }
    /* A page with a full directory is checked as having no empty space */
    if (_ma_check_bitmap_data(info, page_type,
                              full_dir ? 0 : empty_space,
                              bitmap_for_page))
    {
      _ma_check_print_error(param,
                            "Page %9s: Wrong data in bitmap. Page_type: "
                            "%d full: %d empty_space: %u Bitmap-bits: %d "
                            "'%s'",
                            llstr(page, llbuff), page_type, full_dir,
                            empty_space, bitmap_for_page,
                            bits_to_txt[bitmap_for_page]);
      if (param->err_count++ > MAXERR || !(param->testflag & T_VERBOSE))
        goto err;
    }
    /* Blob pages have no rows to check */
    if ((enum en_page_type) page_type == BLOB_PAGE)
      continue;
    param->empty+= empty_space;
    /* Tail pages are only counted; their rows are not read here */
    if ((enum en_page_type) page_type == TAIL_PAGE)
    {
      tail_count+= real_row_count;
      continue;
    }
    /* Head page: read and check every row on it */
    if (check_head_page(param, info, record, extend, pos, page_buff,
                        row_count))
      goto err;
  }

  /* Verify that rest of bitmap is zero */

  if (page % share->bitmap.pages_covered)
  {
    /* Not at end of bitmap */
    uint bitmap_pattern;
    uint byte_offset;

    /* Bit position for 'page', which is the first page after the scan */
    offset_page= (uint) ((page % share->bitmap.pages_covered) -1) * 3;
    offset= offset_page & 7;
    byte_offset= offset_page / 8;
    data= bitmap_buff + byte_offset;
    bitmap_pattern= uint2korr(data);
    if (byte_offset + 1 == share->bitmap.max_total_size)
    {
      /* On last byte of bitmap; Remove possible checksum */
      bitmap_pattern&= 0xff;
    }
    /* Any set bit from here to the end of the bitmap is an error */
    if (((bitmap_pattern >> offset)) ||
        (byte_offset + 2 < share->bitmap.max_total_size &&
         _ma_check_if_zero(data+2, share->bitmap.max_total_size -
                           byte_offset - 2)))
    {
      ulonglong bitmap_page;
      /* Round page down to the bitmap page that covers it */
      bitmap_page= page / share->bitmap.pages_covered;
      bitmap_page*= share->bitmap.pages_covered;

      _ma_check_print_error(param,
                            "Bitmap at page %s has pages reserved outside of "
                            "data file length",
                            llstr(bitmap_page, llbuff));
      DBUG_EXECUTE("bitmap", _ma_print_bitmap(&share->bitmap, bitmap_buff,
                                              bitmap_page););
    }
  }

  _ma_scan_end_block_record(info);

  /* Compare what the rows refer to with what was found while scanning */
  if (full_page_count != param->full_page_count)
    _ma_check_print_error(param, "Full page count read through records was %s "
                          "but we found %s pages while scanning table",
                          llstr(param->full_page_count, llbuff),
                          llstr(full_page_count, llbuff2));
  if (tail_count != param->tail_count)
    _ma_check_print_error(param, "Tail count read through records was %s but "
                          "we found %s tails while scanning table",
                          llstr(param->tail_count, llbuff),
                          llstr(tail_count, llbuff2));

  info->s->now_transactional= now_transactional;
  /* Errors that were printed without aborting the scan still give 1 */
  return param->error_printed != 0;

err:
  _ma_scan_end_block_record(info);
  info->s->now_transactional= now_transactional;
  return 1;
}
2054
2055
2056/* Check that record-link is ok */
2057
2058int maria_chk_data_link(HA_CHECK *param, MARIA_HA *info, my_bool extend)
2059{
2060 MARIA_SHARE *share= info->s;
2061 int error;
2062 uchar *record;
2063 char llbuff[22],llbuff2[22],llbuff3[22];
2064 DBUG_ENTER("maria_chk_data_link");
2065
2066 if (!(param->testflag & T_SILENT))
2067 {
2068 if (extend)
2069 puts("- check records and index references");
2070 else
2071 puts("- check record links");
2072 }
2073
2074 if (!(record= (uchar*) my_malloc(share->base.default_rec_buff_size, MYF(0))))
2075 {
2076 _ma_check_print_error(param,"Not enough memory for record");
2077 DBUG_RETURN(-1);
2078 }
2079 param->records= param->del_blocks= 0;
2080 param->used= param->link_used= param->splits= param->del_length= 0;
2081 param->lost= 0;
2082 param->tmp_record_checksum= param->glob_crc= 0;
2083 param->err_count= 0;
2084
2085 error= 0;
2086 param->empty= share->pack.header_length;
2087
2088 bzero((char*) param->tmp_key_crc,
2089 share->base.keys * sizeof(param->tmp_key_crc[0]));
2090
2091 info->in_check_table= 1; /* Don't assert on checksum errors */
2092
2093 switch (share->data_file_type) {
2094 case BLOCK_RECORD:
2095 error= check_block_record(param, info, extend, record);
2096 break;
2097 case STATIC_RECORD:
2098 error= check_static_record(param, info, extend, record);
2099 break;
2100 case DYNAMIC_RECORD:
2101 error= check_dynamic_record(param, info, extend, record);
2102 break;
2103 case COMPRESSED_RECORD:
2104 error= check_compressed_record(param, info, extend, record);
2105 break;
2106 case NO_RECORD:
2107 param->records= share->state.state.records;
2108 param->record_checksum= 0;
2109 extend= 1; /* No row checksums */
2110 /* no data, nothing to do */
2111 break;
2112 } /* switch */
2113
2114 info->in_check_table= 0;
2115
2116 if (error)
2117 goto err;
2118
2119 if (param->testflag & T_WRITE_LOOP)
2120 {
2121 fputs(" \r",stdout);
2122 fflush(stdout);
2123 }
2124 if (param->records != share->state.state.records)
2125 {
2126 _ma_check_print_error(param,
2127 "Record-count is not ok; found %-10s Should be: %s",
2128 llstr(param->records,llbuff),
2129 llstr(share->state.state.records,llbuff2));
2130 error=1;
2131 }
2132 if (param->record_checksum &&
2133 param->record_checksum != param->tmp_record_checksum)
2134 {
2135 _ma_check_print_error(param,
2136 "Key pointers and record positions doesn't match");
2137 error=1;
2138 }
2139 if (param->glob_crc != share->state.state.checksum &&
2140 (share->options &
2141 (HA_OPTION_CHECKSUM | HA_OPTION_COMPRESS_RECORD)))
2142 {
2143 _ma_check_print_warning(param,
2144 "Record checksum is not the same as checksum "
2145 "stored in the index file");
2146 error=1;
2147 }
2148 if (!extend)
2149 {
2150 uint key;
2151 for (key=0 ; key < share->base.keys; key++)
2152 {
2153 if (param->tmp_key_crc[key] != param->key_crc[key] &&
2154 !(share->keyinfo[key].flag &
2155 (HA_FULLTEXT | HA_SPATIAL | HA_RTREE_INDEX)))
2156 {
2157 _ma_check_print_error(param,"Checksum for key: %2d doesn't match "
2158 "checksum for records",
2159 key+1);
2160 error=1;
2161 }
2162 }
2163 }
2164
2165 if (param->del_length != share->state.state.empty)
2166 {
2167 _ma_check_print_warning(param,
2168 "Found %s deleted space. Should be %s",
2169 llstr(param->del_length,llbuff2),
2170 llstr(share->state.state.empty,llbuff));
2171 }
2172 /* Skip following checks for BLOCK RECORD as they don't make any sence */
2173 if (share->data_file_type != BLOCK_RECORD)
2174 {
2175 if (param->used + param->empty + param->del_length !=
2176 share->state.state.data_file_length)
2177 {
2178 _ma_check_print_warning(param,
2179 "Found %s record data and %s unused data and %s "
2180 "deleted data",
2181 llstr(param->used, llbuff),
2182 llstr(param->empty,llbuff2),
2183 llstr(param->del_length,llbuff3));
2184 _ma_check_print_warning(param,
2185 "Total %s Should be: %s",
2186 llstr((param->used+param->empty +
2187 param->del_length), llbuff),
2188 llstr(share->state.state.data_file_length,
2189 llbuff2));
2190 }
2191 if (param->del_blocks != share->state.state.del)
2192 {
2193 _ma_check_print_warning(param,
2194 "Found %10s deleted blocks. Should be: %s",
2195 llstr(param->del_blocks,llbuff),
2196 llstr(share->state.state.del,llbuff2));
2197 }
2198 if (param->splits != share->state.split)
2199 {
2200 _ma_check_print_warning(param,
2201 "Found %10s parts. Should be: %s",
2202 llstr(param->splits, llbuff),
2203 llstr(share->state.split,llbuff2));
2204 }
2205 }
2206 if (param->testflag & T_INFO)
2207 {
2208 if (param->warning_printed || param->error_printed)
2209 puts("");
2210 if (param->used != 0 && ! param->error_printed)
2211 {
2212 if (param->records)
2213 {
2214 printf("Records:%18s M.recordlength:%9lu Packed:%14.0f%%\n",
2215 llstr(param->records,llbuff),
2216 (long)((param->used - param->link_used)/param->records),
2217 (share->base.blobs ? 0.0 :
2218 (ulonglong2double((ulonglong) share->base.reclength *
2219 param->records)-
2220 my_off_t2double(param->used))/
2221 ulonglong2double((ulonglong) share->base.reclength *
2222 param->records)*100.0));
2223 printf("Recordspace used:%9.0f%% Empty space:%12d%% "
2224 "Blocks/Record: %6.2f\n",
2225 (ulonglong2double(param->used - param->link_used)/
2226 ulonglong2double(param->used-param->link_used+param->empty) *
2227 100.0),
2228 (!param->records ? 100 :
2229 (int) (ulonglong2double(param->del_length+param->empty)/
2230 my_off_t2double(param->used)*100.0)),
2231 ulonglong2double(param->splits - param->del_blocks) /
2232 param->records);
2233 }
2234 else
2235 printf("Records:%18s\n", "0");
2236 }
2237 printf("Record blocks:%12s Delete blocks:%10s\n",
2238 llstr(param->splits - param->del_blocks, llbuff),
2239 llstr(param->del_blocks, llbuff2));
2240 printf("Record data: %12s Deleted data: %10s\n",
2241 llstr(param->used - param->link_used,llbuff),
2242 llstr(param->del_length, llbuff2));
2243 printf("Empty space: %12s Linkdata: %10s\n",
2244 llstr(param->empty, llbuff),llstr(param->link_used, llbuff2));
2245 if (share->data_file_type == BLOCK_RECORD)
2246 {
2247 printf("Full pages: %12s Tail count: %12s\n",
2248 llstr(param->full_page_count, llbuff),
2249 llstr(param->tail_count, llbuff2));
2250 printf("Lost space: %12s\n", llstr(param->lost, llbuff));
2251 if (param->max_found_trid)
2252 {
2253 printf("Max trans. id: %11s\n",
2254 llstr(param->max_found_trid, llbuff));
2255 }
2256 }
2257 }
2258 my_free(record);
2259 DBUG_RETURN (error);
2260
2261err:
2262 my_free(record);
2263 param->testflag|=T_RETRY_WITHOUT_QUICK;
2264 DBUG_RETURN(1);
2265} /* maria_chk_data_link */
2266
2267
2268/**
2269 Prepares a table for a repair or index sort: flushes pages, records durably
2270 in the table that it is undergoing the operation (if that op crashes, that
2271 info will serve for Recovery and the user).
2272
2273 If we start overwriting the index file, and crash then, old REDOs will
2274 be tried and fail. To prevent that, we bump skip_redo_lsn, and thus we have
2275 to flush and sync pages so that old REDOs can be skipped.
2276 If this is not a bulk insert, which Recovery can handle gracefully (by
2277 truncating files, see UNDO_BULK_INSERT) we also mark the table
2278 crashed-on-repair, so that user knows it has to re-repair. If bulk insert we
2279 shouldn't mark it crashed-on-repair, because if we did this, the UNDO phase
2280 would skip the table (UNDO_BULK_INSERT would not be applied),
2281 and maria_chk would not improve that.
2282 If this is an OPTIMIZE which merely sorts index, we need to do the same
2283 too: old REDOs should not apply to the new index file.
2284 Only the flush is needed when in maria_chk which is not crash-safe.
2285
2286 @param info table
2287 @param param repair parameters
2288 @param discard_index if index pages can be thrown away
2289*/
2290
2291static my_bool protect_against_repair_crash(MARIA_HA *info,
2292 const HA_CHECK *param,
2293 my_bool discard_index)
2294{
2295 MARIA_SHARE *share= info->s;
2296
2297 /*
2298 There are other than recovery-related reasons to do the writes below:
2299 - the physical size of the data file is sometimes used during repair: we
2300 need to flush to have it exact
2301 - we flush the state because maria_open(HA_OPEN_COPY) will want to read
2302 it from disk.
2303 */
2304 if (_ma_flush_table_files(info, MARIA_FLUSH_DATA | MARIA_FLUSH_INDEX,
2305 FLUSH_FORCE_WRITE,
2306 discard_index ? FLUSH_IGNORE_CHANGED :
2307 FLUSH_FORCE_WRITE) ||
2308 (share->changed &&
2309 _ma_state_info_write(share,
2310 MA_STATE_INFO_WRITE_DONT_MOVE_OFFSET |
2311 MA_STATE_INFO_WRITE_FULL_INFO |
2312 MA_STATE_INFO_WRITE_LOCK)))
2313 return TRUE;
2314 /* In maria_chk this is not needed: */
2315 if (maria_multi_threaded && share->base.born_transactional)
2316 {
2317 if ((param->testflag & T_NO_CREATE_RENAME_LSN) == 0)
2318 {
2319 /* this can be true only for a transactional table */
2320 maria_mark_in_repair(info);
2321 if (_ma_state_info_write(share,
2322 MA_STATE_INFO_WRITE_DONT_MOVE_OFFSET |
2323 MA_STATE_INFO_WRITE_LOCK))
2324 return TRUE;
2325 }
2326 if (translog_status == TRANSLOG_OK &&
2327 _ma_update_state_lsns(share, translog_get_horizon(),
2328 share->state.create_trid, FALSE, FALSE))
2329 return TRUE;
2330 if (_ma_sync_table_files(info))
2331 return TRUE;
2332 }
2333 return FALSE;
2334}
2335
2336
2337/**
2338 @brief Initialize variables for repair
2339*/
2340
2341static int initialize_variables_for_repair(HA_CHECK *param,
2342 MARIA_SORT_INFO *sort_info,
2343 MARIA_SORT_PARAM *sort_param,
2344 MARIA_HA *info,
2345 my_bool rep_quick,
2346 MARIA_SHARE *org_share)
2347{
2348 MARIA_SHARE *share= info->s;
2349
2350 if (share->data_file_type == NO_RECORD)
2351 {
2352 _ma_check_print_error(param,
2353 "Can't repair tables with record type NO_DATA");
2354 return 1;
2355 }
2356
2357 /* Make a copy to allow us to restore state and check how state changed */
2358 memcpy(org_share, share, sizeof(*share));
2359
2360 /* Repair code relies on share->state.state so we have to update it here */
2361 if (share->lock.update_status)
2362 (*share->lock.update_status)(info);
2363
2364 bzero((char*) sort_info, sizeof(*sort_info));
2365 bzero((char*) sort_param, sizeof(*sort_param));
2366
2367 param->testflag|= T_REP; /* for easy checking */
2368 if (share->options & (HA_OPTION_CHECKSUM | HA_OPTION_COMPRESS_RECORD))
2369 param->testflag|= T_CALC_CHECKSUM;
2370 param->glob_crc= 0;
2371 if (rep_quick)
2372 param->testflag|= T_QUICK;
2373 else
2374 param->testflag&= ~T_QUICK;
2375 param->org_key_map= share->state.key_map;
2376
2377 /*
2378 Clear check variables set by repair. This is needed to allow one to run
2379 several repair's in a row with same param
2380 */
2381 param->retry_repair= 0;
2382 param->warning_printed= 0;
2383 param->error_printed= 0;
2384
2385 sort_param->sort_info= sort_info;
2386 sort_param->fix_datafile= ! rep_quick;
2387 sort_param->calc_checksum= MY_TEST(param->testflag & T_CALC_CHECKSUM);
2388 sort_info->info= sort_info->new_info= info;
2389 sort_info->param= param;
2390 set_data_file_type(sort_info, info->s);
2391 sort_info->org_data_file_type= share->data_file_type;
2392
2393 bzero(&info->rec_cache, sizeof(info->rec_cache));
2394 info->rec_cache.file= info->dfile.file;
2395 info->update= (short) (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED);
2396
2397 if (protect_against_repair_crash(info, param,
2398 !MY_TEST(param->testflag &
2399 T_CREATE_MISSING_KEYS)))
2400 return 1;
2401
2402 /* calculate max_records */
2403 sort_info->filelength= my_seek(info->dfile.file, 0L, MY_SEEK_END, MYF(0));
2404 param->max_progress= sort_info->filelength;
2405 if ((param->testflag & T_CREATE_MISSING_KEYS) ||
2406 sort_info->org_data_file_type == COMPRESSED_RECORD)
2407 sort_info->max_records= share->state.state.records;
2408 else
2409 {
2410 ulong rec_length;
2411 rec_length= MY_MAX(share->base.min_pack_length,
2412 share->base.min_block_length);
2413 sort_info->max_records= (ha_rows) (sort_info->filelength / rec_length);
2414 }
2415
2416 /* Set up transaction handler so that we can see all rows */
2417 if (param->max_trid == 0)
2418 {
2419 if (!ma_control_file_inited())
2420 param->max_trid= 0; /* Give warning for first trid found */
2421 else
2422 param->max_trid= max_trid_in_system();
2423 }
2424 maria_ignore_trids(info);
2425 /* Don't write transid's during repair */
2426 maria_versioning(info, 0);
2427 /* remember original number of rows */
2428 *info->state= info->s->state.state;
2429 return 0;
2430}
2431
2432
2433/*
2434 During initialize_variables_for_repair and related functions we set some
2435 variables to values that makes sence during repair.
2436 This function restores these values to their original values so that we can
2437 use the handler in MariaDB without having to close and open the table.
2438*/
2439
2440static void restore_table_state_after_repair(MARIA_HA *info,
2441 MARIA_SHARE *org_share)
2442{
2443 maria_versioning(info, info->s->have_versioning);
2444 info->s->lock_key_trees= org_share->lock_key_trees;
2445 DBUG_ASSERT(!info->s->have_versioning || info->s->lock_key_trees);
2446}
2447
2448
2449/**
2450 @brief Drop all indexes
2451
2452 @param[in] param check parameters
2453 @param[in] info MARIA_HA handle
2454 @param[in] force if to force drop all indexes
2455
2456 @return status
2457 @retval 0 OK
2458 @retval != 0 Error
2459
2460 @note
2461 Once allocated, index blocks remain part of the key file forever.
2462 When indexes are disabled, no block is freed. When enabling indexes,
2463 no block is freed either. The new indexes are create from new
2464 blocks. (Bug #4692)
2465
2466 Before recreating formerly disabled indexes, the unused blocks
2467 must be freed. There are two options to do this:
2468 - Follow the tree of disabled indexes, add all blocks to the
2469 deleted blocks chain. Would require a lot of random I/O.
2470 - Drop all blocks by clearing all index root pointers and all
2471 delete chain pointers and resetting key_file_length to the end
2472 of the index file header. This requires to recreate all indexes,
2473 even those that may still be intact.
2474 The second method is probably faster in most cases.
2475
2476 When disabling indexes, MySQL disables either all indexes or all
2477 non-unique indexes. When MySQL [re-]enables disabled indexes
2478 (T_CREATE_MISSING_KEYS), then we either have "lost" blocks in the
2479 index file, or there are no non-unique indexes. In the latter case,
2480 maria_repair*() would not be called as there would be no disabled
2481 indexes.
2482
2483 If there would be more unique indexes than disabled (non-unique)
2484 indexes, we could do the first method. But this is not implemented
2485 yet. By now we drop and recreate all indexes when repair is called.
2486
2487 However, there is an exception. Sometimes MySQL disables non-unique
2488 indexes when the table is empty (e.g. when copying a table in
2489 mysql_alter_table()). When enabling the non-unique indexes, they
2490 are still empty. So there is no index block that can be lost. This
2491 optimization is implemented in this function.
2492
2493 Note that in normal repair (T_CREATE_MISSING_KEYS not set) we
2494 recreate all enabled indexes unconditonally. We do not change the
2495 key_map. Otherwise we invert the key map temporarily (outside of
2496 this function) and recreate the then "seemingly" enabled indexes.
2497 When we cannot use the optimization, and drop all indexes, we
2498 pretend that all indexes were disabled. By the inversion, we will
2499 then recrate all indexes.
2500*/
2501
2502static int maria_drop_all_indexes(HA_CHECK *param, MARIA_HA *info,
2503 my_bool force)
2504{
2505 MARIA_SHARE *share= info->s;
2506 MARIA_STATE_INFO *state= &share->state;
2507 uint i;
2508 DBUG_ENTER("maria_drop_all_indexes");
2509
2510 /*
2511 If any of the disabled indexes has a key block assigned, we must
2512 drop and recreate all indexes to avoid losing index blocks.
2513
2514 If we want to recreate disabled indexes only _and_ all of these
2515 indexes are empty, we don't need to recreate the existing indexes.
2516 */
2517 if (!force && (param->testflag & T_CREATE_MISSING_KEYS))
2518 {
2519 DBUG_PRINT("repair", ("creating missing indexes"));
2520 for (i= 0; i < share->base.keys; i++)
2521 {
2522 DBUG_PRINT("repair", ("index #: %u key_root:%lld active: %d",
2523 i, state->key_root[i],
2524 maria_is_key_active(state->key_map, i)));
2525 if ((state->key_root[i] != HA_OFFSET_ERROR) &&
2526 !maria_is_key_active(state->key_map, i))
2527 {
2528 /*
2529 This index has at least one key block and it is disabled.
2530 We would lose its block(s) if would just recreate it.
2531 So we need to drop and recreate all indexes.
2532 */
2533 DBUG_PRINT("repair", ("nonempty and disabled: recreate all"));
2534 break;
2535 }
2536 }
2537 if (i >= share->base.keys)
2538 goto end;
2539
2540 /*
2541 We do now drop all indexes and declare them disabled. With the
2542 T_CREATE_MISSING_KEYS flag, maria_repair*() will recreate all
2543 disabled indexes and enable them.
2544 */
2545 maria_clear_all_keys_active(state->key_map);
2546 DBUG_PRINT("repair", ("declared all indexes disabled"));
2547 }
2548
2549 /* Clear index root block pointers. */
2550 for (i= 0; i < share->base.keys; i++)
2551 state->key_root[i]= HA_OFFSET_ERROR;
2552
2553 /* Drop the delete chain. */
2554 share->state.key_del= HA_OFFSET_ERROR;
2555
2556 /* Reset index file length to end of index file header. */
2557 share->state.state.key_file_length= share->base.keystart;
2558
2559end:
2560 DBUG_RETURN(0);
2561}
2562
2563
/*
  Recover old table by reading each record and writing all keys

  SYNOPSIS
    maria_repair()
    param       Check/repair parameters (flags, buffers, temp file name)
    info        Handler of the table to repair
    name        Table name; only used in the progress message
    rep_quick   If set, no new data file is created; only the index is
                rebuilt from the existing data file

  RETURN
    0   ok
    1   error; the table has then been marked crashed-on-repair

  NOTES
    Save new datafile-name in temp_filename.
    We overwrite the index file as we go (writekeys() for example), so if we
    crash during this the table is unusable and user (or Recovery in the
    future) must repeat the REPAIR/OPTIMIZE operation. We could use a
    temporary index file in the future (drawback: more disk space).

    The code after the 'err' label is common cleanup that is executed both
    on success and on failure; got_error tells the two apart.

  IMPLEMENTATION (for hard repair with block format)
   - Create new, unrelated MARIA_HA of the table
   - Create new datafile and associate it with new handler
   - Reset all statistic information in new handler
   - Copy all data to new handler with normal write operations
   - Move state of new handler to old handler
   - Close new handler
   - Close data file in old handler
   - Rename old data file to new data file.
   - Reopen data file in old handler
*/

int maria_repair(HA_CHECK *param, register MARIA_HA *info,
                 char *name, my_bool rep_quick)
{
  int error, got_error;
  ha_rows start_records,new_header_length;
  my_off_t del;
  File new_file;
  MARIA_SHARE *share= info->s;
  char llbuff[22],llbuff2[22];
  MARIA_SORT_INFO sort_info;
  MARIA_SORT_PARAM sort_param;
  my_bool block_record, scan_inited= 0, reenable_logging= 0;
  enum data_file_type org_data_file_type= share->data_file_type;
  myf sync_dir= ((share->now_transactional && !share->temporary) ?
                 MY_SYNC_DIR : 0);
  MARIA_SHARE backup_share;
  DBUG_ENTER("maria_repair");

  /* Assume failure until every step below has completed */
  got_error= 1;
  new_file= -1;
  start_records= share->state.state.records;
  if (!(param->testflag & T_SILENT))
  {
    printf("- recovering (with keycache) Aria-table '%s'\n",name);
    printf("Data records: %s\n", llstr(start_records, llbuff));
  }

  /*
    NOTE(review): the cleanup at 'err' uses sort_info, sort_param and
    backup_share, so it relies on initialize_variables_for_repair() having
    set them up even when it returns an error - confirm.
  */
  if (initialize_variables_for_repair(param, &sort_info, &sort_param, info,
                                      rep_quick, &backup_share))
    goto err;

  /* Logging is only re-enabled at the end if we were the ones disabling it */
  if ((reenable_logging= share->now_transactional))
    _ma_tmp_disable_logging_for_table(info, 0);

  /* With T_UNPACK the new file gets no pack header */
  sort_param.current_filepos= sort_param.filepos= new_header_length=
    ((param->testflag & T_UNPACK) ? 0L : share->pack.header_length);

  if (!rep_quick)
  {
    /* Get real path for data file */
    if ((new_file= mysql_file_create(key_file_tmp,
                                     fn_format(param->temp_filename,
                                               share->data_file_name.str, "",
                                               DATA_TMP_EXT, 2+4),
                                     0,param->tmpfile_createflag,
                                     MYF(0))) < 0)
    {
      _ma_check_print_error(param,"Can't create new tempfile: '%s'",
                            param->temp_filename);
      goto err;
    }
    /* Copy the header of the old data file, if there is one to keep */
    if (new_header_length &&
        maria_filecopy(param, new_file, info->dfile.file, 0L,
                       new_header_length, "datafile-header"))
      goto err;
    share->state.dellink= HA_OFFSET_ERROR;
    info->rec_cache.file= new_file;             /* For sort_delete_record */
    if (share->data_file_type == BLOCK_RECORD ||
        (param->testflag & T_UNPACK))
    {
      /* Rows are written through a separate handler on the new file */
      if (create_new_data_handle(&sort_param, new_file))
        goto err;
      sort_info.new_info->rec_cache.file= new_file;
    }
  }

  block_record= sort_info.new_info->s->data_file_type == BLOCK_RECORD;

  if (org_data_file_type != BLOCK_RECORD)
  {
    /* We need a read buffer to read rows in big blocks */
    if (init_io_cache(&param->read_cache, info->dfile.file,
                      (uint) param->read_buffer_length,
                      READ_CACHE, share->pack.header_length, 1, MYF(MY_WME)))
      goto err;
  }
  if (sort_info.new_info->s->data_file_type != BLOCK_RECORD)
  {
    /* When writing to not block records, we need a write buffer */
    if (!rep_quick)
    {
      if (init_io_cache(&sort_info.new_info->rec_cache, new_file,
                        (uint) param->write_buffer_length,
                        WRITE_CACHE, new_header_length, 1,
                        MYF(MY_WME | MY_WAIT_IF_FULL) & param->myf_rw))
        goto err;
      sort_info.new_info->opt_flag|=WRITE_CACHE_USED;
    }
  }
  else if (block_record)
  {
    /* Set before the call so that 'err' ends the scan even if init fails */
    scan_inited= 1;
    if (maria_scan_init(sort_info.info))
      goto err;
  }

  if (!(sort_param.record=
        (uchar *) my_malloc((uint)
                            share->base.default_rec_buff_size, MYF(0))) ||
      _ma_alloc_buffer(&sort_param.rec_buff, &sort_param.rec_buff_size,
                       share->base.default_rec_buff_size))
  {
    _ma_check_print_error(param, "Not enough memory for extra record");
    goto err;
  }

  sort_param.read_cache=param->read_cache;
  sort_param.pos=sort_param.max_pos=share->pack.header_length;
  param->read_cache.end_of_file= sort_info.filelength;
  sort_param.master=1;
  sort_info.max_records= ~(ha_rows) 0;

  /* Remember the old delete count, then restart all counters from zero */
  del= share->state.state.del;
  share->state.state.records= share->state.state.del= share->state.split= 0;
  share->state.state.empty= 0;

  if (param->testflag & T_CREATE_MISSING_KEYS)
    maria_set_all_keys_active(share->state.key_map, share->base.keys);
  maria_drop_all_indexes(param, info, TRUE);

  maria_lock_memory(param);                     /* Everything is alloced */

  sort_param.sort_info->info->in_check_table= 1;
  /* Re-create all keys, which are set in key_map. */
  while (!(error=sort_get_next_record(&sort_param)))
  {
    /* For block records the row is written before its keys */
    if (block_record && _ma_sort_write_record(&sort_param))
      goto err;

    if (writekeys(&sort_param))
    {
      /* Only a duplicate key can be handled; anything else is fatal */
      if (my_errno != HA_ERR_FOUND_DUPP_KEY)
        goto err;
      DBUG_DUMP("record", sort_param.record,
                share->base.default_rec_buff_size);
      _ma_check_print_warning(param,
                              "Duplicate key %2d for record at %10s against "
                              "new record at %10s",
                              info->errkey+1,
                              llstr(sort_param.current_filepos, llbuff),
                              llstr(info->dup_key_pos,llbuff2));
      if (param->testflag & T_VERBOSE)
      {
        MARIA_KEY tmp_key;
        MARIA_KEYDEF *keyinfo= share->keyinfo + info->errkey;
        (*keyinfo->make_key)(info, &tmp_key, (uint) info->errkey,
                             info->lastkey_buff,
                             sort_param.record, 0L, 0);
        _ma_print_key(stdout, &tmp_key);
      }
      sort_info.dupp++;
      /* Quick repair without T_FORCE_UNIQUENESS cannot drop the row */
      if ((param->testflag & (T_FORCE_UNIQUENESS|T_QUICK)) == T_QUICK)
      {
        param->testflag|=T_RETRY_WITHOUT_QUICK;
        param->error_printed=1;
        goto err;
      }
      /* purecov: begin tested */
      if (block_record)
      {
        /* Take back the row that was written before its keys */
        sort_info.new_info->s->state.state.records--;
        if ((*sort_info.new_info->s->write_record_abort)(sort_info.new_info))
        {
          _ma_check_print_error(param,"Couldn't delete duplicate row");
          goto err;
        }
      }
      /* purecov: end */
      continue;
    }
    if (!block_record)
    {
      if (_ma_sort_write_record(&sort_param))
        goto err;
      /* Filepos is pointer to where next row will be stored */
      sort_param.current_filepos= sort_param.filepos;
    }
  }
  /* A positive 'error' from sort_get_next_record() is treated as failure */
  if (error > 0 || maria_write_data_suffix(&sort_info, !rep_quick) ||
      flush_io_cache(&sort_info.new_info->rec_cache) ||
      param->read_cache.error < 0)
    goto err;

  if (param->testflag & T_WRITE_LOOP)
  {
    fputs(" \r",stdout); fflush(stdout);
  }
  /* Set the index file size to the rebuilt key_file_length */
  if (mysql_file_chsize(share->kfile.file,
                        share->state.state.key_file_length, 0, MYF(0)))
  {
    _ma_check_print_warning(param,
                            "Can't change size of indexfile, error: %d",
                            my_errno);
    goto err;
  }

  if (rep_quick && del+sort_info.dupp != share->state.state.del)
  {
    _ma_check_print_error(param,"Couldn't fix table with quick recovery: "
                          "Found wrong number of deleted records");
    _ma_check_print_error(param,"Run recovery again without -q");
    param->retry_repair=1;
    param->testflag|=T_RETRY_WITHOUT_QUICK;
    goto err;
  }

  if (param->testflag & T_SAFE_REPAIR)
  {
    /* Don't repair if we lost more than one row */
    if (sort_info.new_info->s->state.state.records+1 < start_records)
    {
      share->state.state.records= start_records;
      goto err;
    }
  }

  end_io_cache(&sort_info.new_info->rec_cache);
  info->opt_flag&= ~WRITE_CACHE_USED;

  /*
    As we have read the data file (sort_get_next_record()) we may have
    cached, non-changed blocks of it in the page cache. We must throw them
    away as we are going to close their descriptor ('new_file'). We also want
    to flush any index block, so that it is ready for the upcoming sync.
  */
  if (_ma_flush_table_files_before_swap(param, info))
    goto err;

  if (!rep_quick)
  {
    sort_info.new_info->s->state.state.data_file_length= sort_param.filepos;
    if (sort_info.new_info != sort_info.info)
    {
      /* Move the state of the temporary handler over to the real one */
      MARIA_STATE_INFO save_state= sort_info.new_info->s->state;
      /*
        NOTE(review): if maria_close() fails here, the cleanup at 'err'
        calls maria_close() on sort_info.new_info a second time - confirm
        that this is safe.
      */
      if (maria_close(sort_info.new_info))
      {
        _ma_check_print_error(param, "Got error %d on close", my_errno);
        goto err;
      }
      copy_data_file_state(&share->state, &save_state);
      new_file= -1;
      sort_info.new_info= info;
    }
    share->state.version=(ulong) time((time_t*) 0);     /* Force reopen */

    /* Replace the actual file with the temporary file */
    if (new_file >= 0)
      mysql_file_close(new_file, MYF(MY_WME));
    new_file= -1;
    change_data_file_descriptor(info, -1);
    if (maria_change_to_newfile(share->data_file_name.str, MARIA_NAME_DEXT,
                                DATA_TMP_EXT, param->backup_time,
                                (param->testflag & T_BACKUP_DATA ?
                                 MYF(MY_REDEL_MAKE_BACKUP): MYF(0)) |
                                sync_dir) ||
        _ma_open_datafile(info, share))
    {
      goto err;
    }
  }
  else
  {
    /* Data file was not rewritten; its length is the highest position seen */
    share->state.state.data_file_length= sort_param.max_pos;
  }
  if (param->testflag & T_CALC_CHECKSUM)
    share->state.state.checksum= param->glob_crc;

  if (!(param->testflag & T_SILENT))
  {
    if (start_records != share->state.state.records)
      printf("Data records: %s\n", llstr(share->state.state.records,llbuff));
  }
  if (sort_info.dupp)
    _ma_check_print_warning(param,
                            "%s records have been removed",
                            llstr(sort_info.dupp,llbuff));

  got_error= 0;
  /* If invoked by external program that uses thr_lock */
  if (&share->state.state != info->state)
    *info->state= *info->state_start= share->state.state;

err:
  /* Common cleanup; reached on success as well as on failure */
  if (scan_inited)
    maria_scan_end(sort_info.info);
  _ma_reset_state(info);

  end_io_cache(&param->read_cache);
  end_io_cache(&sort_info.new_info->rec_cache);
  info->opt_flag&= ~(READ_CACHE_USED | WRITE_CACHE_USED);
  sort_info.new_info->opt_flag&= ~(READ_CACHE_USED | WRITE_CACHE_USED);
  sort_param.sort_info->info->in_check_table= 0;
  /* this below could fail, shouldn't we detect error? */
  if (got_error)
  {
    if (! param->error_printed)
      _ma_check_print_error(param,"%d for record at pos %s",my_errno,
                            llstr(sort_param.start_recpos,llbuff));
    (void)_ma_flush_table_files_before_swap(param, info);
    /* Get rid of the temporary handler, if one is still open */
    if (sort_info.new_info && sort_info.new_info != sort_info.info)
    {
      unuse_data_file_descriptor(sort_info.new_info);
      maria_close(sort_info.new_info);
    }
    /* Remove the half-written temporary data file */
    if (new_file >= 0)
    {
      mysql_file_close(new_file,MYF(0));
      mysql_file_delete(key_file_tmp, param->temp_filename, MYF(MY_WME));
    }
    maria_mark_crashed_on_repair(info);
  }
  /* If caller had disabled logging it's not up to us to re-enable it */
  if (reenable_logging)
    _ma_reenable_logging_for_table(info, FALSE);
  restore_table_state_after_repair(info, &backup_share);

  my_free(sort_param.rec_buff);
  my_free(sort_param.record);
  my_free(sort_info.buff);
  if (!got_error && (param->testflag & T_UNPACK))
    restore_data_file_type(share);
  /* These state flags are set whether or not the repair succeeded */
  share->state.changed|= (STATE_NOT_OPTIMIZED_KEYS | STATE_NOT_SORTED_PAGES |
                          STATE_NOT_ANALYZED | STATE_NOT_ZEROFILLED);
  if (!rep_quick)
    share->state.changed&= ~(STATE_NOT_OPTIMIZED_ROWS | STATE_NOT_MOVABLE);
  DBUG_RETURN(got_error);
}
2913
2914
2915/* Uppdate keyfile when doing repair */
2916
2917static int writekeys(MARIA_SORT_PARAM *sort_param)
2918{
2919 uint i;
2920 MARIA_HA *info= sort_param->sort_info->info;
2921 MARIA_SHARE *share= info->s;
2922 uchar *record= sort_param->record;
2923 uchar *key_buff;
2924 my_off_t filepos= sort_param->current_filepos;
2925 MARIA_KEY key;
2926 DBUG_ENTER("writekeys");
2927
2928 key_buff= info->lastkey_buff+share->base.max_key_length;
2929
2930 for (i=0 ; i < share->base.keys ; i++)
2931 {
2932 if (maria_is_key_active(share->state.key_map, i))
2933 {
2934 if (share->keyinfo[i].flag & HA_FULLTEXT )
2935 {
2936 if (_ma_ft_add(info, i, key_buff, record, filepos))
2937 goto err;
2938 }
2939 else
2940 {
2941 if (!(*share->keyinfo[i].make_key)(info, &key, i, key_buff, record,
2942 filepos, 0))
2943 goto err;
2944 if ((*share->keyinfo[i].ck_insert)(info, &key))
2945 goto err;
2946 }
2947 }
2948 }
2949 DBUG_RETURN(0);
2950
2951 err:
2952 if (my_errno == HA_ERR_FOUND_DUPP_KEY)
2953 {
2954 info->errkey=(int) i; /* This key was found */
2955 while ( i-- > 0 )
2956 {
2957 if (maria_is_key_active(share->state.key_map, i))
2958 {
2959 if (share->keyinfo[i].flag & HA_FULLTEXT)
2960 {
2961 if (_ma_ft_del(info,i,key_buff,record,filepos))
2962 break;
2963 }
2964 else
2965 {
2966 (*share->keyinfo[i].make_key)(info, &key, i, key_buff, record,
2967 filepos, 0);
2968 if (_ma_ck_delete(info, &key))
2969 break;
2970 }
2971 }
2972 }
2973 }
2974 /* Remove checksum that was added to glob_crc in sort_get_next_record */
2975 if (sort_param->calc_checksum)
2976 sort_param->sort_info->param->glob_crc-= info->cur_row.checksum;
2977 DBUG_PRINT("error",("errno: %d",my_errno));
2978 DBUG_RETURN(-1);
2979} /* writekeys */
2980
2981
2982 /* Change all key-pointers that points to a records */
2983
2984int maria_movepoint(register MARIA_HA *info, uchar *record,
2985 MARIA_RECORD_POS oldpos, MARIA_RECORD_POS newpos,
2986 uint prot_key)
2987{
2988 uint i;
2989 uchar *key_buff;
2990 MARIA_SHARE *share= info->s;
2991 MARIA_PAGE page;
2992 DBUG_ENTER("maria_movepoint");
2993
2994 key_buff= info->lastkey_buff + share->base.max_key_length;
2995 for (i=0 ; i < share->base.keys; i++)
2996 {
2997 if (i != prot_key && maria_is_key_active(share->state.key_map, i))
2998 {
2999 MARIA_KEY key;
3000 (*share->keyinfo[i].make_key)(info, &key, i, key_buff, record, oldpos,
3001 0);
3002 if (key.keyinfo->flag & HA_NOSAME)
3003 { /* Change pointer direct */
3004 MARIA_KEYDEF *keyinfo;
3005 keyinfo=share->keyinfo+i;
3006 if (_ma_search(info, &key, (uint32) (SEARCH_SAME | SEARCH_SAVE_BUFF),
3007 share->state.key_root[i]))
3008 DBUG_RETURN(-1);
3009 _ma_page_setup(&page, info, keyinfo, info->last_keypage,
3010 info->keyread_buff);
3011
3012 _ma_dpointer(share, info->int_keypos - page.node -
3013 share->rec_reflength,newpos);
3014
3015 if (_ma_write_keypage(&page, PAGECACHE_LOCK_LEFT_UNLOCKED,
3016 DFLT_INIT_HITS))
3017 DBUG_RETURN(-1);
3018 }
3019 else
3020 { /* Change old key to new */
3021 if (_ma_ck_delete(info, &key))
3022 DBUG_RETURN(-1);
3023 (*share->keyinfo[i].make_key)(info, &key, i, key_buff, record, newpos,
3024 0);
3025 if (_ma_ck_write(info, &key))
3026 DBUG_RETURN(-1);
3027 }
3028 }
3029 }
3030 DBUG_RETURN(0);
3031} /* maria_movepoint */
3032
3033
3034 /* Tell system that we want all memory for our cache */
3035
3036void maria_lock_memory(HA_CHECK *param __attribute__((unused)))
3037{
3038#ifdef SUN_OS /* Key-cacheing thrases on sun 4.1 */
3039 if (param->opt_maria_lock_memory)
3040 {
3041 int success = mlockall(MCL_CURRENT); /* or plock(DATLOCK); */
3042 if (geteuid() == 0 && success != 0)
3043 _ma_check_print_warning(param,
3044 "Failed to lock memory. errno %d",my_errno);
3045 }
3046#endif
3047} /* maria_lock_memory */
3048
3049
3050/**
3051 Flush all changed blocks to disk.
3052
3053 We release blocks as it's unlikely that they would all be needed soon.
3054 This function needs to be called before swapping data or index files or
3055 syncing them.
3056
3057 @param param description of the repair operation
3058 @param info table
3059*/
3060
3061static my_bool _ma_flush_table_files_before_swap(HA_CHECK *param,
3062 MARIA_HA *info)
3063{
3064 DBUG_ENTER("_ma_flush_table_files_before_swap");
3065 if (_ma_flush_table_files(info, MARIA_FLUSH_DATA | MARIA_FLUSH_INDEX,
3066 FLUSH_RELEASE, FLUSH_RELEASE))
3067 {
3068 _ma_check_print_error(param, "%d when trying to write buffers", my_errno);
3069 DBUG_RETURN(TRUE);
3070 }
3071 DBUG_RETURN(FALSE);
3072}
3073
3074
/*
  Sort index for more efficient reads.

  Copies the index file header and then, for every active key with a root
  page, the whole key tree (through sort_one_index()) into a new temporary
  index file.  The temporary file then replaces the old index file, the
  index file is reopened and the key roots and key file length in the share
  state are updated to describe the new file.

  Returns
    0   Ok, or nothing done because the table has an R-tree index
    1   protect_against_repair_crash() failed
    -1  Any other error (the temporary file is deleted)
*/

int maria_sort_index(HA_CHECK *param, register MARIA_HA *info, char *name)
{
  reg2 uint key;
  reg1 MARIA_KEYDEF *keyinfo;
  File new_file;
  my_off_t index_pos[HA_MAX_POSSIBLE_KEY];  /* New root position per key */
  uint r_locks,w_locks;
  int old_lock;
  MARIA_SHARE *share= info->s;
  MARIA_STATE_INFO old_state;
  myf sync_dir= ((share->now_transactional && !share->temporary) ?
                 MY_SYNC_DIR : 0);
  DBUG_ENTER("maria_sort_index");

  /* cannot sort index files with R-tree indexes */
  for (key= 0,keyinfo= &share->keyinfo[0]; key < share->base.keys ;
       key++,keyinfo++)
    if (keyinfo->key_alg == HA_KEY_ALG_RTREE)
      DBUG_RETURN(0);

  if (!(param->testflag & T_SILENT))
    printf("- Sorting index for Aria-table '%s'\n",name);

  if (protect_against_repair_crash(info, param, FALSE))
    DBUG_RETURN(1);

  /* Get real path for index file */
  fn_format(param->temp_filename,name,"", MARIA_NAME_IEXT,2+4+32);
  /*
    Create the temporary index file next to the real one.
    NOTE(review): the file is created with key_file_kfile but deleted with
    key_file_tmp in the error path below - confirm which key is intended.
  */
  if ((new_file=mysql_file_create(key_file_kfile, fn_format(param->temp_filename,param->temp_filename,
                                  "", INDEX_TMP_EXT,2+4),
                                  0, param->tmpfile_createflag, MYF(0))) < 0)
  {
    _ma_check_print_error(param,"Can't create new tempfile: '%s'",
                          param->temp_filename);
    DBUG_RETURN(-1);
  }
  /* Copy everything before the first key page unchanged */
  if (maria_filecopy(param, new_file, share->kfile.file, 0L,
                     (ulong) share->base.keystart, "headerblock"))
    goto err;

  param->new_file_pos=share->base.keystart;
  for (key= 0,keyinfo= &share->keyinfo[0]; key < share->base.keys ;
       key++,keyinfo++)
  {
    if (maria_is_key_active(share->state.key_map, key) &&
        share->state.key_root[key] != HA_OFFSET_ERROR)
    {
      index_pos[key]=param->new_file_pos;       /* Write first block here */
      if (sort_one_index(param,info,keyinfo,share->state.key_root[key],
                         new_file))
        goto err;
    }
    else
      index_pos[key]= HA_OFFSET_ERROR;          /* No blocks */
  }

  /* Flush key cache for this file if we are calling this outside maria_chk */
  flush_pagecache_blocks(share->pagecache, &share->kfile,
                         FLUSH_IGNORE_CHANGED);

  share->state.version=(ulong) time((time_t*) 0);
  old_state= share->state;                      /* save state if not stored */
  r_locks= share->r_locks;
  w_locks= share->w_locks;
  old_lock= info->lock_type;

  /* Put same locks as old file */
  share->r_locks= share->w_locks= share->tot_locks= 0;
  (void) _ma_writeinfo(info,WRITEINFO_UPDATE_KEYFILE);
  /* Close the old index file; the descriptor is reset under intern_lock */
  mysql_mutex_lock(&share->intern_lock);
  mysql_file_close(share->kfile.file, MYF(MY_WME));
  share->kfile.file = -1;
  mysql_mutex_unlock(&share->intern_lock);
  mysql_file_close(new_file, MYF(MY_WME));
  /*
    Let the temporary file replace the index file and reopen it.
    new_file is already closed here, hence the jump to err2 on failure.
    The lock counters zeroed above are not restored on that path.
  */
  if (maria_change_to_newfile(share->index_file_name.str, MARIA_NAME_IEXT,
                              INDEX_TMP_EXT, 0, sync_dir) ||
      _ma_open_keyfile(share))
    goto err2;
  info->lock_type= F_UNLCK;                     /* Force maria_readinfo to lock */
  _ma_readinfo(info,F_WRLCK,0);                 /* Will lock the table */
  info->lock_type= old_lock;
  share->r_locks= r_locks;
  share->w_locks= w_locks;
  share->tot_locks= r_locks+w_locks;
  share->state= old_state;                      /* Restore old state */

  /* Make the state describe the layout of the new index file */
  share->state.state.key_file_length=param->new_file_pos;
  info->update= (short) (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED);
  for (key=0 ; key < share->base.keys ; key++)
    share->state.key_root[key]=index_pos[key];
  share->state.key_del= HA_OFFSET_ERROR;

  share->state.changed&= ~STATE_NOT_SORTED_PAGES;
  DBUG_EXECUTE_IF("maria_flush_whole_log",
                  {
                    DBUG_PRINT("maria_flush_whole_log", ("now"));
                    translog_flush(translog_get_horizon());
                  });
  DBUG_EXECUTE_IF("maria_crash_sort_index",
                  {
                    DBUG_PRINT("maria_crash_sort_index", ("now"));
                    DBUG_SUICIDE();
                  });
  DBUG_RETURN(0);

err:
  mysql_file_close(new_file, MYF(MY_WME));
err2:
  /* Remove the temporary index file */
  mysql_file_delete(key_file_tmp, param->temp_filename,MYF(MY_WME));
  DBUG_RETURN(-1);
} /* maria_sort_index */
3188
3189
3190/**
3191 @brief write a page directly to index file
3192
3193*/
3194
3195static int write_page(MARIA_SHARE *share, File file,
3196 uchar *buff, uint block_size,
3197 my_off_t pos, int myf_rw)
3198{
3199 int res;
3200 PAGECACHE_IO_HOOK_ARGS args;
3201 args.page= buff;
3202 args.pageno= (pgcache_page_no_t) (pos / share->block_size);
3203 args.data= (uchar*) share;
3204 (* share->kfile.pre_write_hook)(&args);
3205 res= (int)my_pwrite(file, args.page, block_size, pos, myf_rw);
3206 (* share->kfile.post_write_hook)(res, &args);
3207 return res;
3208}
3209
3210
/*
  Sort index blocks recursively using one index.

  Copies the key page at 'pagepos', and all pages reachable from it, to
  new_file.  Space for the page is reserved at param->new_file_pos before
  descending.  Each child pointer stored in the page is overwritten with
  the position the child is about to get, and the page itself is written
  last, zero-filled after its used part.

  For fulltext keys, entries with a negative subkey count are followed as
  well, using share->ft2_keyinfo for the tree they point to.

  Returns 0 on success, 1 on error (-1 if the page buffer could not be
  allocated).
*/

static int sort_one_index(HA_CHECK *param, MARIA_HA *info,
                          MARIA_KEYDEF *keyinfo,
                          my_off_t pagepos, File new_file)
{
  uint length,nod_flag;
  uchar *buff,*keypos,*endpos;
  my_off_t new_page_pos,next_page;
  MARIA_SHARE *share= info->s;
  MARIA_KEY key;
  MARIA_PAGE page;
  DBUG_ENTER("sort_one_index");

  /* cannot walk over R-tree indices */
  DBUG_ASSERT(keyinfo->key_alg != HA_KEY_ALG_RTREE);
  /* Reserve the next free position in the new file for this page */
  new_page_pos=param->new_file_pos;
  param->new_file_pos+=keyinfo->block_length;
  key.keyinfo= keyinfo;

  /* One buffer: the page itself followed by room for one unpacked key */
  if (!(buff= (uchar*) my_alloca((uint) keyinfo->block_length +
                                 keyinfo->maxlength +
                                 MARIA_INDEX_OVERHEAD_SIZE)))
  {
    _ma_check_print_error(param,"Not enough memory for key block");
    DBUG_RETURN(-1);
  }
  key.data= buff + keyinfo->block_length;

  if (_ma_fetch_keypage(&page, info, keyinfo, pagepos,
                        PAGECACHE_LOCK_LEFT_UNLOCKED,
                        DFLT_INIT_HITS, buff, 0))
  {
    report_keypage_fault(param, info, pagepos);
    goto err;
  }

  /* Only node pages and fulltext pages contain pointers to other pages */
  if ((nod_flag= page.node) || keyinfo->flag & HA_FULLTEXT)
  {
    keypos= page.buff + share->keypage_header + nod_flag;
    endpos= page.buff + page.size;

    for ( ;; )
    {
      if (nod_flag)
      {
        next_page= _ma_kpos(nod_flag,keypos);
        /* Save new pos */
        _ma_kpointer(info,keypos-nod_flag,param->new_file_pos);
        if (sort_one_index(param,info,keyinfo,next_page,new_file))
        {
          DBUG_PRINT("error",
                     ("From page: %ld, keyoffset: %lu used_length: %d",
                      (ulong) pagepos, (ulong) (keypos - buff),
                      (int) page.size));
          DBUG_DUMP("buff", page.buff, page.size);
          goto err;
        }
      }
      /*
        The end-of-page test comes after the child handling above, so the
        page pointer that follows the last key is processed too.
      */
      if (keypos >= endpos ||
          !(*keyinfo->get_key)(&key, page.flag, nod_flag, &keypos))
        break;
      DBUG_ASSERT(keypos <= endpos);
      if (keyinfo->flag & HA_FULLTEXT)
      {
        uint off;
        int subkeys;
        get_key_full_length_rdonly(off, key.data);
        subkeys= ft_sintXkorr(key.data + off);
        if (subkeys < 0)
        {
          /* The row position of this key is the page position of a tree */
          next_page= _ma_row_pos_from_key(&key);
          _ma_dpointer(share, keypos - nod_flag - share->rec_reflength,
                       param->new_file_pos); /* Save new pos */
          if (sort_one_index(param,info,&share->ft2_keyinfo,
                             next_page,new_file))
            goto err;
        }
      }
    }
  }

  /* Fill block with zero and write it to the new index file */
  length= page.size;
  bzero(buff+length,keyinfo->block_length-length);
  if (write_page(share, new_file, buff, keyinfo->block_length,
                 new_page_pos, MYF(MY_NABP | MY_WAIT_IF_FULL)))
  {
    _ma_check_print_error(param,"Can't write indexblock, error: %d",my_errno);
    goto err;
  }
  my_afree(buff);
  DBUG_RETURN(0);
err:
  my_afree(buff);
  DBUG_RETURN(1);
} /* sort_one_index */
3308
3309
3310/**
3311 @brief Fill empty space in index file with zeroes
3312
3313 @return
3314 @retval 0 Ok
3315 @retval 1 Error
3316*/
3317
3318static my_bool maria_zerofill_index(HA_CHECK *param, MARIA_HA *info,
3319 const char *name)
3320{
3321 MARIA_SHARE *share= info->s;
3322 MARIA_PINNED_PAGE page_link;
3323 char llbuff[21];
3324 uchar *buff;
3325 pgcache_page_no_t page;
3326 my_off_t pos;
3327 my_off_t key_file_length= share->state.state.key_file_length;
3328 uint block_size= share->block_size;
3329 my_bool zero_lsn= (share->base.born_transactional &&
3330 !(param->testflag & T_ZEROFILL_KEEP_LSN));
3331 int error= 1;
3332 DBUG_ENTER("maria_zerofill_index");
3333
3334 if (!(param->testflag & T_SILENT))
3335 printf("- Zerofilling index for Aria-table '%s'\n",name);
3336
3337 /* Go through the index file */
3338 for (pos= share->base.keystart, page= (ulonglong) (pos / block_size);
3339 pos < key_file_length;
3340 pos+= block_size, page++)
3341 {
3342 uint length;
3343 if (!(buff= pagecache_read(share->pagecache,
3344 &share->kfile, page,
3345 DFLT_INIT_HITS, 0,
3346 PAGECACHE_PLAIN_PAGE, PAGECACHE_LOCK_WRITE,
3347 &page_link.link)))
3348 {
3349 pagecache_unlock_by_link(share->pagecache, page_link.link,
3350 PAGECACHE_LOCK_WRITE_UNLOCK,
3351 PAGECACHE_UNPIN, LSN_IMPOSSIBLE,
3352 LSN_IMPOSSIBLE, 0, FALSE);
3353 _ma_check_print_error(param,
3354 "Page %9s: Got error %d when reading index file",
3355 llstr(pos, llbuff), my_errno);
3356 goto end;
3357 }
3358 if (zero_lsn)
3359 bzero(buff, LSN_SIZE);
3360
3361 if (share->base.born_transactional)
3362 {
3363 uint keynr= _ma_get_keynr(share, buff);
3364 if (keynr < share->base.keys)
3365 {
3366 MARIA_PAGE page;
3367 DBUG_ASSERT(keynr < share->base.keys);
3368
3369 _ma_page_setup(&page, info, share->keyinfo + keynr, pos, buff);
3370 if (_ma_compact_keypage(&page, ~(TrID) 0))
3371 {
3372 _ma_check_print_error(param,
3373 "Page %9s: Got error %d when reading index "
3374 "file",
3375 llstr(pos, llbuff), my_errno);
3376 goto end;
3377 }
3378 }
3379 }
3380
3381 length= _ma_get_page_used(share, buff);
3382 DBUG_ASSERT(length <= block_size);
3383 if (length < block_size)
3384 bzero(buff + length, block_size - length);
3385 pagecache_unlock_by_link(share->pagecache, page_link.link,
3386 PAGECACHE_LOCK_WRITE_UNLOCK,
3387 PAGECACHE_UNPIN, LSN_IMPOSSIBLE,
3388 LSN_IMPOSSIBLE, 1, FALSE);
3389 }
3390 error= 0; /* ok */
3391
3392end:
3393 if (flush_pagecache_blocks(share->pagecache, &share->kfile,
3394 FLUSH_FORCE_WRITE))
3395 DBUG_RETURN(1);
3396 DBUG_RETURN(error);
3397}
3398
3399
/**
  @brief Fill empty space in data file with zeroes

  Does nothing (and returns 0) unless the data file is of type
  BLOCK_RECORD.  Otherwise every page except the first page and the bitmap
  pages is read with a write lock and handled according to its type:

  - UNALLOCATED_PAGE, and BLOB_PAGE whose bitmap bits are 0: the whole page
    is zeroed (the LSN is kept if T_ZEROFILL_KEEP_LSN is given).
  - other BLOB_PAGE: only the LSN is zeroed (unless it is to be kept).
  - HEAD_PAGE / TAIL_PAGE: the LSN is zeroed (unless it is to be kept), the
    page is compacted, the bitmap is updated with the resulting free space
    and the area between the last row data and the directory is zeroed.

  Any other page type is reported as an error.

  @todo
  Zerofill all pages marked in bitmap as empty and change them to
  be of type UNALLOCATED_PAGE

  @param  param  description of the check operation (flags, error output)
  @param  info   table
  @param  name   table name, only used in the progress message

  @return
    @retval 0  Ok
    @retval 1  Error
*/

static my_bool maria_zerofill_data(HA_CHECK *param, MARIA_HA *info,
                                   const char *name)
{
  MARIA_SHARE *share= info->s;
  MARIA_PINNED_PAGE page_link;
  char llbuff[21];
  my_off_t pos;
  pgcache_page_no_t page;
  uint block_size= share->block_size;
  MARIA_FILE_BITMAP *bitmap= &share->bitmap;
  my_bool zero_lsn= !(param->testflag & T_ZEROFILL_KEEP_LSN), error;
  DBUG_ENTER("maria_zerofill_data");

  /* This works only with BLOCK_RECORD files */
  if (share->data_file_type != BLOCK_RECORD)
    DBUG_RETURN(0);

  if (!(param->testflag & T_SILENT))
    printf("- Zerofilling data for Aria-table '%s'\n",name);

  /* Go through the record file */
  for (page= 1, pos= block_size;
       pos < share->state.state.data_file_length;
       pos+= block_size, page++)
  {
    uchar *buff;
    enum en_page_type page_type;

    /* Ignore bitmap pages */
    if ((page % share->bitmap.pages_covered) == 0)
      continue;
    if (!(buff= pagecache_read(share->pagecache,
                               &info->dfile,
                               page, 1, 0,
                               PAGECACHE_PLAIN_PAGE, PAGECACHE_LOCK_WRITE,
                               &page_link.link)))
    {
      _ma_check_print_error(param,
                            "Page %9s: Got error: %d when reading datafile",
                            llstr(pos, llbuff), my_errno);
      goto err;
    }
    page_type= (enum en_page_type) (buff[PAGE_TYPE_OFFSET] & PAGE_TYPE_MASK);
    switch (page_type) {
    case UNALLOCATED_PAGE:
      /* Zero the whole page, optionally keeping the LSN at its start */
      if (zero_lsn)
        bzero(buff, block_size);
      else
        bzero(buff + LSN_SIZE, block_size - LSN_SIZE);
      break;
    case BLOB_PAGE:
      if (_ma_bitmap_get_page_bits(info, bitmap, page) == 0)
      {
        /* Unallocated page */
        if (zero_lsn)
          bzero(buff, block_size);
        else
          bzero(buff + LSN_SIZE, block_size - LSN_SIZE);
      }
      else
        if (zero_lsn)
          bzero(buff, LSN_SIZE);
      break;
    case HEAD_PAGE:
    case TAIL_PAGE:
    {
      uint max_entry= (uint) buff[DIR_COUNT_OFFSET];
      uint offset, dir_start, empty_space;
      uchar *dir;

      if (zero_lsn)
        bzero(buff, LSN_SIZE);
      if (max_entry != 0)
      {
        my_bool is_head_page= (page_type == HEAD_PAGE);
        /* Position of the last directory entry on the page */
        dir= dir_entry_pos(buff, block_size, max_entry - 1);
        _ma_compact_block_page(share,
                               buff, max_entry -1, 0,
                               is_head_page ? ~(TrID) 0 : 0,
                               is_head_page ?
                               share->base.min_block_length : 0);

        /* compactation may have increased free space */
        empty_space= uint2korr(buff + EMPTY_SPACE_OFFSET);
        if (!enough_free_entries_on_page(share, buff))
          empty_space= 0;                       /* Page is full */
        if (_ma_bitmap_set(info, page, is_head_page,
                           empty_space))
          goto err;

        /* Zerofill the not used part */
        offset= uint2korr(dir) + uint2korr(dir+2);
        dir_start= (uint) (dir - buff);
        DBUG_ASSERT(dir_start >= offset);
        if (dir_start > offset)
          bzero(buff + offset, dir_start - offset);
      }
      break;
    }
    default:
      _ma_check_print_error(param,
                            "Page %9s: Found unrecognizable block of type %d",
                            llstr(pos, llbuff), page_type);
      goto err;
    }
    /* Unlock, marking the page as changed so that it gets written */
    pagecache_unlock_by_link(share->pagecache, page_link.link,
                             PAGECACHE_LOCK_WRITE_UNLOCK,
                             PAGECACHE_UNPIN, LSN_IMPOSSIBLE,
                             LSN_IMPOSSIBLE, 1, FALSE);
  }
  error= _ma_bitmap_flush(share);
  if (flush_pagecache_blocks(share->pagecache, &info->dfile,
                             FLUSH_FORCE_WRITE))
    error= 1;
  DBUG_RETURN(error);

err:
  /* Release the current page without marking it as changed */
  pagecache_unlock_by_link(share->pagecache, page_link.link,
                           PAGECACHE_LOCK_WRITE_UNLOCK,
                           PAGECACHE_UNPIN, LSN_IMPOSSIBLE,
                           LSN_IMPOSSIBLE, 0, FALSE);
  /* flush what was changed so far */
  (void) _ma_bitmap_flush(share);
  (void) flush_pagecache_blocks(share->pagecache, &info->dfile,
                                FLUSH_FORCE_WRITE);

  DBUG_RETURN(1);
}
3540
3541
3542/**
3543 @brief Fill empty space in index and data files with zeroes
3544
3545 @return
3546 @retval 0 Ok
3547 @retval 1 Error
3548*/
3549
3550int maria_zerofill(HA_CHECK *param, MARIA_HA *info, const char *name)
3551{
3552 my_bool error, reenable_logging,
3553 zero_lsn= !(param->testflag & T_ZEROFILL_KEEP_LSN);
3554 MARIA_SHARE *share= info->s;
3555 DBUG_ENTER("maria_zerofill");
3556 if ((reenable_logging= share->now_transactional))
3557 _ma_tmp_disable_logging_for_table(info, 0);
3558 if (!(error= (maria_zerofill_index(param, info, name) ||
3559 maria_zerofill_data(param, info, name) ||
3560 _ma_set_uuid(info->s, 0))))
3561 {
3562 /*
3563 Mark that we have done zerofill of data and index. If we zeroed pages'
3564 LSN, table is movable.
3565 */
3566 share->state.changed&= ~STATE_NOT_ZEROFILLED;
3567 if (zero_lsn)
3568 {
3569 share->state.changed&= ~(STATE_NOT_MOVABLE | STATE_MOVED);
3570 /* Table should get new LSNs */
3571 share->state.create_rename_lsn= share->state.is_of_horizon=
3572 share->state.skip_redo_lsn= LSN_NEEDS_NEW_STATE_LSNS;
3573 }
3574 /* Ensure state is later flushed to disk, if within maria_chk */
3575 info->update= (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED);
3576
3577 /*
3578 Reset create_trid to make file comparable and to ensure that new
3579 trid's in the file starts from 0.
3580 */
3581 share->state.create_trid= 0;
3582 }
3583 if (reenable_logging)
3584 _ma_reenable_logging_for_table(info, FALSE);
3585 DBUG_RETURN(error);
3586}
3587
3588
3589/*
3590 Let temporary file replace old file.
3591 This assumes that the new file was created in the same
3592 directory as given by realpath(filename).
3593 This will ensure that any symlinks that are used will still work.
3594 Copy stats from old file to new file, deletes orignal and
3595 changes new file name to old file name
3596*/
3597
3598int maria_change_to_newfile(const char * filename, const char * old_ext,
3599 const char * new_ext, time_t backup_time,
3600 myf MyFlags)
3601{
3602 char old_filename[FN_REFLEN],new_filename[FN_REFLEN];
3603 /* Get real path to filename */
3604 (void) fn_format(old_filename,filename,"",old_ext,2+4+32);
3605 return my_redel(old_filename,
3606 fn_format(new_filename,old_filename,"",new_ext,2+4),
3607 backup_time,
3608 MYF(MY_WME | MY_LINK_WARNING | MyFlags));
3609} /* maria_change_to_newfile */
3610
3611
3612/* Copy a block between two files */
3613
3614int maria_filecopy(HA_CHECK *param, File to,File from,my_off_t start,
3615 my_off_t length, const char *type)
3616{
3617 uchar tmp_buff[IO_SIZE], *buff;
3618 ulong buff_length;
3619 DBUG_ENTER("maria_filecopy");
3620
3621 buff_length=(ulong) MY_MIN(param->write_buffer_length,length);
3622 if (!(buff=my_malloc(buff_length,MYF(0))))
3623 {
3624 buff=tmp_buff; buff_length=IO_SIZE;
3625 }
3626
3627 mysql_file_seek(from, start, MY_SEEK_SET,MYF(0));
3628 while (length > buff_length)
3629 {
3630 if (mysql_file_read(from, buff, buff_length, MYF(MY_NABP)) ||
3631 mysql_file_write(to, buff, buff_length, param->myf_rw))
3632 goto err;
3633 length-= buff_length;
3634 }
3635 if (mysql_file_read(from, buff, (size_t) length,MYF(MY_NABP)) ||
3636 mysql_file_write(to, buff, (size_t) length,param->myf_rw))
3637 goto err;
3638 if (buff != tmp_buff)
3639 my_free(buff);
3640 DBUG_RETURN(0);
3641err:
3642 if (buff != tmp_buff)
3643 my_free(buff);
3644 _ma_check_print_error(param,"Can't copy %s to tempfile, error %d",
3645 type,my_errno);
3646 DBUG_RETURN(1);
3647}
3648
3649
3650/*
3651 Repair table or given index using sorting
3652
3653 SYNOPSIS
3654 maria_repair_by_sort()
3655 param Repair parameters
3656 info MARIA handler to repair
3657 name Name of table (for warnings)
3658 rep_quick set to <> 0 if we should not change data file
3659
3660 RESULT
3661 0 ok
3662 <>0 Error
3663*/
3664
3665int maria_repair_by_sort(HA_CHECK *param, register MARIA_HA *info,
3666 const char * name, my_bool rep_quick)
3667{
3668 int got_error;
3669 uint i, keys_to_repair;
3670 ha_rows start_records;
3671 my_off_t new_header_length, org_header_length, del;
3672 File new_file;
3673 MARIA_SORT_PARAM sort_param;
3674 MARIA_SHARE *share= info->s;
3675 HA_KEYSEG *keyseg;
3676 double *rec_per_key_part;
3677 char llbuff[22];
3678 MARIA_SORT_INFO sort_info;
3679 ulonglong UNINIT_VAR(key_map);
3680 myf sync_dir= ((share->now_transactional && !share->temporary) ?
3681 MY_SYNC_DIR : 0);
3682 my_bool scan_inited= 0, reenable_logging= 0;
3683 MARIA_SHARE backup_share;
3684 DBUG_ENTER("maria_repair_by_sort");
3685
3686 got_error= 1;
3687 new_file= -1;
3688 start_records= share->state.state.records;
3689 if (!(param->testflag & T_SILENT))
3690 {
3691 printf("- recovering (with sort) Aria-table '%s'\n",name);
3692 printf("Data records: %s\n", llstr(start_records,llbuff));
3693 }
3694
3695 if (initialize_variables_for_repair(param, &sort_info, &sort_param, info,
3696 rep_quick, &backup_share))
3697 goto err;
3698
3699 if ((reenable_logging= share->now_transactional))
3700 _ma_tmp_disable_logging_for_table(info, 0);
3701
3702 org_header_length= share->pack.header_length;
3703 new_header_length= (param->testflag & T_UNPACK) ? 0 : org_header_length;
3704 sort_param.filepos= new_header_length;
3705
3706 if (!rep_quick)
3707 {
3708 /* Get real path for data file */
3709 if ((new_file=mysql_file_create(key_file_tmp,
3710 fn_format(param->temp_filename,
3711 share->data_file_name.str, "",
3712 DATA_TMP_EXT, 2+4),
3713 0,param->tmpfile_createflag,
3714 MYF(0))) < 0)
3715 {
3716 _ma_check_print_error(param,"Can't create new tempfile: '%s'",
3717 param->temp_filename);
3718 goto err;
3719 }
3720 if (new_header_length &&
3721 maria_filecopy(param, new_file, info->dfile.file, 0L,
3722 new_header_length, "datafile-header"))
3723 goto err;
3724
3725 share->state.dellink= HA_OFFSET_ERROR;
3726 info->rec_cache.file= new_file; /* For sort_delete_record */
3727 if (share->data_file_type == BLOCK_RECORD ||
3728 (param->testflag & T_UNPACK))
3729 {
3730 if (create_new_data_handle(&sort_param, new_file))
3731 goto err;
3732 sort_info.new_info->rec_cache.file= new_file;
3733 }
3734 }
3735
3736 if (!(sort_info.key_block=
3737 alloc_key_blocks(param,
3738 (uint) param->sort_key_blocks,
3739 share->base.max_key_block_length)))
3740 goto err;
3741 sort_info.key_block_end=sort_info.key_block+param->sort_key_blocks;
3742
3743 if (share->data_file_type != BLOCK_RECORD)
3744 {
3745 /* We need a read buffer to read rows in big blocks */
3746 if (init_io_cache(&param->read_cache, info->dfile.file,
3747 (uint) param->read_buffer_length,
3748 READ_CACHE, org_header_length, 1, MYF(MY_WME)))
3749 goto err;
3750 }
3751 if (sort_info.new_info->s->data_file_type != BLOCK_RECORD)
3752 {
3753 /* When writing to not block records, we need a write buffer */
3754 if (!rep_quick)
3755 {
3756 if (init_io_cache(&sort_info.new_info->rec_cache, new_file,
3757 (uint) param->write_buffer_length,
3758 WRITE_CACHE, new_header_length, 1,
3759 MYF(MY_WME | MY_WAIT_IF_FULL) & param->myf_rw))
3760 goto err;
3761 sort_info.new_info->opt_flag|= WRITE_CACHE_USED;
3762 }
3763 }
3764
3765 if (!(sort_param.record=
3766 (uchar*) my_malloc((size_t) share->base.default_rec_buff_size,
3767 MYF(0))) ||
3768 _ma_alloc_buffer(&sort_param.rec_buff, &sort_param.rec_buff_size,
3769 share->base.default_rec_buff_size))
3770 {
3771 _ma_check_print_error(param, "Not enough memory for extra record");
3772 goto err;
3773 }
3774
3775 /* Optionally drop indexes and optionally modify the key_map */
3776 maria_drop_all_indexes(param, info, FALSE);
3777 key_map= share->state.key_map;
3778 if (param->testflag & T_CREATE_MISSING_KEYS)
3779 {
3780 /* Invert the copied key_map to recreate all disabled indexes. */
3781 key_map= ~key_map;
3782 }
3783
3784 param->read_cache.end_of_file= sort_info.filelength;
3785 sort_param.wordlist=NULL;
3786 init_alloc_root(&sort_param.wordroot, "sort", FTPARSER_MEMROOT_ALLOC_SIZE, 0,
3787 MYF(param->malloc_flags));
3788
3789 sort_param.key_cmp=sort_key_cmp;
3790 sort_param.lock_in_memory=maria_lock_memory;
3791 sort_param.tmpdir=param->tmpdir;
3792 sort_param.master =1;
3793
3794 del=share->state.state.del;
3795
3796 /* Calculate number of keys to repair */
3797 keys_to_repair= 0;
3798 for (sort_param.key=0 ; sort_param.key < share->base.keys ;
3799 sort_param.key++)
3800 {
3801 if (maria_is_key_active(key_map, sort_param.key))
3802 keys_to_repair++;
3803 }
3804 /* For each key we scan and merge sort the keys */
3805 param->max_stage= keys_to_repair*2;
3806
3807 rec_per_key_part= param->new_rec_per_key_part;
3808 for (sort_param.key=0 ; sort_param.key < share->base.keys ;
3809 rec_per_key_part+=sort_param.keyinfo->keysegs, sort_param.key++)
3810 {
3811 sort_param.keyinfo=share->keyinfo+sort_param.key;
3812 /*
3813 Skip this index if it is marked disabled in the copied
3814 (and possibly inverted) key_map.
3815 */
3816 if (! maria_is_key_active(key_map, sort_param.key))
3817 {
3818 /* Remember old statistics for key */
3819 memcpy((char*) rec_per_key_part,
3820 (char*) (share->state.rec_per_key_part +
3821 (uint) (rec_per_key_part - param->new_rec_per_key_part)),
3822 sort_param.keyinfo->keysegs*sizeof(*rec_per_key_part));
3823 DBUG_PRINT("repair", ("skipping seemingly disabled index #: %u",
3824 sort_param.key));
3825 continue;
3826 }
3827
3828 if ((!(param->testflag & T_SILENT)))
3829 printf ("- Fixing index %d\n",sort_param.key+1);
3830
3831 sort_param.read_cache=param->read_cache;
3832 sort_param.seg=sort_param.keyinfo->seg;
3833 sort_param.max_pos= sort_param.pos= org_header_length;
3834 keyseg=sort_param.seg;
3835 bzero((char*) sort_param.unique,sizeof(sort_param.unique));
3836 sort_param.key_length=share->rec_reflength;
3837 for (i=0 ; keyseg[i].type != HA_KEYTYPE_END; i++)
3838 {
3839 sort_param.key_length+=keyseg[i].length;
3840 if (keyseg[i].flag & HA_SPACE_PACK)
3841 sort_param.key_length+=get_pack_length(keyseg[i].length);
3842 if (keyseg[i].flag & (HA_BLOB_PART | HA_VAR_LENGTH_PART))
3843 sort_param.key_length+= 2 + MY_TEST(keyseg[i].length >= 127);
3844 if (keyseg[i].flag & HA_NULL_PART)
3845 sort_param.key_length++;
3846 }
3847 share->state.state.records=share->state.state.del=share->state.split=0;
3848 share->state.state.empty=0;
3849
3850 if (sort_param.keyinfo->flag & HA_FULLTEXT)
3851 {
3852 uint ft_max_word_len_for_sort=FT_MAX_WORD_LEN_FOR_SORT*
3853 sort_param.keyinfo->seg->charset->mbmaxlen;
3854 sort_param.key_length+=ft_max_word_len_for_sort-HA_FT_MAXBYTELEN;
3855 /*
3856 fulltext indexes may have much more entries than the
3857 number of rows in the table. We estimate the number here.
3858
3859 Note, built-in parser is always nr. 0 - see ftparser_call_initializer()
3860 */
3861 if (sort_param.keyinfo->ftkey_nr == 0)
3862 {
3863 /*
3864 for built-in parser the number of generated index entries
3865 cannot be larger than the size of the data file divided
3866 by the minimal word's length
3867 */
3868 sort_info.max_records=
3869 (ha_rows) (sort_info.filelength/ft_min_word_len+1);
3870 }
3871 else
3872 {
3873 /*
3874 for external plugin parser we cannot tell anything at all :(
3875 so, we'll use all the sort memory and start from ~10 buffpeks.
3876 (see _ma_create_index_by_sort)
3877 */
3878 sort_info.max_records=
3879 10*param->sort_buffer_length/sort_param.key_length;
3880 }
3881
3882 sort_param.key_read= sort_maria_ft_key_read;
3883 sort_param.key_write= sort_maria_ft_key_write;
3884 }
3885 else
3886 {
3887 sort_param.key_read= sort_key_read;
3888 sort_param.key_write= sort_key_write;
3889 }
3890
3891 if (sort_info.new_info->s->data_file_type == BLOCK_RECORD)
3892 {
3893 scan_inited= 1;
3894 if (maria_scan_init(sort_info.info))
3895 goto err;
3896 }
3897 if (_ma_create_index_by_sort(&sort_param,
3898 (my_bool) (!(param->testflag & T_VERBOSE)),
3899 (size_t) param->sort_buffer_length))
3900 {
3901 if ((param->testflag & T_CREATE_UNIQUE_BY_SORT) && sort_param.sort_info->dupp)
3902 share->state.dupp_key= sort_param.key;
3903 else
3904 param->retry_repair= 1;
3905 _ma_check_print_error(param, "Create index by sort failed");
3906 goto err;
3907 }
3908 DBUG_EXECUTE_IF("maria_flush_whole_log",
3909 {
3910 DBUG_PRINT("maria_flush_whole_log", ("now"));
3911 translog_flush(translog_get_horizon());
3912 });
3913 DBUG_EXECUTE_IF("maria_crash_create_index_by_sort",
3914 {
3915 DBUG_PRINT("maria_crash_create_index_by_sort", ("now"));
3916 DBUG_SUICIDE();
3917 });
3918 if (scan_inited)
3919 {
3920 scan_inited= 0;
3921 maria_scan_end(sort_info.info);
3922 }
3923
3924 /* No need to calculate checksum again. */
3925 sort_param.calc_checksum= 0;
3926 free_root(&sort_param.wordroot, MYF(0));
3927
3928 /* Set for next loop */
3929 sort_info.max_records= (ha_rows) sort_info.new_info->s->state.state.records;
3930 param->stage++; /* Next stage */
3931 param->progress= 0;
3932
3933 if (param->testflag & T_STATISTICS)
3934 maria_update_key_parts(sort_param.keyinfo, rec_per_key_part,
3935 sort_param.unique,
3936 (param->stats_method ==
3937 MI_STATS_METHOD_IGNORE_NULLS ?
3938 sort_param.notnull : NULL),
3939 (ulonglong) share->state.state.records);
3940 maria_set_key_active(share->state.key_map, sort_param.key);
3941 DBUG_PRINT("repair", ("set enabled index #: %u", sort_param.key));
3942
3943 if (_ma_flush_table_files_before_swap(param, info))
3944 goto err;
3945
3946 if (sort_param.fix_datafile)
3947 {
3948 param->read_cache.end_of_file=sort_param.filepos;
3949 if (maria_write_data_suffix(&sort_info,1) ||
3950 end_io_cache(&sort_info.new_info->rec_cache))
3951 {
3952 _ma_check_print_error(param, "Got error when flushing row cache");
3953 goto err;
3954 }
3955 sort_info.new_info->opt_flag&= ~WRITE_CACHE_USED;
3956
3957 if (param->testflag & T_SAFE_REPAIR)
3958 {
3959 /* Don't repair if we loosed more than one row */
3960 if (sort_info.new_info->s->state.state.records+1 < start_records)
3961 {
3962 _ma_check_print_error(param,
3963 "Rows lost (Found %lu of %lu); Aborting "
3964 "because safe repair was requested",
3965 (ulong) sort_info.new_info->s->
3966 state.state.records,
3967 (ulong) start_records);
3968 share->state.state.records=start_records;
3969 goto err;
3970 }
3971 }
3972
3973 sort_info.new_info->s->state.state.data_file_length= sort_param.filepos;
3974 if (sort_info.new_info != sort_info.info)
3975 {
3976 MARIA_STATE_INFO save_state= sort_info.new_info->s->state;
3977 if (maria_close(sort_info.new_info))
3978 {
3979 _ma_check_print_error(param, "Got error %d on close", my_errno);
3980 goto err;
3981 }
3982 copy_data_file_state(&share->state, &save_state);
3983 new_file= -1;
3984 sort_info.new_info= info;
3985 info->rec_cache.file= info->dfile.file;
3986 }
3987
3988 share->state.version=(ulong) time((time_t*) 0); /* Force reopen */
3989
3990 /* Replace the actual file with the temporary file */
3991 if (new_file >= 0)
3992 {
3993 mysql_file_close(new_file, MYF(MY_WME));
3994 new_file= -1;
3995 }
3996 change_data_file_descriptor(info, -1);
3997 if (maria_change_to_newfile(share->data_file_name.str, MARIA_NAME_DEXT,
3998 DATA_TMP_EXT, param->backup_time,
3999 (param->testflag & T_BACKUP_DATA ?
4000 MYF(MY_REDEL_MAKE_BACKUP): MYF(0)) |
4001 sync_dir) ||
4002 _ma_open_datafile(info, share))
4003 {
4004 _ma_check_print_error(param, "Couldn't change to new data file");
4005 goto err;
4006 }
4007 if (param->testflag & T_UNPACK)
4008 restore_data_file_type(share);
4009
4010 org_header_length= share->pack.header_length;
4011 sort_info.org_data_file_type= share->data_file_type;
4012 sort_info.filelength= share->state.state.data_file_length;
4013 sort_param.fix_datafile=0;
4014
4015 /* Offsets are now in proportion to the new file length */
4016 param->max_progress= sort_info.filelength;
4017
4018 }
4019 else
4020 share->state.state.data_file_length=sort_param.max_pos;
4021
4022 param->read_cache.file= info->dfile.file; /* re-init read cache */
4023 if (share->data_file_type != BLOCK_RECORD)
4024 reinit_io_cache(&param->read_cache, READ_CACHE,
4025 share->pack.header_length, 1, 1);
4026 }
4027
4028 if (param->testflag & T_WRITE_LOOP)
4029 {
4030 fputs(" \r",stdout);
4031 fflush(stdout);
4032 }
4033
4034 if (rep_quick && del+sort_info.dupp != share->state.state.del)
4035 {
4036 _ma_check_print_error(param,"Couldn't fix table with quick recovery: "
4037 "Found wrong number of deleted records");
4038 _ma_check_print_error(param,"Run recovery again without -q");
4039 got_error=1;
4040 param->retry_repair=1;
4041 param->testflag|=T_RETRY_WITHOUT_QUICK;
4042 goto err;
4043 }
4044
4045 if (rep_quick && (param->testflag & T_FORCE_UNIQUENESS))
4046 {
4047 my_off_t skr= share->state.state.data_file_length +
4048 ((sort_info.org_data_file_type == COMPRESSED_RECORD) ?
4049 MEMMAP_EXTRA_MARGIN : 0);
4050#ifdef USE_RELOC
4051 if (sort_info.org_data_file_type == STATIC_RECORD &&
4052 skr < share->base.reloc*share->base.min_pack_length)
4053 skr=share->base.reloc*share->base.min_pack_length;
4054#endif
4055 if (skr != sort_info.filelength)
4056 if (mysql_file_chsize(info->dfile.file, skr, 0, MYF(0)))
4057 _ma_check_print_warning(param,
4058 "Can't change size of datafile, error: %d",
4059 my_errno);
4060 }
4061
4062 if (param->testflag & T_CALC_CHECKSUM)
4063 share->state.state.checksum=param->glob_crc;
4064
4065 if (mysql_file_chsize(share->kfile.file,
4066 share->state.state.key_file_length, 0, MYF(0)))
4067 _ma_check_print_warning(param,
4068 "Can't change size of indexfile, error: %d",
4069 my_errno);
4070
4071 if (!(param->testflag & T_SILENT))
4072 {
4073 if (start_records != share->state.state.records)
4074 printf("Data records: %s\n", llstr(share->state.state.records,llbuff));
4075 }
4076 if (sort_info.dupp)
4077 _ma_check_print_warning(param,
4078 "%s records have been removed",
4079 llstr(sort_info.dupp,llbuff));
4080 got_error=0;
4081 /* If invoked by external program that uses thr_lock */
4082 if (&share->state.state != info->state)
4083 *info->state= *info->state_start= share->state.state;
4084
4085err:
4086 if (scan_inited)
4087 maria_scan_end(sort_info.info);
4088 _ma_reset_state(info);
4089
4090 end_io_cache(&sort_info.new_info->rec_cache);
4091 end_io_cache(&param->read_cache);
4092 info->opt_flag&= ~(READ_CACHE_USED | WRITE_CACHE_USED);
4093 sort_info.new_info->opt_flag&= ~(READ_CACHE_USED | WRITE_CACHE_USED);
4094 if (got_error)
4095 {
4096 if (! param->error_printed)
4097 _ma_check_print_error(param,"%d when fixing table",my_errno);
4098 (void)_ma_flush_table_files_before_swap(param, info);
4099 if (sort_info.new_info && sort_info.new_info != sort_info.info)
4100 {
4101 unuse_data_file_descriptor(sort_info.new_info);
4102 maria_close(sort_info.new_info);
4103 }
4104 if (new_file >= 0)
4105 {
4106 mysql_file_close(new_file, MYF(0));
4107 mysql_file_delete(key_file_tmp, param->temp_filename, MYF(MY_WME));
4108 }
4109 maria_mark_crashed_on_repair(info);
4110 }
4111 else
4112 {
4113 if (key_map == share->state.key_map)
4114 share->state.changed&= ~STATE_NOT_OPTIMIZED_KEYS;
4115 /*
4116 Now that we have flushed and forced everything, we can bump
4117 create_rename_lsn:
4118 */
4119 DBUG_EXECUTE_IF("maria_flush_whole_log",
4120 {
4121 DBUG_PRINT("maria_flush_whole_log", ("now"));
4122 translog_flush(translog_get_horizon());
4123 });
4124 DBUG_EXECUTE_IF("maria_crash_repair",
4125 {
4126 DBUG_PRINT("maria_crash_repair", ("now"));
4127 DBUG_SUICIDE();
4128 });
4129 }
4130 share->state.changed|= STATE_NOT_SORTED_PAGES;
4131 if (!rep_quick)
4132 share->state.changed&= ~(STATE_NOT_OPTIMIZED_ROWS | STATE_NOT_ZEROFILLED |
4133 STATE_NOT_MOVABLE);
4134
4135 /* If caller had disabled logging it's not up to us to re-enable it */
4136 if (reenable_logging)
4137 _ma_reenable_logging_for_table(info, FALSE);
4138 restore_table_state_after_repair(info, &backup_share);
4139
4140 my_free(sort_param.rec_buff);
4141 my_free(sort_param.record);
4142 my_free(sort_info.key_block);
4143 my_free(sort_info.ft_buf);
4144 my_free(sort_info.buff);
4145 DBUG_RETURN(got_error);
4146}
4147
4148
4149/*
4150 Threaded repair of table using sorting
4151
4152 SYNOPSIS
4153 maria_repair_parallel()
4154 param Repair parameters
4155 info MARIA handler to repair
4156 name Name of table (for warnings)
4157 rep_quick set to <> 0 if we should not change data file
4158
4159 DESCRIPTION
4160 Same as maria_repair_by_sort but do it multithreaded
4161 Each key is handled by a separate thread.
4162 TODO: make a number of threads a parameter
4163
4164 In parallel repair we use one thread per index. There are two modes:
4165
4166 Quick
4167
4168 Only the indexes are rebuilt. All threads share a read buffer.
4169 Every thread that needs fresh data in the buffer enters the shared
4170 cache lock. The last thread joining the lock reads the buffer from
4171 the data file and wakes all other threads.
4172
4173 Non-quick
4174
4175 The data file is rebuilt and all indexes are rebuilt to point to
4176 the new record positions. One thread is the master thread. It
4177 reads from the old data file and writes to the new data file. It
4178 also creates one of the indexes. The other threads read from a
4179 buffer which is filled by the master. If they need fresh data,
4180 they enter the shared cache lock. If the masters write buffer is
4181 full, it flushes it to the new data file and enters the shared
4182 cache lock too. When all threads joined in the lock, the master
4183 copies its write buffer to the read buffer for the other threads
4184 and wakes them.
4185
4186 RESULT
4187 0 ok
4188 <>0 Error
4189*/
4190
4191int maria_repair_parallel(HA_CHECK *param, register MARIA_HA *info,
4192 const char * name, my_bool rep_quick)
4193{
4194 int got_error;
4195 uint i,key, total_key_length, istep;
4196 ha_rows start_records;
4197 my_off_t new_header_length,del;
4198 File new_file;
4199 MARIA_SORT_PARAM *sort_param=0, tmp_sort_param;
4200 MARIA_SHARE *share= info->s;
4201 double *rec_per_key_part;
4202 HA_KEYSEG *keyseg;
4203 char llbuff[22];
4204 IO_CACHE new_data_cache; /* For non-quick repair. */
4205 IO_CACHE_SHARE io_share;
4206 MARIA_SORT_INFO sort_info;
4207 MARIA_SHARE backup_share;
4208 ulonglong UNINIT_VAR(key_map);
4209 pthread_attr_t thr_attr;
4210 myf sync_dir= ((share->now_transactional && !share->temporary) ?
4211 MY_SYNC_DIR : 0);
4212 my_bool reenable_logging= 0;
4213 DBUG_ENTER("maria_repair_parallel");
4214
4215 got_error= 1;
4216 new_file= -1;
4217 start_records= share->state.state.records;
4218 if (!(param->testflag & T_SILENT))
4219 {
4220 printf("- parallel recovering (with sort) Aria-table '%s'\n",name);
4221 printf("Data records: %s\n", llstr(start_records, llbuff));
4222 }
4223
4224 bzero(&new_data_cache, sizeof(new_data_cache));
4225 if (initialize_variables_for_repair(param, &sort_info, &tmp_sort_param, info,
4226 rep_quick, &backup_share))
4227 goto err;
4228
4229 if ((reenable_logging= share->now_transactional))
4230 _ma_tmp_disable_logging_for_table(info, 0);
4231
4232 new_header_length= ((param->testflag & T_UNPACK) ? 0 :
4233 share->pack.header_length);
4234
4235 /*
4236 Quick repair (not touching data file, rebuilding indexes):
4237 {
4238 Read cache is (HA_CHECK *param)->read_cache using info->dfile.file.
4239 }
4240
4241 Non-quick repair (rebuilding data file and indexes):
4242 {
4243 Master thread:
4244
4245 Read cache is (HA_CHECK *param)->read_cache using info->dfile.file.
4246 Write cache is (MARIA_INFO *info)->rec_cache using new_file.
4247
4248 Slave threads:
4249
4250 Read cache is new_data_cache synced to master rec_cache.
4251
4252 The final assignment of the filedescriptor for rec_cache is done
4253 after the cache creation.
4254
4255 Don't check file size on new_data_cache, as the resulting file size
4256 is not known yet.
4257
4258 As rec_cache and new_data_cache are synced, write_buffer_length is
4259 used for the read cache 'new_data_cache'. Both start at the same
4260 position 'new_header_length'.
4261 }
4262 */
4263 DBUG_PRINT("info", ("is quick repair: %d", (int) rep_quick));
4264 if (!rep_quick)
4265 my_b_clear(&new_data_cache);
4266
4267 /* Initialize pthread structures before goto err. */
4268 mysql_mutex_init(key_SORT_INFO_mutex, &sort_info.mutex, MY_MUTEX_INIT_FAST);
4269 mysql_cond_init(key_SORT_INFO_cond, &sort_info.cond, 0);
4270
4271 if (!(sort_info.key_block=
4272 alloc_key_blocks(param, (uint) param->sort_key_blocks,
4273 share->base.max_key_block_length)))
4274 goto err;
4275
4276 if (init_io_cache(&param->read_cache, info->dfile.file,
4277 (uint) param->read_buffer_length,
4278 READ_CACHE, share->pack.header_length, 1, MYF(MY_WME)))
4279 goto err;
4280
4281 sort_info.key_block_end=sort_info.key_block+param->sort_key_blocks;
4282 info->opt_flag|=WRITE_CACHE_USED;
4283 info->rec_cache.file= info->dfile.file; /* for sort_delete_record */
4284
4285 if (!rep_quick)
4286 {
4287 /* Get real path for data file */
4288 if ((new_file= mysql_file_create(key_file_tmp,
4289 fn_format(param->temp_filename,
4290 share->data_file_name.str, "",
4291 DATA_TMP_EXT,
4292 2+4),
4293 0,param->tmpfile_createflag,
4294 MYF(0))) < 0)
4295 {
4296 _ma_check_print_error(param,"Can't create new tempfile: '%s'",
4297 param->temp_filename);
4298 goto err;
4299 }
4300 if (new_header_length &&
4301 maria_filecopy(param, new_file, info->dfile.file,0L,new_header_length,
4302 "datafile-header"))
4303 goto err;
4304 if (param->testflag & T_UNPACK)
4305 restore_data_file_type(share);
4306 share->state.dellink= HA_OFFSET_ERROR;
4307
4308 if (init_io_cache(&new_data_cache, -1,
4309 (uint) param->write_buffer_length,
4310 READ_CACHE, new_header_length, 1,
4311 MYF(MY_WME | MY_DONT_CHECK_FILESIZE)))
4312 goto err;
4313
4314 if (init_io_cache(&info->rec_cache, new_file,
4315 (uint) param->write_buffer_length,
4316 WRITE_CACHE, new_header_length, 1,
4317 MYF(MY_WME | MY_WAIT_IF_FULL) & param->myf_rw))
4318 goto err;
4319
4320 }
4321
4322 /* Optionally drop indexes and optionally modify the key_map. */
4323 maria_drop_all_indexes(param, info, FALSE);
4324 key_map= share->state.key_map;
4325 if (param->testflag & T_CREATE_MISSING_KEYS)
4326 {
4327 /* Invert the copied key_map to recreate all disabled indexes. */
4328 key_map= ~key_map;
4329 }
4330
4331 param->read_cache.end_of_file= sort_info.filelength;
4332
4333 /*
4334 +1 below is required hack for parallel repair mode.
4335 The share->state.state.records value, that is compared later
4336 to sort_info.max_records and cannot exceed it, is
4337 increased in sort_key_write. In maria_repair_by_sort, sort_key_write
4338 is called after sort_key_read, where the comparison is performed,
4339 but in parallel mode master thread can call sort_key_write
4340 before some other repair thread calls sort_key_read.
4341 Furthermore I'm not even sure +1 would be enough.
4342 May be sort_info.max_records shold be always set to max value in
4343 parallel mode.
4344 */
4345 sort_info.max_records++;
4346
4347 del=share->state.state.del;
4348
4349 if (!(sort_param=(MARIA_SORT_PARAM *)
4350 my_malloc((uint) share->base.keys *
4351 (sizeof(MARIA_SORT_PARAM) + share->base.pack_reclength),
4352 MYF(MY_ZEROFILL))))
4353 {
4354 _ma_check_print_error(param,"Not enough memory for key!");
4355 goto err;
4356 }
4357 total_key_length=0;
4358 rec_per_key_part= param->new_rec_per_key_part;
4359 share->state.state.records=share->state.state.del=share->state.split=0;
4360 share->state.state.empty=0;
4361
4362 for (i=key=0, istep=1 ; key < share->base.keys ;
4363 rec_per_key_part+=sort_param[i].keyinfo->keysegs, i+=istep, key++)
4364 {
4365 sort_param[i].key=key;
4366 sort_param[i].keyinfo=share->keyinfo+key;
4367 sort_param[i].seg=sort_param[i].keyinfo->seg;
4368 /*
4369 Skip this index if it is marked disabled in the copied
4370 (and possibly inverted) key_map.
4371 */
4372 if (! maria_is_key_active(key_map, key))
4373 {
4374 /* Remember old statistics for key */
4375 memcpy((char*) rec_per_key_part,
4376 (char*) (share->state.rec_per_key_part+
4377 (uint) (rec_per_key_part - param->new_rec_per_key_part)),
4378 sort_param[i].keyinfo->keysegs*sizeof(*rec_per_key_part));
4379 istep=0;
4380 continue;
4381 }
4382 istep=1;
4383 if ((!(param->testflag & T_SILENT)))
4384 printf ("- Fixing index %d\n",key+1);
4385 if (sort_param[i].keyinfo->flag & HA_FULLTEXT)
4386 {
4387 sort_param[i].key_read=sort_maria_ft_key_read;
4388 sort_param[i].key_write=sort_maria_ft_key_write;
4389 }
4390 else
4391 {
4392 sort_param[i].key_read=sort_key_read;
4393 sort_param[i].key_write=sort_key_write;
4394 }
4395 sort_param[i].key_cmp=sort_key_cmp;
4396 sort_param[i].lock_in_memory=maria_lock_memory;
4397 sort_param[i].tmpdir=param->tmpdir;
4398 sort_param[i].sort_info=&sort_info;
4399 sort_param[i].master=0;
4400 sort_param[i].fix_datafile=0;
4401 sort_param[i].calc_checksum= 0;
4402
4403 sort_param[i].filepos=new_header_length;
4404 sort_param[i].max_pos=sort_param[i].pos=share->pack.header_length;
4405
4406 sort_param[i].record= (((uchar *)(sort_param+share->base.keys))+
4407 (share->base.pack_reclength * i));
4408 if (_ma_alloc_buffer(&sort_param[i].rec_buff, &sort_param[i].rec_buff_size,
4409 share->base.default_rec_buff_size))
4410 {
4411 _ma_check_print_error(param,"Not enough memory!");
4412 goto err;
4413 }
4414 sort_param[i].key_length=share->rec_reflength;
4415 for (keyseg=sort_param[i].seg; keyseg->type != HA_KEYTYPE_END;
4416 keyseg++)
4417 {
4418 sort_param[i].key_length+=keyseg->length;
4419 if (keyseg->flag & HA_SPACE_PACK)
4420 sort_param[i].key_length+=get_pack_length(keyseg->length);
4421 if (keyseg->flag & (HA_BLOB_PART | HA_VAR_LENGTH_PART))
4422 sort_param[i].key_length+= 2 + MY_TEST(keyseg->length >= 127);
4423 if (keyseg->flag & HA_NULL_PART)
4424 sort_param[i].key_length++;
4425 }
4426 total_key_length+=sort_param[i].key_length;
4427
4428 if (sort_param[i].keyinfo->flag & HA_FULLTEXT)
4429 {
4430 uint ft_max_word_len_for_sort=
4431 (FT_MAX_WORD_LEN_FOR_SORT *
4432 sort_param[i].keyinfo->seg->charset->mbmaxlen);
4433 sort_param[i].key_length+=ft_max_word_len_for_sort-HA_FT_MAXBYTELEN;
4434 init_alloc_root(&sort_param[i].wordroot, "sort",
4435 FTPARSER_MEMROOT_ALLOC_SIZE, 0,
4436 MYF(param->malloc_flags));
4437 }
4438 }
4439 sort_info.total_keys=i;
4440 sort_param[0].master= 1;
4441 sort_param[0].fix_datafile= ! rep_quick;
4442 sort_param[0].calc_checksum= MY_TEST(param->testflag & T_CALC_CHECKSUM);
4443
4444 if (!maria_ftparser_alloc_param(info))
4445 goto err;
4446
4447 sort_info.got_error=0;
4448 mysql_mutex_lock(&sort_info.mutex);
4449
4450 /*
4451 Initialize the I/O cache share for use with the read caches and, in
4452 case of non-quick repair, the write cache. When all threads join on
4453 the cache lock, the writer copies the write cache contents to the
4454 read caches.
4455 */
4456 if (i > 1)
4457 {
4458 if (rep_quick)
4459 init_io_cache_share(&param->read_cache, &io_share, NULL, i);
4460 else
4461 init_io_cache_share(&new_data_cache, &io_share, &info->rec_cache, i);
4462 }
4463 else
4464 io_share.total_threads= 0; /* share not used */
4465
4466 (void) pthread_attr_init(&thr_attr);
4467 (void) pthread_attr_setdetachstate(&thr_attr,PTHREAD_CREATE_DETACHED);
4468
4469 for (i=0 ; i < sort_info.total_keys ; i++)
4470 {
4471 /*
4472 Copy the properly initialized IO_CACHE structure so that every
4473 thread has its own copy. In quick mode param->read_cache is shared
4474 for use by all threads. In non-quick mode all threads but the
4475 first copy the shared new_data_cache, which is synchronized to the
4476 write cache of the first thread. The first thread copies
4477 param->read_cache, which is not shared.
4478 */
4479 sort_param[i].read_cache= ((rep_quick || !i) ? param->read_cache :
4480 new_data_cache);
4481 DBUG_PRINT("io_cache_share", ("thread: %u read_cache: %p",
4482 i, &sort_param[i].read_cache));
4483
4484 /*
4485 two approaches: the same amount of memory for each thread
4486 or the memory for the same number of keys for each thread...
4487 In the second one all the threads will fill their sort_buffers
4488 (and call write_keys) at the same time, putting more stress on i/o.
4489 */
4490 sort_param[i].sortbuff_size=
4491#ifndef USING_SECOND_APPROACH
4492 param->sort_buffer_length/sort_info.total_keys;
4493#else
4494 param->sort_buffer_length*sort_param[i].key_length/total_key_length;
4495#endif
4496 if (mysql_thread_create(key_thread_find_all_keys,
4497 &sort_param[i].thr, &thr_attr,
4498 _ma_thr_find_all_keys, (void *) (sort_param+i)))
4499 {
4500 _ma_check_print_error(param,"Cannot start a repair thread");
4501 /* Cleanup: Detach from the share. Avoid others to be blocked. */
4502 if (io_share.total_threads)
4503 remove_io_thread(&sort_param[i].read_cache);
4504 DBUG_PRINT("error", ("Cannot start a repair thread"));
4505 sort_info.got_error=1;
4506 }
4507 else
4508 sort_info.threads_running++;
4509 }
4510 (void) pthread_attr_destroy(&thr_attr);
4511
4512 /* waiting for all threads to finish */
4513 while (sort_info.threads_running)
4514 mysql_cond_wait(&sort_info.cond, &sort_info.mutex);
4515 mysql_mutex_unlock(&sort_info.mutex);
4516
4517 if ((got_error= _ma_thr_write_keys(sort_param)))
4518 {
4519 param->retry_repair=1;
4520 goto err;
4521 }
4522 got_error=1; /* Assume the following may go wrong */
4523
4524 if (_ma_flush_table_files_before_swap(param, info))
4525 goto err;
4526
4527 if (sort_param[0].fix_datafile)
4528 {
4529 /*
4530 Append some nulls to the end of a memory mapped file. Destroy the
4531 write cache. The master thread did already detach from the share
4532 by remove_io_thread() in sort.c:thr_find_all_keys().
4533 */
4534 if (maria_write_data_suffix(&sort_info,1) ||
4535 end_io_cache(&info->rec_cache))
4536 goto err;
4537 if (param->testflag & T_SAFE_REPAIR)
4538 {
4539 /* Don't repair if we loosed more than one row */
4540 if (sort_info.new_info->s->state.state.records+1 < start_records)
4541 {
4542 _ma_check_print_error(param,
4543 "Rows lost (Found %lu of %lu); Aborting "
4544 "because safe repair was requested",
4545 (ulong) share->state.state.records,
4546 (ulong) start_records);
4547 share->state.state.records=start_records;
4548 goto err;
4549 }
4550 }
4551 share->state.state.data_file_length= sort_param->filepos;
4552 /* Only whole records */
4553 share->state.version= (ulong) time((time_t*) 0);
4554 /*
4555 Exchange the data file descriptor of the table, so that we use the
4556 new file from now on.
4557 */
4558 mysql_file_close(info->dfile.file, MYF(0));
4559 info->dfile.file= new_file;
4560 share->pack.header_length=(ulong) new_header_length;
4561 }
4562 else
4563 share->state.state.data_file_length=sort_param->max_pos;
4564
4565 if (rep_quick && del+sort_info.dupp != share->state.state.del)
4566 {
4567 _ma_check_print_error(param,"Couldn't fix table with quick recovery: "
4568 "Found wrong number of deleted records");
4569 _ma_check_print_error(param,"Run recovery again without -q");
4570 param->retry_repair=1;
4571 param->testflag|=T_RETRY_WITHOUT_QUICK;
4572 goto err;
4573 }
4574
4575 if (rep_quick && (param->testflag & T_FORCE_UNIQUENESS))
4576 {
4577 my_off_t skr= share->state.state.data_file_length +
4578 ((sort_info.org_data_file_type == COMPRESSED_RECORD) ?
4579 MEMMAP_EXTRA_MARGIN : 0);
4580#ifdef USE_RELOC
4581 if (sort_info.org_data_file_type == STATIC_RECORD &&
4582 skr < share->base.reloc*share->base.min_pack_length)
4583 skr=share->base.reloc*share->base.min_pack_length;
4584#endif
4585 if (skr != sort_info.filelength)
4586 if (mysql_file_chsize(info->dfile.file, skr, 0, MYF(0)))
4587 _ma_check_print_warning(param,
4588 "Can't change size of datafile, error: %d",
4589 my_errno);
4590 }
4591 if (param->testflag & T_CALC_CHECKSUM)
4592 share->state.state.checksum=param->glob_crc;
4593
4594 if (mysql_file_chsize(share->kfile.file,
4595 share->state.state.key_file_length, 0, MYF(0)))
4596 _ma_check_print_warning(param,
4597 "Can't change size of indexfile, error: %d",
4598 my_errno);
4599
4600 if (!(param->testflag & T_SILENT))
4601 {
4602 if (start_records != share->state.state.records)
4603 printf("Data records: %s\n", llstr(share->state.state.records,llbuff));
4604 }
4605 if (sort_info.dupp)
4606 _ma_check_print_warning(param,
4607 "%s records have been removed",
4608 llstr(sort_info.dupp,llbuff));
4609 got_error=0;
4610 /* If invoked by external program that uses thr_lock */
4611 if (&share->state.state != info->state)
4612 *info->state= *info->state_start= share->state.state;
4613
4614err:
4615 _ma_reset_state(info);
4616
4617 /*
4618 Destroy the write cache. The master thread did already detach from
4619 the share by remove_io_thread() or it was not yet started (if the
4620 error happend before creating the thread).
4621 */
4622 end_io_cache(&sort_info.new_info->rec_cache);
4623 end_io_cache(&param->read_cache);
4624 info->opt_flag&= ~(READ_CACHE_USED | WRITE_CACHE_USED);
4625 sort_info.new_info->opt_flag&= ~(READ_CACHE_USED | WRITE_CACHE_USED);
4626 /*
4627 Destroy the new data cache in case of non-quick repair. All slave
4628 threads did either detach from the share by remove_io_thread()
4629 already or they were not yet started (if the error happend before
4630 creating the threads).
4631 */
4632 if (!rep_quick && my_b_inited(&new_data_cache))
4633 end_io_cache(&new_data_cache);
4634 if (!got_error)
4635 {
4636 /* Replace the actual file with the temporary file */
4637 if (new_file >= 0)
4638 {
4639 mysql_file_close(new_file,MYF(0));
4640 info->dfile.file= new_file= -1;
4641 if (maria_change_to_newfile(share->data_file_name.str, MARIA_NAME_DEXT,
4642 DATA_TMP_EXT, param->backup_time,
4643 MYF((param->testflag & T_BACKUP_DATA ?
4644 MY_REDEL_MAKE_BACKUP : 0) |
4645 sync_dir)) ||
4646 _ma_open_datafile(info,share))
4647 got_error=1;
4648 }
4649 }
4650 if (got_error)
4651 {
4652 if (! param->error_printed)
4653 _ma_check_print_error(param,"%d when fixing table",my_errno);
4654 (void)_ma_flush_table_files_before_swap(param, info);
4655 if (new_file >= 0)
4656 {
4657 mysql_file_close(new_file,MYF(0));
4658 mysql_file_delete(key_file_tmp, param->temp_filename, MYF(MY_WME));
4659 if (info->dfile.file == new_file)
4660 info->dfile.file= -1;
4661 }
4662 maria_mark_crashed_on_repair(info);
4663 }
4664 else if (key_map == share->state.key_map)
4665 share->state.changed&= ~STATE_NOT_OPTIMIZED_KEYS;
4666 share->state.changed|= STATE_NOT_SORTED_PAGES;
4667 if (!rep_quick)
4668 share->state.changed&= ~(STATE_NOT_OPTIMIZED_ROWS | STATE_NOT_ZEROFILLED |
4669 STATE_NOT_MOVABLE);
4670
4671 mysql_cond_destroy (&sort_info.cond);
4672 mysql_mutex_destroy(&sort_info.mutex);
4673
4674 /* If caller had disabled logging it's not up to us to re-enable it */
4675 if (reenable_logging)
4676 _ma_reenable_logging_for_table(info, FALSE);
4677 restore_table_state_after_repair(info, &backup_share);
4678
4679 my_free(sort_info.ft_buf);
4680 my_free(sort_info.key_block);
4681 my_free(sort_param);
4682 my_free(sort_info.buff);
4683 if (!got_error && (param->testflag & T_UNPACK))
4684 restore_data_file_type(share);
4685 DBUG_RETURN(got_error);
4686}
4687
4688 /* Read next record and return next key */
4689
4690static int sort_key_read(MARIA_SORT_PARAM *sort_param, uchar *key)
4691{
4692 int error;
4693 MARIA_SORT_INFO *sort_info= sort_param->sort_info;
4694 MARIA_HA *info= sort_info->info;
4695 MARIA_KEY int_key;
4696 DBUG_ENTER("sort_key_read");
4697
4698 if ((error=sort_get_next_record(sort_param)))
4699 DBUG_RETURN(error);
4700 if (info->s->state.state.records == sort_info->max_records)
4701 {
4702 _ma_check_print_error(sort_info->param,
4703 "Key %d - Found too many records; Can't continue",
4704 sort_param->key+1);
4705 DBUG_RETURN(1);
4706 }
4707 if (_ma_sort_write_record(sort_param))
4708 DBUG_RETURN(1);
4709
4710 (*info->s->keyinfo[sort_param->key].make_key)(info, &int_key,
4711 sort_param->key, key,
4712 sort_param->record,
4713 sort_param->current_filepos,
4714 0);
4715 sort_param->real_key_length= int_key.data_length + int_key.ref_length;
4716#ifdef HAVE_valgrind
4717 bzero(key+sort_param->real_key_length,
4718 (sort_param->key_length-sort_param->real_key_length));
4719#endif
4720 DBUG_RETURN(0);
4721} /* sort_key_read */
4722
4723
4724static int sort_maria_ft_key_read(MARIA_SORT_PARAM *sort_param, uchar *key)
4725{
4726 int error;
4727 MARIA_SORT_INFO *sort_info=sort_param->sort_info;
4728 MARIA_HA *info=sort_info->info;
4729 FT_WORD *wptr=0;
4730 MARIA_KEY int_key;
4731 DBUG_ENTER("sort_maria_ft_key_read");
4732
4733 if (!sort_param->wordlist)
4734 {
4735 for (;;)
4736 {
4737 free_root(&sort_param->wordroot, MYF(MY_MARK_BLOCKS_FREE));
4738 if ((error=sort_get_next_record(sort_param)))
4739 DBUG_RETURN(error);
4740 if ((error= _ma_sort_write_record(sort_param)))
4741 DBUG_RETURN(error);
4742 if (!(wptr= _ma_ft_parserecord(info,sort_param->key,sort_param->record,
4743 &sort_param->wordroot)))
4744
4745 DBUG_RETURN(1);
4746 if (wptr->pos)
4747 break;
4748 }
4749 sort_param->wordptr=sort_param->wordlist=wptr;
4750 }
4751 else
4752 {
4753 error=0;
4754 wptr=(FT_WORD*)(sort_param->wordptr);
4755 }
4756
4757 _ma_ft_make_key(info, &int_key, sort_param->key, key, wptr++,
4758 sort_param->current_filepos);
4759 sort_param->real_key_length= int_key.data_length + int_key.ref_length;
4760
4761#ifdef HAVE_valgrind
4762 if (sort_param->key_length > sort_param->real_key_length)
4763 bzero(key+sort_param->real_key_length,
4764 (sort_param->key_length-sort_param->real_key_length));
4765#endif
4766 if (!wptr->pos)
4767 {
4768 free_root(&sort_param->wordroot, MYF(MY_MARK_BLOCKS_FREE));
4769 sort_param->wordlist=0;
4770 }
4771 else
4772 sort_param->wordptr=(void*)wptr;
4773
4774 DBUG_RETURN(error);
4775} /* sort_maria_ft_key_read */
4776
4777
4778/*
4779 Read next record from file using parameters in sort_info.
4780
4781 SYNOPSIS
4782 sort_get_next_record()
4783 sort_param Information about and for the sort process
4784
4785 NOTES
4786 Dynamic Records With Non-Quick Parallel Repair
4787
4788 For non-quick parallel repair we use a synchronized read/write
4789 cache. This means that one thread is the master who fixes the data
4790 file by reading each record from the old data file and writing it
4791 to the new data file. By doing this the records in the new data
4792 file are written contiguously. Whenever the write buffer is full,
4793 it is copied to the read buffer. The slaves read from the read
4794 buffer, which is not associated with a file. Thus read_cache.file
    is -1. When using _ma_read_cache(), the slaves must always set
4796 flag to READING_NEXT so that the function never tries to read from
4797 file. This is safe because the records are contiguous. There is no
4798 need to read outside the cache. This condition is evaluated in the
4799 variable 'parallel_flag' for quick reference. read_cache.file must
4800 be >= 0 in every other case.
4801
4802 RETURN
4803 -1 end of file
4804 0 ok
4805 sort_param->current_filepos points to record position.
4806 sort_param->record contains record
4807 sort_param->max_pos contains position to last byte read
4808 > 0 error
4809*/
4810
4811static int sort_get_next_record(MARIA_SORT_PARAM *sort_param)
4812{
4813 int searching;
4814 int parallel_flag;
4815 uint found_record,b_type,left_length;
4816 my_off_t pos;
4817 MARIA_BLOCK_INFO block_info;
4818 MARIA_SORT_INFO *sort_info=sort_param->sort_info;
4819 HA_CHECK *param=sort_info->param;
4820 MARIA_HA *info=sort_info->info;
4821 MARIA_SHARE *share= info->s;
4822 char llbuff[22],llbuff2[22];
4823 DBUG_ENTER("sort_get_next_record");
4824
4825 if (_ma_killed_ptr(param))
4826 DBUG_RETURN(1);
4827 if (param->progress_counter++ >= WRITE_COUNT)
4828 {
4829 param->progress_counter= 0;
4830 _ma_report_progress(param, param->progress, param->max_progress);
4831 }
4832
4833 switch (sort_info->org_data_file_type) {
4834 case BLOCK_RECORD:
4835 {
4836 for (;;)
4837 {
4838 int flag;
4839 /*
4840 Assume table is transactional and it had LSN pages in the
4841 cache. Repair has flushed them, left data pages stay in
4842 cache, and disabled transactionality (so share's current page
4843 type is PLAIN); page cache would assert if it finds a cached LSN page
4844 while _ma_scan_block_record() requested a PLAIN page. So we use
4845 UNKNOWN.
4846 */
4847 enum pagecache_page_type save_page_type= share->page_type;
4848 share->page_type= PAGECACHE_READ_UNKNOWN_PAGE;
4849 if (info != sort_info->new_info)
4850 {
4851 /* Safe scanning */
4852 flag= _ma_safe_scan_block_record(sort_info, info,
4853 sort_param->record);
4854 }
4855 else
4856 {
4857 /*
4858 Scan on clean table.
4859 It requires a reliable data_file_length so we set it.
4860 */
4861 share->state.state.data_file_length= sort_info->filelength;
4862 info->cur_row.trid= 0;
4863 flag= _ma_scan_block_record(info, sort_param->record,
4864 info->cur_row.nextpos, 1);
4865 set_if_bigger(param->max_found_trid, info->cur_row.trid);
4866 if (info->cur_row.trid > param->max_trid)
4867 {
4868 _ma_check_print_not_visible_error(param, info->cur_row.trid);
4869 flag= HA_ERR_ROW_NOT_VISIBLE;
4870 }
4871 }
4872 param->progress= (ma_recordpos_to_page(info->cur_row.lastpos)*
4873 share->block_size);
4874
4875 share->page_type= save_page_type;
4876 if (!flag)
4877 {
4878 if (sort_param->calc_checksum)
4879 {
4880 ha_checksum checksum;
4881 checksum= (*share->calc_check_checksum)(info, sort_param->record);
4882 if (share->calc_checksum &&
4883 info->cur_row.checksum != (checksum & 255))
4884 {
4885 if (param->testflag & T_VERBOSE)
4886 {
4887 record_pos_to_txt(info, info->cur_row.lastpos, llbuff);
4888 _ma_check_print_info(param,
4889 "Found record with wrong checksum at %s",
4890 llbuff);
4891 }
4892 continue;
4893 }
4894 info->cur_row.checksum= checksum;
4895 param->glob_crc+= checksum;
4896 }
4897 sort_param->start_recpos= sort_param->current_filepos=
4898 info->cur_row.lastpos;
4899 DBUG_RETURN(0);
4900 }
4901 if (flag == HA_ERR_END_OF_FILE)
4902 {
4903 sort_param->max_pos= share->state.state.data_file_length;
4904 DBUG_RETURN(-1);
4905 }
4906 /* Retry only if wrong record, not if disk error */
4907 if (flag != HA_ERR_WRONG_IN_RECORD && flag != HA_ERR_WRONG_CRC)
4908 {
4909 retry_if_quick(sort_param, flag);
4910 DBUG_RETURN(flag);
4911 }
4912 }
4913 break; /* Impossible */
4914 }
4915 case STATIC_RECORD:
4916 for (;;)
4917 {
4918 if (my_b_read(&sort_param->read_cache,sort_param->record,
4919 share->base.pack_reclength))
4920 {
4921 if (sort_param->read_cache.error)
4922 param->out_flag |= O_DATA_LOST;
4923 retry_if_quick(sort_param, my_errno);
4924 DBUG_RETURN(-1);
4925 }
4926 sort_param->start_recpos=sort_param->pos;
4927 param->progress= sort_param->pos;
4928 if (!sort_param->fix_datafile)
4929 {
4930 sort_param->current_filepos= sort_param->pos;
4931 if (sort_param->master)
4932 share->state.split++;
4933 }
4934 sort_param->max_pos=(sort_param->pos+=share->base.pack_reclength);
4935 if (*sort_param->record)
4936 {
4937 if (sort_param->calc_checksum)
4938 param->glob_crc+= (info->cur_row.checksum=
4939 _ma_static_checksum(info,sort_param->record));
4940 DBUG_RETURN(0);
4941 }
4942 if (!sort_param->fix_datafile && sort_param->master)
4943 {
4944 share->state.state.del++;
4945 share->state.state.empty+=share->base.pack_reclength;
4946 }
4947 }
4948 case DYNAMIC_RECORD:
4949 {
4950 uchar *UNINIT_VAR(to);
4951 ha_checksum checksum= 0;
4952
4953 pos=sort_param->pos;
4954 param->progress= pos;
4955 searching=(sort_param->fix_datafile && (param->testflag & T_EXTEND));
4956 parallel_flag= (sort_param->read_cache.file < 0) ? READING_NEXT : 0;
4957 for (;;)
4958 {
4959 found_record=block_info.second_read= 0;
4960 left_length=1;
4961 if (searching)
4962 {
4963 pos=MY_ALIGN(pos,MARIA_DYN_ALIGN_SIZE);
4964 param->testflag|=T_RETRY_WITHOUT_QUICK;
4965 sort_param->start_recpos=pos;
4966 }
4967 do
4968 {
4969 if (pos > sort_param->max_pos)
4970 sort_param->max_pos=pos;
4971 if (pos & (MARIA_DYN_ALIGN_SIZE-1))
4972 {
4973 if ((param->testflag & T_VERBOSE) || searching == 0)
4974 _ma_check_print_info(param,"Wrong aligned block at %s",
4975 llstr(pos,llbuff));
4976 if (searching)
4977 goto try_next;
4978 }
4979 if (found_record && pos == param->search_after_block)
4980 _ma_check_print_info(param,"Block: %s used by record at %s",
4981 llstr(param->search_after_block,llbuff),
4982 llstr(sort_param->start_recpos,llbuff2));
4983 if (_ma_read_cache(info, &sort_param->read_cache,
4984 block_info.header, pos,
4985 MARIA_BLOCK_INFO_HEADER_LENGTH,
4986 (! found_record ? READING_NEXT : 0) |
4987 parallel_flag | READING_HEADER))
4988 {
4989 if (found_record)
4990 {
4991 _ma_check_print_info(param,
4992 "Can't read whole record at %s (errno: %d)",
4993 llstr(sort_param->start_recpos,llbuff),errno);
4994 goto try_next;
4995 }
4996 DBUG_RETURN(-1);
4997 }
4998 if (searching && ! sort_param->fix_datafile)
4999 {
5000 param->error_printed=1;
5001 param->retry_repair=1;
5002 param->testflag|=T_RETRY_WITHOUT_QUICK;
5003 my_errno= HA_ERR_WRONG_IN_RECORD;
5004 DBUG_RETURN(1); /* Something wrong with data */
5005 }
5006 b_type= _ma_get_block_info(info, &block_info,-1,pos);
5007 if ((b_type & (BLOCK_ERROR | BLOCK_FATAL_ERROR)) ||
5008 ((b_type & BLOCK_FIRST) &&
5009 (block_info.rec_len < (uint) share->base.min_pack_length ||
5010 block_info.rec_len > (uint) share->base.max_pack_length)))
5011 {
5012 uint i;
5013 if (param->testflag & T_VERBOSE || searching == 0)
5014 _ma_check_print_info(param,
5015 "Wrong bytesec: %3d-%3d-%3d at %10s; Skipped",
5016 block_info.header[0],block_info.header[1],
5017 block_info.header[2],llstr(pos,llbuff));
5018 if (found_record)
5019 goto try_next;
5020 block_info.second_read=0;
5021 searching=1;
5022 /* Search after block in read header string */
5023 for (i=MARIA_DYN_ALIGN_SIZE ;
5024 i < MARIA_BLOCK_INFO_HEADER_LENGTH ;
5025 i+= MARIA_DYN_ALIGN_SIZE)
5026 if (block_info.header[i] >= 1 &&
5027 block_info.header[i] <= MARIA_MAX_DYN_HEADER_BYTE)
5028 break;
5029 pos+=(ulong) i;
5030 sort_param->start_recpos=pos;
5031 continue;
5032 }
5033 if (b_type & BLOCK_DELETED)
5034 {
5035 my_bool error=0;
5036 if (block_info.block_len+ (uint) (block_info.filepos-pos) <
5037 share->base.min_block_length)
5038 {
5039 if (!searching)
5040 _ma_check_print_info(param,
5041 "Deleted block with impossible length %lu "
5042 "at %s",
5043 block_info.block_len,llstr(pos,llbuff));
5044 error=1;
5045 }
5046 else
5047 {
5048 if ((block_info.next_filepos != HA_OFFSET_ERROR &&
5049 block_info.next_filepos >=
5050 share->state.state.data_file_length) ||
5051 (block_info.prev_filepos != HA_OFFSET_ERROR &&
5052 block_info.prev_filepos >=
5053 share->state.state.data_file_length))
5054 {
5055 if (!searching)
5056 _ma_check_print_info(param,
5057 "Delete link points outside datafile at "
5058 "%s",
5059 llstr(pos,llbuff));
5060 error=1;
5061 }
5062 }
5063 if (error)
5064 {
5065 if (found_record)
5066 goto try_next;
5067 searching=1;
5068 pos+= MARIA_DYN_ALIGN_SIZE;
5069 sort_param->start_recpos=pos;
5070 block_info.second_read=0;
5071 continue;
5072 }
5073 }
5074 else
5075 {
5076 if (block_info.block_len+ (uint) (block_info.filepos-pos) <
5077 share->base.min_block_length ||
5078 block_info.block_len > (uint) share->base.max_pack_length+
5079 MARIA_SPLIT_LENGTH)
5080 {
5081 if (!searching)
5082 _ma_check_print_info(param,
5083 "Found block with impossible length %lu "
5084 "at %s; Skipped",
5085 block_info.block_len+
5086 (uint) (block_info.filepos-pos),
5087 llstr(pos,llbuff));
5088 if (found_record)
5089 goto try_next;
5090 searching=1;
5091 pos+= MARIA_DYN_ALIGN_SIZE;
5092 sort_param->start_recpos=pos;
5093 block_info.second_read=0;
5094 continue;
5095 }
5096 }
5097 if (b_type & (BLOCK_DELETED | BLOCK_SYNC_ERROR))
5098 {
5099 if (!sort_param->fix_datafile && sort_param->master &&
5100 (b_type & BLOCK_DELETED))
5101 {
5102 share->state.state.empty+=block_info.block_len;
5103 share->state.state.del++;
5104 share->state.split++;
5105 }
5106 if (found_record)
5107 goto try_next;
5108 if (searching)
5109 {
5110 pos+=MARIA_DYN_ALIGN_SIZE;
5111 sort_param->start_recpos=pos;
5112 }
5113 else
5114 pos=block_info.filepos+block_info.block_len;
5115 block_info.second_read=0;
5116 continue;
5117 }
5118
5119 if (!sort_param->fix_datafile && sort_param->master)
5120 share->state.split++;
5121 if (! found_record++)
5122 {
5123 sort_param->find_length=left_length=block_info.rec_len;
5124 sort_param->start_recpos=pos;
5125 if (!sort_param->fix_datafile)
5126 sort_param->current_filepos= sort_param->start_recpos;
5127 if (sort_param->fix_datafile && (param->testflag & T_EXTEND))
5128 sort_param->pos=block_info.filepos+1;
5129 else
5130 sort_param->pos=block_info.filepos+block_info.block_len;
5131 if (share->base.blobs)
5132 {
5133 if (_ma_alloc_buffer(&sort_param->rec_buff,
5134 &sort_param->rec_buff_size,
5135 block_info.rec_len +
5136 share->base.extra_rec_buff_size))
5137
5138 {
5139 if (param->max_record_length >= block_info.rec_len)
5140 {
5141 _ma_check_print_error(param,"Not enough memory for blob at %s "
5142 "(need %lu)",
5143 llstr(sort_param->start_recpos,llbuff),
5144 (ulong) block_info.rec_len);
5145 DBUG_RETURN(1);
5146 }
5147 else
5148 {
5149 _ma_check_print_info(param,"Not enough memory for blob at %s "
5150 "(need %lu); Row skipped",
5151 llstr(sort_param->start_recpos,llbuff),
5152 (ulong) block_info.rec_len);
5153 goto try_next;
5154 }
5155 }
5156 }
5157 to= sort_param->rec_buff;
5158 }
5159 if (left_length < block_info.data_len || ! block_info.data_len)
5160 {
5161 _ma_check_print_info(param,
5162 "Found block with too small length at %s; "
5163 "Skipped",
5164 llstr(sort_param->start_recpos,llbuff));
5165 goto try_next;
5166 }
5167 if (block_info.filepos + block_info.data_len >
5168 sort_param->read_cache.end_of_file)
5169 {
5170 _ma_check_print_info(param,
5171 "Found block that points outside data file "
5172 "at %s",
5173 llstr(sort_param->start_recpos,llbuff));
5174 goto try_next;
5175 }
5176 /*
5177 Copy information that is already read. Avoid accessing data
5178 below the cache start. This could happen if the header
5179 streched over the end of the previous buffer contents.
5180 */
5181 {
5182 uint header_len= (uint) (block_info.filepos - pos);
5183 uint prefetch_len= (MARIA_BLOCK_INFO_HEADER_LENGTH - header_len);
5184
5185 if (prefetch_len > block_info.data_len)
5186 prefetch_len= block_info.data_len;
5187 if (prefetch_len)
5188 {
5189 memcpy(to, block_info.header + header_len, prefetch_len);
5190 block_info.filepos+= prefetch_len;
5191 block_info.data_len-= prefetch_len;
5192 left_length-= prefetch_len;
5193 to+= prefetch_len;
5194 }
5195 }
5196 if (block_info.data_len &&
5197 _ma_read_cache(info, &sort_param->read_cache,to,block_info.filepos,
5198 block_info.data_len,
5199 (found_record == 1 ? READING_NEXT : 0) |
5200 parallel_flag))
5201 {
5202 _ma_check_print_info(param,
5203 "Read error for block at: %s (error: %d); "
5204 "Skipped",
5205 llstr(block_info.filepos,llbuff),my_errno);
5206 goto try_next;
5207 }
5208 left_length-=block_info.data_len;
5209 to+=block_info.data_len;
5210 pos=block_info.next_filepos;
5211 if (pos == HA_OFFSET_ERROR && left_length)
5212 {
5213 _ma_check_print_info(param,
5214 "Wrong block with wrong total length "
5215 "starting at %s",
5216 llstr(sort_param->start_recpos,llbuff));
5217 goto try_next;
5218 }
5219 if (pos + MARIA_BLOCK_INFO_HEADER_LENGTH >
5220 sort_param->read_cache.end_of_file)
5221 {
5222 _ma_check_print_info(param,
5223 "Found link that points at %s (outside data "
5224 "file) at %s",
5225 llstr(pos,llbuff2),
5226 llstr(sort_param->start_recpos,llbuff));
5227 goto try_next;
5228 }
5229 } while (left_length);
5230
5231 if (_ma_rec_unpack(info,sort_param->record,sort_param->rec_buff,
5232 sort_param->find_length) != MY_FILE_ERROR)
5233 {
5234 if (sort_param->read_cache.error < 0)
5235 DBUG_RETURN(1);
5236 if (sort_param->calc_checksum)
5237 checksum= (share->calc_check_checksum)(info, sort_param->record);
5238 if ((param->testflag & (T_EXTEND | T_REP)) || searching)
5239 {
5240 if (_ma_rec_check(info, sort_param->record, sort_param->rec_buff,
5241 sort_param->find_length,
5242 (param->testflag & T_QUICK) &&
5243 sort_param->calc_checksum &&
5244 MY_TEST(share->calc_checksum), checksum))
5245 {
5246 _ma_check_print_info(param,"Found wrong packed record at %s",
5247 llstr(sort_param->start_recpos,llbuff));
5248 goto try_next;
5249 }
5250 }
5251 if (sort_param->calc_checksum)
5252 param->glob_crc+= checksum;
5253 DBUG_RETURN(0);
5254 }
5255 if (!searching)
5256 _ma_check_print_info(param,"Key %d - Found wrong stored record at %s",
5257 sort_param->key+1,
5258 llstr(sort_param->start_recpos,llbuff));
5259 try_next:
5260 pos=(sort_param->start_recpos+=MARIA_DYN_ALIGN_SIZE);
5261 searching=1;
5262 }
5263 }
5264 case COMPRESSED_RECORD:
5265 param->progress= sort_param->pos;
5266 for (searching=0 ;; searching=1, sort_param->pos++)
5267 {
5268 if (_ma_read_cache(info, &sort_param->read_cache, block_info.header,
5269 sort_param->pos,
5270 share->pack.ref_length,READING_NEXT))
5271 DBUG_RETURN(-1);
5272 if (searching && ! sort_param->fix_datafile)
5273 {
5274 param->error_printed=1;
5275 param->retry_repair=1;
5276 param->testflag|=T_RETRY_WITHOUT_QUICK;
5277 my_errno= HA_ERR_WRONG_IN_RECORD;
5278 DBUG_RETURN(1); /* Something wrong with data */
5279 }
5280 sort_param->start_recpos=sort_param->pos;
5281 if (_ma_pack_get_block_info(info, &sort_param->bit_buff, &block_info,
5282 &sort_param->rec_buff,
5283 &sort_param->rec_buff_size, -1,
5284 sort_param->pos))
5285 DBUG_RETURN(-1);
5286 if (!block_info.rec_len &&
5287 sort_param->pos + MEMMAP_EXTRA_MARGIN ==
5288 sort_param->read_cache.end_of_file)
5289 DBUG_RETURN(-1);
5290 if (block_info.rec_len < (uint) share->min_pack_length ||
5291 block_info.rec_len > (uint) share->max_pack_length)
5292 {
5293 if (! searching)
5294 _ma_check_print_info(param,
5295 "Found block with wrong recordlength: %lu "
5296 "at %s\n",
5297 block_info.rec_len,
5298 llstr(sort_param->pos,llbuff));
5299 continue;
5300 }
5301 if (_ma_read_cache(info, &sort_param->read_cache, sort_param->rec_buff,
5302 block_info.filepos, block_info.rec_len,
5303 READING_NEXT))
5304 {
5305 if (! searching)
5306 _ma_check_print_info(param,"Couldn't read whole record from %s",
5307 llstr(sort_param->pos,llbuff));
5308 continue;
5309 }
5310#ifdef HAVE_valgrind
5311 bzero(sort_param->rec_buff + block_info.rec_len,
5312 share->base.extra_rec_buff_size);
5313#endif
5314 if (_ma_pack_rec_unpack(info, &sort_param->bit_buff, sort_param->record,
5315 sort_param->rec_buff, block_info.rec_len))
5316 {
5317 if (! searching)
5318 _ma_check_print_info(param,"Found wrong record at %s",
5319 llstr(sort_param->pos,llbuff));
5320 continue;
5321 }
5322 if (!sort_param->fix_datafile)
5323 {
5324 sort_param->current_filepos= sort_param->pos;
5325 if (sort_param->master)
5326 share->state.split++;
5327 }
5328 sort_param->max_pos= (sort_param->pos=block_info.filepos+
5329 block_info.rec_len);
5330 info->packed_length=block_info.rec_len;
5331
5332 if (sort_param->calc_checksum)
5333 {
5334 info->cur_row.checksum= (*share->calc_check_checksum)(info,
5335 sort_param->
5336 record);
5337 param->glob_crc+= info->cur_row.checksum;
5338 }
5339 DBUG_RETURN(0);
5340 }
5341 case NO_RECORD:
5342 DBUG_RETURN(1); /* Impossible */
5343 }
5344 DBUG_RETURN(1); /* Impossible */
5345}
5346
5347
5348/**
5349 @brief Write record to new file.
5350
5351 @fn _ma_sort_write_record()
5352 @param sort_param Sort parameters.
5353
5354 @note
5355 This is only called by a master thread if parallel repair is used.
5356
5357 @return
5358 @retval 0 OK
5359 sort_param->current_filepos points to inserted record for
5360 block_records and to the place for the next record for
5361 other row types.
5362 sort_param->filepos points to end of file
5363 @retval 1 Error
5364*/
5365
5366int _ma_sort_write_record(MARIA_SORT_PARAM *sort_param)
5367{
5368 int flag;
5369 uint length;
5370 ulong block_length,reclength;
5371 uchar *from;
5372 uchar block_buff[8];
5373 MARIA_SORT_INFO *sort_info=sort_param->sort_info;
5374 HA_CHECK *param= sort_info->param;
5375 MARIA_HA *info= sort_info->new_info;
5376 MARIA_SHARE *share= info->s;
5377 DBUG_ENTER("_ma_sort_write_record");
5378
5379 if (sort_param->fix_datafile)
5380 {
5381 sort_param->current_filepos= sort_param->filepos;
5382 switch (sort_info->new_data_file_type) {
5383 case BLOCK_RECORD:
5384 if ((sort_param->current_filepos=
5385 (*share->write_record_init)(info, sort_param->record)) ==
5386 HA_OFFSET_ERROR)
5387 {
5388 _ma_check_print_error(param, "%d when writing to datafile", my_errno);
5389 DBUG_RETURN(1);
5390 }
5391 /* Pointer to end of file */
5392 sort_param->filepos= share->state.state.data_file_length;
5393 break;
5394 case STATIC_RECORD:
5395 if (my_b_write(&info->rec_cache,sort_param->record,
5396 share->base.pack_reclength))
5397 {
5398 _ma_check_print_error(param,"%d when writing to datafile",my_errno);
5399 DBUG_RETURN(1);
5400 }
5401 sort_param->filepos+=share->base.pack_reclength;
5402 share->state.split++;
5403 break;
5404 case DYNAMIC_RECORD:
5405 if (! info->blobs)
5406 from=sort_param->rec_buff;
5407 else
5408 {
5409 /* must be sure that local buffer is big enough */
5410 reclength=share->base.pack_reclength+
5411 _ma_calc_total_blob_length(info,sort_param->record)+
5412 ALIGN_SIZE(MARIA_MAX_DYN_BLOCK_HEADER)+MARIA_SPLIT_LENGTH+
5413 MARIA_DYN_DELETE_BLOCK_HEADER;
5414 if (sort_info->buff_length < reclength)
5415 {
5416 if (!(sort_info->buff=my_realloc(sort_info->buff, (uint) reclength,
5417 MYF(MY_FREE_ON_ERROR |
5418 MY_ALLOW_ZERO_PTR))))
5419 DBUG_RETURN(1);
5420 sort_info->buff_length=reclength;
5421 }
5422 from= (uchar *) sort_info->buff+ALIGN_SIZE(MARIA_MAX_DYN_BLOCK_HEADER);
5423 }
5424 /* We can use info->checksum here as only one thread calls this */
5425 info->cur_row.checksum= (*share->calc_check_checksum)(info,
5426 sort_param->
5427 record);
5428 reclength= _ma_rec_pack(info,from,sort_param->record);
5429 flag=0;
5430
5431 do
5432 {
5433 block_length= reclength + 3 + MY_TEST(reclength >= (65520 - 3));
5434 if (block_length < share->base.min_block_length)
5435 block_length=share->base.min_block_length;
5436 info->update|=HA_STATE_WRITE_AT_END;
5437 block_length=MY_ALIGN(block_length,MARIA_DYN_ALIGN_SIZE);
5438 if (block_length > MARIA_MAX_BLOCK_LENGTH)
5439 block_length=MARIA_MAX_BLOCK_LENGTH;
5440 if (_ma_write_part_record(info,0L,block_length,
5441 sort_param->filepos+block_length,
5442 &from,&reclength,&flag))
5443 {
5444 _ma_check_print_error(param,"%d when writing to datafile",my_errno);
5445 DBUG_RETURN(1);
5446 }
5447 sort_param->filepos+=block_length;
5448 share->state.split++;
5449 } while (reclength);
5450 break;
5451 case COMPRESSED_RECORD:
5452 reclength=info->packed_length;
5453 length= _ma_save_pack_length((uint) share->pack.version, block_buff,
5454 reclength);
5455 if (share->base.blobs)
5456 length+= _ma_save_pack_length((uint) share->pack.version,
5457 block_buff + length, info->blob_length);
5458 if (my_b_write(&info->rec_cache,block_buff,length) ||
5459 my_b_write(&info->rec_cache, sort_param->rec_buff, reclength))
5460 {
5461 _ma_check_print_error(param,"%d when writing to datafile",my_errno);
5462 DBUG_RETURN(1);
5463 }
5464 sort_param->filepos+=reclength+length;
5465 share->state.split++;
5466 break;
5467 case NO_RECORD:
5468 DBUG_RETURN(1); /* Impossible */
5469 }
5470 }
5471 if (sort_param->master)
5472 {
5473 share->state.state.records++;
5474 if ((param->testflag & T_WRITE_LOOP) &&
5475 (share->state.state.records % WRITE_COUNT) == 0)
5476 {
5477 char llbuff[22];
5478 printf("%s\r", llstr(share->state.state.records,llbuff));
5479 fflush(stdout);
5480 }
5481 }
5482 DBUG_RETURN(0);
5483} /* _ma_sort_write_record */
5484
5485
5486/* Compare two keys from _ma_create_index_by_sort */
5487
5488static int sort_key_cmp(MARIA_SORT_PARAM *sort_param, const void *a,
5489 const void *b)
5490{
5491 uint not_used[2];
5492 return (ha_key_cmp(sort_param->seg, *((uchar* const *) a),
5493 *((uchar* const *) b),
5494 USE_WHOLE_KEY, SEARCH_SAME, not_used));
5495} /* sort_key_cmp */
5496
5497
5498static int sort_key_write(MARIA_SORT_PARAM *sort_param, const uchar *a)
5499{
5500 uint diff_pos[2];
5501 char llbuff[22],llbuff2[22];
5502 MARIA_SORT_INFO *sort_info=sort_param->sort_info;
5503 HA_CHECK *param= sort_info->param;
5504 int cmp;
5505
5506 if (sort_info->key_block->inited)
5507 {
5508 cmp= ha_key_cmp(sort_param->seg, sort_info->key_block->lastkey,
5509 a, USE_WHOLE_KEY,
5510 SEARCH_FIND | SEARCH_UPDATE | SEARCH_INSERT,
5511 diff_pos);
5512 if (param->stats_method == MI_STATS_METHOD_NULLS_NOT_EQUAL)
5513 ha_key_cmp(sort_param->seg, sort_info->key_block->lastkey,
5514 a, USE_WHOLE_KEY,
5515 SEARCH_FIND | SEARCH_NULL_ARE_NOT_EQUAL, diff_pos);
5516 else if (param->stats_method == MI_STATS_METHOD_IGNORE_NULLS)
5517 {
5518 diff_pos[0]= maria_collect_stats_nonulls_next(sort_param->seg,
5519 sort_param->notnull,
5520 sort_info->key_block->lastkey,
5521 a);
5522 }
5523 sort_param->unique[diff_pos[0]-1]++;
5524 }
5525 else
5526 {
5527 cmp= -1;
5528 if (param->stats_method == MI_STATS_METHOD_IGNORE_NULLS)
5529 maria_collect_stats_nonulls_first(sort_param->seg, sort_param->notnull,
5530 a);
5531 }
5532 if ((sort_param->keyinfo->flag & HA_NOSAME) && cmp == 0)
5533 {
5534 DBUG_EXECUTE("key", _ma_print_keydata(DBUG_FILE, sort_param->seg, a,
5535 USE_WHOLE_KEY););
5536 sort_info->dupp++;
5537 sort_info->info->cur_row.lastpos= get_record_for_key(sort_param->keyinfo,
5538 a);
5539 if ((param->testflag & (T_CREATE_UNIQUE_BY_SORT | T_SUPPRESS_ERR_HANDLING))
5540 == T_CREATE_UNIQUE_BY_SORT)
5541 param->testflag|= T_SUPPRESS_ERR_HANDLING;
5542 _ma_check_print_warning(param,
5543 "Duplicate key %2u for record at %10s against "
5544 "record at %10s",
5545 sort_param->key + 1,
5546 llstr(sort_info->info->cur_row.lastpos, llbuff),
5547 llstr(get_record_for_key(sort_param->keyinfo,
5548 sort_info->key_block->
5549 lastkey),
5550 llbuff2));
5551 param->testflag|=T_RETRY_WITHOUT_QUICK;
5552 if (sort_info->param->testflag & T_VERBOSE)
5553 _ma_print_keydata(stdout,sort_param->seg, a, USE_WHOLE_KEY);
5554 return (sort_delete_record(sort_param));
5555 }
5556#ifndef DBUG_OFF
5557 if (cmp > 0)
5558 {
5559 _ma_check_print_error(param,
5560 "Internal error: Keys are not in order from sort");
5561 return(1);
5562 }
5563#endif
5564 return (sort_insert_key(sort_param, sort_info->key_block,
5565 a, HA_OFFSET_ERROR));
5566} /* sort_key_write */
5567
5568
5569int _ma_sort_ft_buf_flush(MARIA_SORT_PARAM *sort_param)
5570{
5571 MARIA_SORT_INFO *sort_info=sort_param->sort_info;
5572 SORT_KEY_BLOCKS *key_block=sort_info->key_block;
5573 MARIA_SHARE *share=sort_info->info->s;
5574 uint val_off, val_len;
5575 int error;
5576 SORT_FT_BUF *maria_ft_buf=sort_info->ft_buf;
5577 uchar *from, *to;
5578
5579 val_len=share->ft2_keyinfo.keylength;
5580 get_key_full_length_rdonly(val_off, maria_ft_buf->lastkey);
5581 to= maria_ft_buf->lastkey+val_off;
5582
5583 if (maria_ft_buf->buf)
5584 {
5585 /* flushing first-level tree */
5586 error= sort_insert_key(sort_param,key_block,maria_ft_buf->lastkey,
5587 HA_OFFSET_ERROR);
5588 for (from=to+val_len;
5589 !error && from < maria_ft_buf->buf;
5590 from+= val_len)
5591 {
5592 memcpy(to, from, val_len);
5593 error= sort_insert_key(sort_param,key_block,maria_ft_buf->lastkey,
5594 HA_OFFSET_ERROR);
5595 }
5596 return error;
5597 }
5598 /* flushing second-level tree keyblocks */
5599 error=_ma_flush_pending_blocks(sort_param);
5600 /* updating lastkey with second-level tree info */
5601 ft_intXstore(maria_ft_buf->lastkey+val_off, -maria_ft_buf->count);
5602 _ma_dpointer(sort_info->info->s, maria_ft_buf->lastkey+val_off+HA_FT_WLEN,
5603 share->state.key_root[sort_param->key]);
5604 /* restoring first level tree data in sort_info/sort_param */
5605 sort_info->key_block=sort_info->key_block_end- sort_info->param->sort_key_blocks;
5606 sort_param->keyinfo=share->keyinfo+sort_param->key;
5607 share->state.key_root[sort_param->key]=HA_OFFSET_ERROR;
5608 /* writing lastkey in first-level tree */
5609 return error ? error :
5610 sort_insert_key(sort_param,sort_info->key_block,
5611 maria_ft_buf->lastkey,HA_OFFSET_ERROR);
5612}
5613
5614
5615static int sort_maria_ft_key_write(MARIA_SORT_PARAM *sort_param,
5616 const uchar *a)
5617{
5618 uint a_len, val_off, val_len, error;
5619 MARIA_SORT_INFO *sort_info= sort_param->sort_info;
5620 SORT_FT_BUF *ft_buf= sort_info->ft_buf;
5621 SORT_KEY_BLOCKS *key_block= sort_info->key_block;
5622 MARIA_SHARE *share= sort_info->info->s;
5623
5624 val_len=HA_FT_WLEN+share->rec_reflength;
5625 get_key_full_length_rdonly(a_len, a);
5626
5627 if (!ft_buf)
5628 {
5629 /*
5630 use two-level tree only if key_reflength fits in rec_reflength place
5631 and row format is NOT static - for _ma_dpointer not to garble offsets
5632 */
5633 if ((share->base.key_reflength <=
5634 share->rec_reflength) &&
5635 (share->options &
5636 (HA_OPTION_PACK_RECORD | HA_OPTION_COMPRESS_RECORD)))
5637 ft_buf= (SORT_FT_BUF *)my_malloc(sort_param->keyinfo->block_length +
5638 sizeof(SORT_FT_BUF), MYF(MY_WME));
5639
5640 if (!ft_buf)
5641 {
5642 sort_param->key_write=sort_key_write;
5643 return sort_key_write(sort_param, a);
5644 }
5645 sort_info->ft_buf= ft_buf;
5646 goto word_init_ft_buf; /* no need to duplicate the code */
5647 }
5648 get_key_full_length_rdonly(val_off, ft_buf->lastkey);
5649
5650 if (ha_compare_text(sort_param->seg->charset,
5651 a+1,a_len-1,
5652 ft_buf->lastkey+1,val_off-1, 0)==0)
5653 {
5654 uchar *p;
5655 if (!ft_buf->buf) /* store in second-level tree */
5656 {
5657 ft_buf->count++;
5658 return sort_insert_key(sort_param,key_block,
5659 a + a_len, HA_OFFSET_ERROR);
5660 }
5661
5662 /* storing the key in the buffer. */
5663 memcpy (ft_buf->buf, (const char *)a+a_len, val_len);
5664 ft_buf->buf+=val_len;
5665 if (ft_buf->buf < ft_buf->end)
5666 return 0;
5667
5668 /* converting to two-level tree */
5669 p=ft_buf->lastkey+val_off;
5670
5671 while (key_block->inited)
5672 key_block++;
5673 sort_info->key_block=key_block;
5674 sort_param->keyinfo= &share->ft2_keyinfo;
5675 ft_buf->count=(uint)(ft_buf->buf - p)/val_len;
5676
5677 /* flushing buffer to second-level tree */
5678 for (error=0; !error && p < ft_buf->buf; p+= val_len)
5679 error=sort_insert_key(sort_param,key_block,p,HA_OFFSET_ERROR);
5680 ft_buf->buf=0;
5681 return error;
5682 }
5683
5684 /* flushing buffer */
5685 if ((error=_ma_sort_ft_buf_flush(sort_param)))
5686 return error;
5687
5688word_init_ft_buf:
5689 a_len+=val_len;
5690 memcpy(ft_buf->lastkey, a, a_len);
5691 ft_buf->buf=ft_buf->lastkey+a_len;
5692 /*
5693 32 is just a safety margin here
5694 (at least MY_MAX(val_len, sizeof(nod_flag)) should be there).
5695 May be better performance could be achieved if we'd put
5696 (sort_info->keyinfo->block_length-32)/XXX
5697 instead.
5698 TODO: benchmark the best value for XXX.
5699 */
5700 ft_buf->end= ft_buf->lastkey+ (sort_param->keyinfo->block_length-32);
5701 return 0;
5702} /* sort_maria_ft_key_write */
5703
5704
5705/* get pointer to record from a key */
5706
5707static my_off_t get_record_for_key(MARIA_KEYDEF *keyinfo,
5708 const uchar *key_data)
5709{
5710 MARIA_KEY key;
5711 key.keyinfo= keyinfo;
5712 key.data= (uchar*) key_data;
5713 key.data_length= (_ma_keylength(keyinfo, key_data) -
5714 keyinfo->share->rec_reflength);
5715 return _ma_row_pos_from_key(&key);
5716} /* get_record_for_key */
5717
5718
5719/* Insert a key in sort-key-blocks */
5720
5721static int sort_insert_key(MARIA_SORT_PARAM *sort_param,
5722 register SORT_KEY_BLOCKS *key_block,
5723 const uchar *key,
5724 my_off_t prev_block)
5725{
5726 uint a_length,t_length,nod_flag;
5727 my_off_t filepos;
5728 uchar *anc_buff,*lastkey;
5729 MARIA_KEY_PARAM s_temp;
5730 MARIA_KEYDEF *keyinfo=sort_param->keyinfo;
5731 MARIA_SORT_INFO *sort_info= sort_param->sort_info;
5732 HA_CHECK *param=sort_info->param;
5733 MARIA_PINNED_PAGE tmp_page_link, *page_link= &tmp_page_link;
5734 MARIA_KEY tmp_key;
5735 MARIA_HA *info= sort_info->info;
5736 MARIA_SHARE *share= info->s;
5737 DBUG_ENTER("sort_insert_key");
5738
5739 anc_buff= key_block->buff;
5740 lastkey=key_block->lastkey;
5741 nod_flag= (key_block == sort_info->key_block ? 0 :
5742 share->base.key_reflength);
5743
5744 if (!key_block->inited)
5745 {
5746 key_block->inited=1;
5747 if (key_block == sort_info->key_block_end)
5748 {
5749 _ma_check_print_error(param,
5750 "To many key-block-levels; "
5751 "Try increasing sort_key_blocks");
5752 DBUG_RETURN(1);
5753 }
5754 a_length= share->keypage_header + nod_flag;
5755 key_block->end_pos= anc_buff + share->keypage_header;
5756 bzero(anc_buff, share->keypage_header);
5757 _ma_store_keynr(share, anc_buff, (uint) (sort_param->keyinfo -
5758 share->keyinfo));
5759 lastkey=0; /* No previous key in block */
5760 }
5761 else
5762 a_length= _ma_get_page_used(share, anc_buff);
5763
5764 /* Save pointer to previous block */
5765 if (nod_flag)
5766 {
5767 _ma_store_keypage_flag(share, anc_buff, KEYPAGE_FLAG_ISNOD);
5768 _ma_kpointer(info,key_block->end_pos,prev_block);
5769 }
5770
5771 tmp_key.keyinfo= keyinfo;
5772 tmp_key.data= (uchar*) key;
5773 tmp_key.data_length= _ma_keylength(keyinfo, key) - share->rec_reflength;
5774 tmp_key.ref_length= share->rec_reflength;
5775
5776 t_length= (*keyinfo->pack_key)(&tmp_key, nod_flag,
5777 (uchar*) 0, lastkey, lastkey, &s_temp);
5778 (*keyinfo->store_key)(keyinfo, key_block->end_pos+nod_flag,&s_temp);
5779 a_length+=t_length;
5780 _ma_store_page_used(share, anc_buff, a_length);
5781 key_block->end_pos+=t_length;
5782 if (a_length <= share->max_index_block_size)
5783 {
5784 MARIA_KEY tmp_key2;
5785 tmp_key2.data= key_block->lastkey;
5786 _ma_copy_key(&tmp_key2, &tmp_key);
5787 key_block->last_length=a_length-t_length;
5788 DBUG_RETURN(0);
5789 }
5790
5791 /* Fill block with end-zero and write filled block */
5792 _ma_store_page_used(share, anc_buff, key_block->last_length);
5793 bzero(anc_buff+key_block->last_length,
5794 keyinfo->block_length- key_block->last_length);
5795 if ((filepos= _ma_new(info, DFLT_INIT_HITS, &page_link)) == HA_OFFSET_ERROR)
5796 DBUG_RETURN(1);
5797 _ma_fast_unlock_key_del(info);
5798
5799 /* If we read the page from the key cache, we have to write it back to it */
5800 if (page_link->changed)
5801 {
5802 MARIA_PAGE page;
5803 pop_dynamic(&info->pinned_pages);
5804 _ma_page_setup(&page, info, keyinfo, filepos, anc_buff);
5805 if (_ma_write_keypage(&page, PAGECACHE_LOCK_WRITE_UNLOCK, DFLT_INIT_HITS))
5806 DBUG_RETURN(1);
5807 }
5808 else
5809 {
5810 if (write_page(share, share->kfile.file, anc_buff,
5811 keyinfo->block_length, filepos, param->myf_rw))
5812 DBUG_RETURN(1);
5813 }
5814 DBUG_DUMP("buff", anc_buff, _ma_get_page_used(share, anc_buff));
5815
5816 /* Write separator-key to block in next level */
5817 if (sort_insert_key(sort_param,key_block+1,key_block->lastkey,filepos))
5818 DBUG_RETURN(1);
5819
5820 /* clear old block and write new key in it */
5821 key_block->inited=0;
5822 DBUG_RETURN(sort_insert_key(sort_param, key_block,key,prev_block));
5823} /* sort_insert_key */
5824
5825
5826/* Delete record when we found a duplicated key */
5827
5828static int sort_delete_record(MARIA_SORT_PARAM *sort_param)
5829{
5830 uint i;
5831 int old_file,error;
5832 uchar *key;
5833 MARIA_SORT_INFO *sort_info=sort_param->sort_info;
5834 HA_CHECK *param=sort_info->param;
5835 MARIA_HA *row_info= sort_info->new_info, *key_info= sort_info->info;
5836 DBUG_ENTER("sort_delete_record");
5837
5838 if ((param->testflag & (T_FORCE_UNIQUENESS|T_QUICK)) == T_QUICK)
5839 {
5840 _ma_check_print_error(param,
5841 "Quick-recover aborted; Run recovery without switch "
5842 "-q or with switch -qq");
5843 DBUG_RETURN(1);
5844 }
5845 if (key_info->s->options & HA_OPTION_COMPRESS_RECORD)
5846 {
5847 _ma_check_print_error(param,
5848 "Recover aborted; Can't run standard recovery on "
5849 "compressed tables with errors in data-file. "
5850 "Use 'aria_chk --safe-recover' to fix it");
5851 DBUG_RETURN(1);
5852 }
5853
5854 old_file= row_info->dfile.file;
5855 /* This only affects static and dynamic row formats */
5856 row_info->dfile.file= row_info->rec_cache.file;
5857 if (flush_io_cache(&row_info->rec_cache))
5858 DBUG_RETURN(1);
5859
5860 key= key_info->lastkey_buff + key_info->s->base.max_key_length;
5861 if ((error=(*row_info->s->read_record)(row_info, sort_param->record,
5862 key_info->cur_row.lastpos)) &&
5863 error != HA_ERR_RECORD_DELETED)
5864 {
5865 _ma_check_print_error(param,"Can't read record to be removed");
5866 row_info->dfile.file= old_file;
5867 DBUG_RETURN(1);
5868 }
5869 row_info->cur_row.lastpos= key_info->cur_row.lastpos;
5870
5871 for (i=0 ; i < sort_info->current_key ; i++)
5872 {
5873 MARIA_KEY tmp_key;
5874 (*key_info->s->keyinfo[i].make_key)(key_info, &tmp_key, i, key,
5875 sort_param->record,
5876 key_info->cur_row.lastpos, 0);
5877 if (_ma_ck_delete(key_info, &tmp_key))
5878 {
5879 _ma_check_print_error(param,
5880 "Can't delete key %d from record to be removed",
5881 i+1);
5882 row_info->dfile.file= old_file;
5883 DBUG_RETURN(1);
5884 }
5885 }
5886 if (sort_param->calc_checksum)
5887 param->glob_crc-=(*key_info->s->calc_check_checksum)(key_info,
5888 sort_param->record);
5889 error= (*row_info->s->delete_record)(row_info, sort_param->record);
5890 if (error)
5891 _ma_check_print_error(param,"Got error %d when deleting record",
5892 my_errno);
5893 row_info->dfile.file= old_file; /* restore actual value */
5894 row_info->s->state.state.records--;
5895 DBUG_RETURN(error);
5896} /* sort_delete_record */
5897
5898
5899/* Fix all pending blocks and flush everything to disk */
5900
5901int _ma_flush_pending_blocks(MARIA_SORT_PARAM *sort_param)
5902{
5903 uint nod_flag,length;
5904 my_off_t filepos;
5905 SORT_KEY_BLOCKS *key_block;
5906 MARIA_SORT_INFO *sort_info= sort_param->sort_info;
5907 myf myf_rw=sort_info->param->myf_rw;
5908 MARIA_HA *info=sort_info->info;
5909 MARIA_KEYDEF *keyinfo=sort_param->keyinfo;
5910 MARIA_PINNED_PAGE tmp_page_link, *page_link= &tmp_page_link;
5911 DBUG_ENTER("_ma_flush_pending_blocks");
5912
5913 filepos= HA_OFFSET_ERROR; /* if empty file */
5914 nod_flag=0;
5915 for (key_block=sort_info->key_block ; key_block->inited ; key_block++)
5916 {
5917 key_block->inited=0;
5918 length= _ma_get_page_used(info->s, key_block->buff);
5919 if (nod_flag)
5920 _ma_kpointer(info,key_block->end_pos,filepos);
5921 bzero(key_block->buff+length, keyinfo->block_length-length);
5922 if ((filepos= _ma_new(info, DFLT_INIT_HITS, &page_link)) ==
5923 HA_OFFSET_ERROR)
5924 goto err;
5925
5926 /* If we read the page from the key cache, we have to write it back */
5927 if (page_link->changed)
5928 {
5929 MARIA_PAGE page;
5930 pop_dynamic(&info->pinned_pages);
5931
5932 _ma_page_setup(&page, info, keyinfo, filepos, key_block->buff);
5933 if (_ma_write_keypage(&page, PAGECACHE_LOCK_WRITE_UNLOCK,
5934 DFLT_INIT_HITS))
5935 goto err;
5936 }
5937 else
5938 {
5939 if (write_page(info->s, info->s->kfile.file, key_block->buff,
5940 keyinfo->block_length, filepos, myf_rw))
5941 goto err;
5942 }
5943 DBUG_DUMP("buff",key_block->buff,length);
5944 nod_flag=1;
5945 }
5946 info->s->state.key_root[sort_param->key]=filepos; /* Last is root for tree */
5947 _ma_fast_unlock_key_del(info);
5948 DBUG_RETURN(0);
5949
5950err:
5951 _ma_fast_unlock_key_del(info);
5952 DBUG_RETURN(1);
5953} /* _ma_flush_pending_blocks */
5954
5955 /* alloc space and pointers for key_blocks */
5956
5957static SORT_KEY_BLOCKS *alloc_key_blocks(HA_CHECK *param, uint blocks,
5958 uint buffer_length)
5959{
5960 reg1 uint i;
5961 SORT_KEY_BLOCKS *block;
5962 DBUG_ENTER("alloc_key_blocks");
5963
5964 if (!(block= (SORT_KEY_BLOCKS*) my_malloc((sizeof(SORT_KEY_BLOCKS)+
5965 buffer_length+IO_SIZE)*blocks,
5966 MYF(0))))
5967 {
5968 _ma_check_print_error(param,"Not enough memory for sort-key-blocks");
5969 return(0);
5970 }
5971 for (i=0 ; i < blocks ; i++)
5972 {
5973 block[i].inited=0;
5974 block[i].buff= (uchar*) (block+blocks)+(buffer_length+IO_SIZE)*i;
5975 }
5976 DBUG_RETURN(block);
5977} /* alloc_key_blocks */
5978
5979
5980 /* Check if file is almost full */
5981
5982int maria_test_if_almost_full(MARIA_HA *info)
5983{
5984 MARIA_SHARE *share= info->s;
5985
5986 if (share->options & HA_OPTION_COMPRESS_RECORD)
5987 return 0;
5988 return mysql_file_seek(share->kfile.file, 0L, MY_SEEK_END,
5989 MYF(MY_THREADSAFE))/10*9 >
5990 (my_off_t) share->base.max_key_file_length ||
5991 mysql_file_seek(info->dfile.file, 0L, MY_SEEK_END, MYF(0)) / 10 * 9 >
5992 (my_off_t) share->base.max_data_file_length;
5993}
5994
5995
5996/* Recreate table with bigger more alloced record-data */
5997
5998int maria_recreate_table(HA_CHECK *param, MARIA_HA **org_info, char *filename)
5999{
6000 int error;
6001 MARIA_HA info;
6002 MARIA_SHARE share;
6003 MARIA_KEYDEF *keyinfo,*key,*key_end;
6004 HA_KEYSEG *keysegs,*keyseg;
6005 MARIA_COLUMNDEF *columndef,*column,*end;
6006 MARIA_UNIQUEDEF *uniquedef,*u_ptr,*u_end;
6007 MARIA_STATUS_INFO status_info;
6008 uint unpack,key_parts;
6009 ha_rows max_records;
6010 ulonglong file_length,tmp_length;
6011 MARIA_CREATE_INFO create_info;
6012 DBUG_ENTER("maria_recreate_table");
6013
6014 if ((!(param->testflag & T_SILENT)))
6015 printf("Recreating table '%s'\n", param->isam_file_name);
6016
6017 error=1; /* Default error */
6018 info= **org_info;
6019 status_info= (*org_info)->state[0];
6020 info.state= &status_info;
6021 share= *(*org_info)->s;
6022 unpack= ((share.data_file_type == COMPRESSED_RECORD) &&
6023 (param->testflag & T_UNPACK));
6024 if (!(keyinfo=(MARIA_KEYDEF*) my_alloca(sizeof(MARIA_KEYDEF) *
6025 share.base.keys)))
6026 DBUG_RETURN(0);
6027 memcpy((uchar*) keyinfo,(uchar*) share.keyinfo,
6028 (size_t) (sizeof(MARIA_KEYDEF)*share.base.keys));
6029
6030 key_parts= share.base.all_key_parts;
6031 if (!(keysegs=(HA_KEYSEG*) my_alloca(sizeof(HA_KEYSEG)*
6032 (key_parts+share.base.keys))))
6033 {
6034 my_afree(keyinfo);
6035 DBUG_RETURN(1);
6036 }
6037 if (!(columndef=(MARIA_COLUMNDEF*)
6038 my_alloca(sizeof(MARIA_COLUMNDEF)*(share.base.fields+1))))
6039 {
6040 my_afree(keyinfo);
6041 my_afree(keysegs);
6042 DBUG_RETURN(1);
6043 }
6044 if (!(uniquedef=(MARIA_UNIQUEDEF*)
6045 my_alloca(sizeof(MARIA_UNIQUEDEF)*(share.state.header.uniques+1))))
6046 {
6047 my_afree(columndef);
6048 my_afree(keyinfo);
6049 my_afree(keysegs);
6050 DBUG_RETURN(1);
6051 }
6052
6053 /* Copy the column definitions in their original order */
6054 for (column= share.columndef, end= share.columndef+share.base.fields;
6055 column != end ;
6056 column++)
6057 columndef[column->column_nr]= *column;
6058
6059 /* Change the new key to point at the saved key segments */
6060 memcpy((uchar*) keysegs,(uchar*) share.keyparts,
6061 (size_t) (sizeof(HA_KEYSEG)*(key_parts+share.base.keys+
6062 share.state.header.uniques)));
6063 keyseg=keysegs;
6064 for (key=keyinfo,key_end=keyinfo+share.base.keys; key != key_end ; key++)
6065 {
6066 key->seg=keyseg;
6067 for (; keyseg->type ; keyseg++)
6068 {
6069 if (param->language)
6070 keyseg->language=param->language; /* change language */
6071 }
6072 keyseg++; /* Skip end pointer */
6073 }
6074
6075 /*
6076 Copy the unique definitions and change them to point at the new key
6077 segments
6078 */
6079 memcpy((uchar*) uniquedef,(uchar*) share.uniqueinfo,
6080 (size_t) (sizeof(MARIA_UNIQUEDEF)*(share.state.header.uniques)));
6081 for (u_ptr=uniquedef,u_end=uniquedef+share.state.header.uniques;
6082 u_ptr != u_end ; u_ptr++)
6083 {
6084 u_ptr->seg=keyseg;
6085 keyseg+=u_ptr->keysegs+1;
6086 }
6087
6088 file_length=(ulonglong) mysql_file_seek(info.dfile.file, 0L, MY_SEEK_END, MYF(0));
6089 if (share.options & HA_OPTION_COMPRESS_RECORD)
6090 share.base.records=max_records=info.state->records;
6091 else if (share.base.min_pack_length)
6092 max_records=(ha_rows) (file_length / share.base.min_pack_length);
6093 else
6094 max_records=0;
6095 share.options&= ~HA_OPTION_TEMP_COMPRESS_RECORD;
6096
6097 tmp_length= file_length+file_length/10;
6098 set_if_bigger(file_length,param->max_data_file_length);
6099 set_if_bigger(file_length,tmp_length);
6100 set_if_bigger(file_length,(ulonglong) share.base.max_data_file_length);
6101
6102 maria_close(*org_info);
6103
6104 bzero((char*) &create_info,sizeof(create_info));
6105 create_info.max_rows=MY_MAX(max_records,share.base.records);
6106 create_info.reloc_rows=share.base.reloc;
6107 create_info.old_options=(share.options |
6108 (unpack ? HA_OPTION_TEMP_COMPRESS_RECORD : 0));
6109
6110 create_info.data_file_length=file_length;
6111 create_info.auto_increment=share.state.auto_increment;
6112 create_info.language = (param->language ? param->language :
6113 share.base.language);
6114 create_info.key_file_length= status_info.key_file_length;
6115 create_info.org_data_file_type= ((enum data_file_type)
6116 share.state.header.org_data_file_type);
6117
6118 /*
6119 Allow for creating an auto_increment key. This has an effect only if
6120 an auto_increment key exists in the original table.
6121 */
6122 create_info.with_auto_increment= TRUE;
6123 create_info.null_bytes= share.base.null_bytes;
6124 create_info.transactional= share.base.born_transactional;
6125
6126 /*
6127 We don't have to handle symlinks here because we are using
6128 HA_DONT_TOUCH_DATA
6129 */
6130 if (maria_create(filename, share.data_file_type,
6131 share.base.keys - share.state.header.uniques,
6132 keyinfo, share.base.fields, columndef,
6133 share.state.header.uniques, uniquedef,
6134 &create_info,
6135 HA_DONT_TOUCH_DATA))
6136 {
6137 _ma_check_print_error(param,
6138 "Got error %d when trying to recreate indexfile",
6139 my_errno);
6140 goto end;
6141 }
6142 *org_info= maria_open(filename,O_RDWR,
6143 (HA_OPEN_FOR_REPAIR |
6144 ((param->testflag & T_WAIT_FOREVER) ?
6145 HA_OPEN_WAIT_IF_LOCKED :
6146 (param->testflag & T_DESCRIPT) ?
6147 HA_OPEN_IGNORE_IF_LOCKED :
6148 HA_OPEN_ABORT_IF_LOCKED)));
6149 if (!*org_info)
6150 {
6151 _ma_check_print_error(param,
6152 "Got error %d when trying to open re-created "
6153 "indexfile", my_errno);
6154 goto end;
6155 }
6156 /* We are modifing */
6157 (*org_info)->s->options&= ~HA_OPTION_READ_ONLY_DATA;
6158 _ma_readinfo(*org_info,F_WRLCK,0);
6159 (*org_info)->s->state.state.records= info.state->records;
6160 if (share.state.create_time)
6161 (*org_info)->s->state.create_time=share.state.create_time;
6162#ifdef MARIA_EXTERNAL_LOCKING
6163 (*org_info)->s->state.unique= (*org_info)->this_unique= share.state.unique;
6164#endif
6165 (*org_info)->s->state.state.checksum= info.state->checksum;
6166 (*org_info)->s->state.state.del= info.state->del;
6167 (*org_info)->s->state.dellink= share.state.dellink;
6168 (*org_info)->s->state.state.empty= info.state->empty;
6169 (*org_info)->s->state.state.data_file_length= info.state->data_file_length;
6170 *(*org_info)->state= (*org_info)->s->state.state;
6171 if (maria_update_state_info(param,*org_info,UPDATE_TIME | UPDATE_STAT |
6172 UPDATE_OPEN_COUNT))
6173 goto end;
6174 error=0;
6175end:
6176 my_afree(uniquedef);
6177 my_afree(keyinfo);
6178 my_afree(columndef);
6179 my_afree(keysegs);
6180 DBUG_RETURN(error);
6181}
6182
6183
6184 /* write suffix to data file if neaded */
6185
6186int maria_write_data_suffix(MARIA_SORT_INFO *sort_info, my_bool fix_datafile)
6187{
6188 MARIA_HA *info=sort_info->new_info;
6189
6190 if (info->s->data_file_type == COMPRESSED_RECORD && fix_datafile)
6191 {
6192 uchar buff[MEMMAP_EXTRA_MARGIN];
6193 bzero(buff,sizeof(buff));
6194 if (my_b_write(&info->rec_cache,buff,sizeof(buff)))
6195 {
6196 _ma_check_print_error(sort_info->param,
6197 "%d when writing to datafile",my_errno);
6198 return 1;
6199 }
6200 sort_info->param->read_cache.end_of_file+=sizeof(buff);
6201 }
6202 return 0;
6203}
6204
6205
6206/* Update state and maria_chk time of indexfile */
6207
6208int maria_update_state_info(HA_CHECK *param, MARIA_HA *info,uint update)
6209{
6210 MARIA_SHARE *share= info->s;
6211 DBUG_ENTER("maria_update_state_info");
6212
6213 if (update & UPDATE_OPEN_COUNT)
6214 {
6215 share->state.open_count=0;
6216 share->global_changed=0;
6217 share->changed= 1;
6218 }
6219 if (update & UPDATE_STAT)
6220 {
6221 uint i, key_parts= mi_uint2korr(share->state.header.key_parts);
6222 share->state.records_at_analyze= share->state.state.records;
6223 share->state.changed&= ~STATE_NOT_ANALYZED;
6224 if (share->state.state.records)
6225 {
6226 for (i=0; i<key_parts; i++)
6227 {
6228 if (!(share->state.rec_per_key_part[i]=param->new_rec_per_key_part[i]))
6229 share->state.changed|= STATE_NOT_ANALYZED;
6230 }
6231 }
6232 }
6233 if (update & (UPDATE_STAT | UPDATE_SORT | UPDATE_TIME | UPDATE_AUTO_INC))
6234 {
6235 if (update & UPDATE_TIME)
6236 {
6237 share->state.check_time= time((time_t*) 0);
6238 if (!share->state.create_time)
6239 share->state.create_time= share->state.check_time;
6240 }
6241 if (_ma_state_info_write(share,
6242 MA_STATE_INFO_WRITE_DONT_MOVE_OFFSET |
6243 MA_STATE_INFO_WRITE_FULL_INFO))
6244 goto err;
6245 }
6246 { /* Force update of status */
6247 int error;
6248 uint r_locks=share->r_locks,w_locks=share->w_locks;
6249 share->r_locks= share->w_locks= share->tot_locks= 0;
6250 error= _ma_writeinfo(info,WRITEINFO_NO_UNLOCK);
6251 share->r_locks=r_locks;
6252 share->w_locks=w_locks;
6253 share->tot_locks=r_locks+w_locks;
6254 if (!error)
6255 DBUG_RETURN(0);
6256 }
6257err:
6258 _ma_check_print_error(param,"%d when updating keyfile",my_errno);
6259 DBUG_RETURN(1);
6260}
6261
6262/*
6263 Update auto increment value for a table
6264 When setting the 'repair_only' flag we only want to change the
6265 old auto_increment value if its wrong (smaller than some given key).
6266 The reason is that we shouldn't change the auto_increment value
6267 for a table without good reason when only doing a repair; If the
6268 user have inserted and deleted rows, the auto_increment value
6269 may be bigger than the biggest current row and this is ok.
6270
6271 If repair_only is not set, we will update the flag to the value in
6272 param->auto_increment is bigger than the biggest key.
6273*/
6274
6275void _ma_update_auto_increment_key(HA_CHECK *param, MARIA_HA *info,
6276 my_bool repair_only)
6277{
6278 MARIA_SHARE *share= info->s;
6279 uchar *record;
6280 DBUG_ENTER("update_auto_increment_key");
6281
6282 if (!share->base.auto_key ||
6283 ! maria_is_key_active(share->state.key_map, share->base.auto_key - 1))
6284 {
6285 if (!(param->testflag & T_VERY_SILENT))
6286 _ma_check_print_info(param,
6287 "Table: %s doesn't have an auto increment key\n",
6288 param->isam_file_name);
6289 DBUG_VOID_RETURN;
6290 }
6291 if (!(param->testflag & T_SILENT) &&
6292 !(param->testflag & T_REP))
6293 printf("Updating Aria file: %s\n", param->isam_file_name);
6294 /*
6295 We have to use an allocated buffer instead of info->rec_buff as
6296 _ma_put_key_in_record() may use info->rec_buff
6297 */
6298 if (!(record= (uchar*) my_malloc((size_t) share->base.default_rec_buff_size,
6299 MYF(0))))
6300 {
6301 _ma_check_print_error(param,"Not enough memory for extra record");
6302 DBUG_VOID_RETURN;
6303 }
6304
6305 maria_extra(info,HA_EXTRA_KEYREAD,0);
6306 if (maria_rlast(info, record, share->base.auto_key-1))
6307 {
6308 if (my_errno != HA_ERR_END_OF_FILE)
6309 {
6310 maria_extra(info,HA_EXTRA_NO_KEYREAD,0);
6311 my_free(record);
6312 _ma_check_print_error(param,"%d when reading last record",my_errno);
6313 DBUG_VOID_RETURN;
6314 }
6315 if (!repair_only)
6316 share->state.auto_increment=param->auto_increment_value;
6317 }
6318 else
6319 {
6320 const HA_KEYSEG *keyseg= share->keyinfo[share->base.auto_key-1].seg;
6321 ulonglong auto_increment=
6322 ma_retrieve_auto_increment(record + keyseg->start, keyseg->type);
6323 set_if_bigger(share->state.auto_increment,auto_increment);
6324 if (!repair_only)
6325 set_if_bigger(share->state.auto_increment, param->auto_increment_value);
6326 }
6327 maria_extra(info,HA_EXTRA_NO_KEYREAD,0);
6328 my_free(record);
6329 maria_update_state_info(param, info, UPDATE_AUTO_INC);
6330 DBUG_VOID_RETURN;
6331}
6332
6333
6334/*
6335 Update statistics for each part of an index
6336
6337 SYNOPSIS
6338 maria_update_key_parts()
6339 keyinfo IN Index information (only key->keysegs used)
6340 rec_per_key_part OUT Store statistics here
6341 unique IN Array of (#distinct tuples)
6342 notnull_tuples IN Array of (#tuples), or NULL
6343 records Number of records in the table
6344
6345 DESCRIPTION
    This function is called to produce index statistics values from the
    unique and notnull_tuples arrays after these arrays were produced with a
    sequential index scan (the scan is done in two places: chk_index() and
    sort_key_write()).
6350
6351 This function handles all 3 index statistics collection methods.
6352
6353 Unique is an array:
6354 unique[0]= (#different values of {keypart1}) - 1
6355 unique[1]= (#different values of {keypart1,keypart2} tuple)-unique[0]-1
6356 ...
6357
6358 For MI_STATS_METHOD_IGNORE_NULLS method, notnull_tuples is an array too:
6359 notnull_tuples[0]= (#of {keypart1} tuples such that keypart1 is not NULL)
6360 notnull_tuples[1]= (#of {keypart1,keypart2} tuples such that all
6361 keypart{i} are not NULL)
6362 ...
6363 For all other statistics collection methods notnull_tuples==NULL.
6364
6365 Output is an array:
6366 rec_per_key_part[k] =
6367 = E(#records in the table such that keypart_1=c_1 AND ... AND
6368 keypart_k=c_k for arbitrary constants c_1 ... c_k)
6369
     = {assuming that values have uniform distribution and index contains all
        tuples from the domain (or that {c_1, ..., c_k} tuple is chosen from
        index tuples)}
6373
6374 = #tuples-in-the-index / #distinct-tuples-in-the-index.
6375
6376 The #tuples-in-the-index and #distinct-tuples-in-the-index have different
6377 meaning depending on which statistics collection method is used:
6378
6379 MI_STATS_METHOD_* how are nulls compared? which tuples are counted?
6380 NULLS_EQUAL NULL == NULL all tuples in table
6381 NULLS_NOT_EQUAL NULL != NULL all tuples in table
6382 IGNORE_NULLS n/a tuples that don't have NULLs
6383*/
6384
6385void maria_update_key_parts(MARIA_KEYDEF *keyinfo, double *rec_per_key_part,
6386 ulonglong *unique, ulonglong *notnull,
6387 ulonglong records)
6388{
6389 ulonglong count=0, unique_tuples;
6390 ulonglong tuples= records;
6391 uint parts;
6392 double tmp;
6393 for (parts=0 ; parts < keyinfo->keysegs ; parts++)
6394 {
6395 count+=unique[parts];
6396 unique_tuples= count + 1;
6397 if (notnull)
6398 {
6399 tuples= notnull[parts];
6400 /*
6401 #(unique_tuples not counting tuples with NULLs) =
6402 #(unique_tuples counting tuples with NULLs as different) -
6403 #(tuples with NULLs)
6404 */
6405 unique_tuples -= (records - notnull[parts]);
6406 }
6407
6408 if (unique_tuples == 0)
6409 tmp= 1;
6410 else if (count == 0)
6411 tmp= ulonglong2double(tuples); /* 1 unique tuple */
6412 else
6413 tmp= ulonglong2double(tuples) / ulonglong2double(unique_tuples);
6414
6415 /*
6416 for some weird keys (e.g. FULLTEXT) tmp can be <1 here.
6417 let's ensure it is not
6418 */
6419 set_if_bigger(tmp,1);
6420
6421 *rec_per_key_part++= tmp;
6422 }
6423}
6424
6425
6426static ha_checksum maria_byte_checksum(const uchar *buf, uint length)
6427{
6428 ha_checksum crc;
6429 const uchar *end=buf+length;
6430 for (crc=0; buf != end; buf++)
6431 crc=((crc << 1) + *buf) +
6432 MY_TEST(crc & (((ha_checksum) 1) << (8 * sizeof(ha_checksum) - 1)));
6433 return crc;
6434}
6435
6436static my_bool maria_too_big_key_for_sort(MARIA_KEYDEF *key, ha_rows rows)
6437{
6438 uint key_maxlength=key->maxlength;
6439 if (key->flag & HA_FULLTEXT)
6440 {
6441 uint ft_max_word_len_for_sort=FT_MAX_WORD_LEN_FOR_SORT*
6442 key->seg->charset->mbmaxlen;
6443 key_maxlength+=ft_max_word_len_for_sort-HA_FT_MAXBYTELEN;
6444 }
6445 return (key->flag & HA_SPATIAL) ||
6446 (key->flag & (HA_BINARY_PACK_KEY | HA_VAR_LENGTH_KEY | HA_FULLTEXT) &&
6447 ((ulonglong) rows * key_maxlength >
6448 (ulonglong) maria_max_temp_length));
6449}
6450
6451/*
6452 Deactivate all indexes that can be recreated fast.
6453 These include packed keys on which sorting will use more temporary
6454 space than the max allowed file length or for which the unpacked keys
6455 will take much more space than packed keys.
6456 Note that 'rows' may be zero for the case when we don't know how many
6457 rows we will put into the file.
6458 */
6459
6460void maria_disable_indexes_for_rebuild(MARIA_HA *info, ha_rows rows,
6461 my_bool all_keys)
6462{
6463 MARIA_SHARE *share= info->s;
6464 MARIA_KEYDEF *key=share->keyinfo;
6465 uint i;
6466
6467 DBUG_ASSERT(share->state.state.records == 0 &&
6468 (!rows || rows >= MARIA_MIN_ROWS_TO_DISABLE_INDEXES));
6469 for (i=0 ; i < share->base.keys ; i++,key++)
6470 {
6471 if (!(key->flag & (HA_SPATIAL | HA_AUTO_KEY | HA_RTREE_INDEX)) &&
6472 ! maria_too_big_key_for_sort(key,rows) && share->base.auto_key != i+1 &&
6473 (all_keys || !(key->flag & HA_NOSAME)))
6474 {
6475 maria_clear_key_active(share->state.key_map, i);
6476 info->update|= HA_STATE_CHANGED;
6477 info->create_unique_index_by_sort= all_keys;
6478 }
6479 }
6480}
6481
6482
6483/*
6484 Return TRUE if we can use repair by sorting
6485 One can set the force argument to force to use sorting
6486 even if the temporary file would be quite big!
6487*/
6488
6489my_bool maria_test_if_sort_rep(MARIA_HA *info, ha_rows rows,
6490 ulonglong key_map, my_bool force)
6491{
6492 MARIA_SHARE *share= info->s;
6493 MARIA_KEYDEF *key=share->keyinfo;
6494 uint i;
6495
6496 /*
6497 maria_repair_by_sort only works if we have at least one key. If we don't
6498 have any keys, we should use the normal repair.
6499 */
6500 if (! maria_is_any_key_active(key_map))
6501 return FALSE; /* Can't use sort */
6502 for (i=0 ; i < share->base.keys ; i++,key++)
6503 {
6504 if (!force && maria_too_big_key_for_sort(key,rows))
6505 return FALSE;
6506 }
6507 return TRUE;
6508}
6509
6510
6511/**
6512 @brief Create a new handle for manipulation the new record file
6513
6514 @note
6515 It's ok for Recovery to have two MARIA_SHARE on the same index file
6516 because the one we create here is not transactional
6517*/
6518
6519static my_bool create_new_data_handle(MARIA_SORT_PARAM *param, File new_file)
6520{
6521
6522 MARIA_SORT_INFO *sort_info= param->sort_info;
6523 MARIA_HA *info= sort_info->info;
6524 MARIA_HA *new_info;
6525 DBUG_ENTER("create_new_data_handle");
6526
6527 if (!(sort_info->new_info= maria_open(info->s->open_file_name.str, O_RDWR,
6528 HA_OPEN_COPY | HA_OPEN_FOR_REPAIR |
6529 HA_OPEN_INTERNAL_TABLE)))
6530 DBUG_RETURN(1);
6531
6532 new_info= sort_info->new_info;
6533 _ma_bitmap_set_pagecache_callbacks(&new_info->s->bitmap.file,
6534 new_info->s);
6535 _ma_set_data_pagecache_callbacks(&new_info->dfile, new_info->s);
6536 change_data_file_descriptor(new_info, new_file);
6537 maria_lock_database(new_info, F_EXTRA_LCK);
6538 if ((sort_info->param->testflag & T_UNPACK) &&
6539 info->s->data_file_type == COMPRESSED_RECORD)
6540 {
6541 (*new_info->s->once_end)(new_info->s);
6542 (*new_info->s->end)(new_info);
6543 restore_data_file_type(new_info->s);
6544 _ma_setup_functions(new_info->s);
6545 if ((*new_info->s->once_init)(new_info->s, new_file) ||
6546 (*new_info->s->init)(new_info))
6547 DBUG_RETURN(1);
6548 }
6549 _ma_reset_status(new_info);
6550 if (_ma_initialize_data_file(new_info->s, new_file))
6551 DBUG_RETURN(1);
6552
6553 /* Take into account any bitmap page created above: */
6554 param->filepos= new_info->s->state.state.data_file_length;
6555
6556 /* Use new virtual functions for key generation */
6557 info->s->keypos_to_recpos= new_info->s->keypos_to_recpos;
6558 info->s->recpos_to_keypos= new_info->s->recpos_to_keypos;
6559 DBUG_RETURN(0);
6560}
6561
6562
6563static void
6564set_data_file_type(MARIA_SORT_INFO *sort_info, MARIA_SHARE *share)
6565{
6566 if ((sort_info->new_data_file_type=share->data_file_type) ==
6567 COMPRESSED_RECORD && sort_info->param->testflag & T_UNPACK)
6568 {
6569 MARIA_SHARE tmp;
6570 sort_info->new_data_file_type= share->state.header.org_data_file_type;
6571 /* Set delete_function for sort_delete_record() */
6572 tmp= *share;
6573 tmp.state.header.data_file_type= tmp.state.header.org_data_file_type;
6574 tmp.options= ~HA_OPTION_COMPRESS_RECORD;
6575 _ma_setup_functions(&tmp);
6576 share->delete_record=tmp.delete_record;
6577 }
6578}
6579
6580static void restore_data_file_type(MARIA_SHARE *share)
6581{
6582 MARIA_SHARE tmp_share;
6583 share->options&= ~HA_OPTION_COMPRESS_RECORD;
6584 mi_int2store(share->state.header.options,share->options);
6585 share->state.header.data_file_type=
6586 share->state.header.org_data_file_type;
6587 share->data_file_type= share->state.header.data_file_type;
6588 share->pack.header_length= 0;
6589
6590 /* Use new virtual functions for key generation */
6591 tmp_share= *share;
6592 _ma_setup_functions(&tmp_share);
6593 share->keypos_to_recpos= tmp_share.keypos_to_recpos;
6594 share->recpos_to_keypos= tmp_share.recpos_to_keypos;
6595}
6596
6597
6598static void change_data_file_descriptor(MARIA_HA *info, File new_file)
6599{
6600 mysql_file_close(info->dfile.file, MYF(MY_WME));
6601 info->dfile.file= info->s->bitmap.file.file= new_file;
6602 _ma_bitmap_reset_cache(info->s);
6603}
6604
6605
6606/**
6607 @brief Mark the data file to not be used
6608
6609 @note
6610 This is used in repair when we want to ensure the handler will not
6611 write anything to the data file anymore
6612*/
6613
6614static void unuse_data_file_descriptor(MARIA_HA *info)
6615{
6616 (void) flush_pagecache_blocks(info->s->pagecache,
6617 &info->s->bitmap.file,
6618 FLUSH_IGNORE_CHANGED);
6619 info->dfile.file= info->s->bitmap.file.file= -1;
6620 _ma_bitmap_reset_cache(info->s);
6621}
6622
6623
6624/*
6625 Copy all states that has to do with the data file
6626
6627 NOTES
6628 This is done to copy the state from the data file generated from
6629 repair to the original handler
6630*/
6631
6632static void copy_data_file_state(MARIA_STATE_INFO *to,
6633 MARIA_STATE_INFO *from)
6634{
6635 to->state.records= from->state.records;
6636 to->state.del= from->state.del;
6637 to->state.empty= from->state.empty;
6638 to->state.data_file_length= from->state.data_file_length;
6639 to->split= from->split;
6640 to->dellink= from->dellink;
6641 to->first_bitmap_with_space= from->first_bitmap_with_space;
6642}
6643
6644
6645/*
6646 Read 'safely' next record while scanning table.
6647
6648 SYNOPSIS
6649 _ma_safe_scan_block_record()
6650 info Maria handler
    record              Store the found record here
6652
6653 NOTES
6654 - One must have called mi_scan() before this
6655
6656 Differences compared to _ma_scan_block_records() are:
6657 - We read all blocks, not only blocks marked by the bitmap to be safe
6658 - In case of errors, next read will read next record.
6659 - More sanity checks
6660
6661 RETURN
6662 0 ok
6663 HA_ERR_END_OF_FILE End of file
6664 # error number
6665*/
6666
6667
6668static int _ma_safe_scan_block_record(MARIA_SORT_INFO *sort_info,
6669 MARIA_HA *info, uchar *record)
6670{
6671 MARIA_SHARE *share= info->s;
6672 MARIA_RECORD_POS record_pos= info->cur_row.nextpos;
6673 pgcache_page_no_t page= sort_info->page;
6674 DBUG_ENTER("_ma_safe_scan_block_record");
6675
6676 for (;;)
6677 {
6678 /* Find next row in current page */
6679 if (likely(record_pos < info->scan.number_of_rows))
6680 {
6681 uint length, offset;
6682 uchar *data, *end_of_data;
6683 char llbuff[22];
6684
6685 while (!(offset= uint2korr(info->scan.dir)))
6686 {
6687 info->scan.dir-= DIR_ENTRY_SIZE;
6688 record_pos++;
6689 if (info->scan.dir < info->scan.dir_end)
6690 {
6691 _ma_check_print_info(sort_info->param,
6692 "Wrong directory on page %s",
6693 llstr(page, llbuff));
6694 goto read_next_page;
6695 }
6696 }
6697 /* found row */
6698 info->cur_row.lastpos= info->scan.row_base_page + record_pos;
6699 info->cur_row.nextpos= record_pos + 1;
6700 data= info->scan.page_buff + offset;
6701 length= uint2korr(info->scan.dir + 2);
6702 end_of_data= data + length;
6703 info->scan.dir-= DIR_ENTRY_SIZE; /* Point to previous row */
6704
6705 if (end_of_data > info->scan.dir_end ||
6706 offset < PAGE_HEADER_SIZE(info->s) ||
6707 length < share->base.min_block_length)
6708 {
6709 _ma_check_print_info(sort_info->param,
6710 "Wrong directory entry %3u at page %s",
6711 (uint) record_pos, llstr(page, llbuff));
6712 record_pos++;
6713 continue;
6714 }
6715 else
6716 {
6717 DBUG_PRINT("info", ("rowid: %lu", (ulong) info->cur_row.lastpos));
6718 DBUG_RETURN(_ma_read_block_record2(info, record, data, end_of_data));
6719 }
6720 }
6721
6722read_next_page:
6723 /* Read until we find next head page */
6724 for (;;)
6725 {
6726 uint page_type;
6727 char llbuff[22];
6728
6729 sort_info->page++; /* In case of errors */
6730 page++;
6731 if (!(page % share->bitmap.pages_covered))
6732 {
6733 /* Skip bitmap */
6734 page++;
6735 sort_info->page++;
6736 }
6737 if ((my_off_t) (page + 1) * share->block_size > sort_info->filelength)
6738 DBUG_RETURN(HA_ERR_END_OF_FILE);
6739 if (!(pagecache_read(share->pagecache,
6740 &info->dfile,
6741 page, 0, info->scan.page_buff,
6742 PAGECACHE_READ_UNKNOWN_PAGE,
6743 PAGECACHE_LOCK_LEFT_UNLOCKED, 0)))
6744 {
6745 if (my_errno == HA_ERR_WRONG_CRC)
6746 {
6747 /*
6748 Don't give errors for zero filled blocks. These can
6749 sometimes be found at end of a bitmap when we wrote a big
6750 record last that was moved to the next bitmap.
6751 */
6752 if (_ma_check_bitmap_data(info, UNALLOCATED_PAGE, 0,
6753 _ma_bitmap_get_page_bits(info,
6754 &share->bitmap,
6755 page)))
6756 {
6757 _ma_check_print_info(sort_info->param,
6758 "Wrong CRC on datapage at %s",
6759 llstr(page, llbuff));
6760 }
6761 continue;
6762 }
6763 DBUG_RETURN(my_errno);
6764 }
6765 page_type= (info->scan.page_buff[PAGE_TYPE_OFFSET] &
6766 PAGE_TYPE_MASK);
6767 if (page_type == HEAD_PAGE)
6768 {
6769 if ((info->scan.number_of_rows=
6770 (uint) (uchar) info->scan.page_buff[DIR_COUNT_OFFSET]) != 0)
6771 break;
6772 _ma_check_print_info(sort_info->param,
6773 "Wrong head page at page %s",
6774 llstr(page, llbuff));
6775 }
6776 else if (page_type >= MAX_PAGE_TYPE)
6777 {
6778 _ma_check_print_info(sort_info->param,
6779 "Found wrong page type: %d at page %s",
6780 page_type, llstr(page, llbuff));
6781 }
6782 }
6783
6784 /* New head page */
6785 info->scan.dir= (info->scan.page_buff + share->block_size -
6786 PAGE_SUFFIX_SIZE - DIR_ENTRY_SIZE);
6787 info->scan.dir_end= (info->scan.dir -
6788 (info->scan.number_of_rows - 1) *
6789 DIR_ENTRY_SIZE);
6790 info->scan.row_base_page= ma_recordpos(page, 0);
6791 record_pos= 0;
6792 }
6793}
6794
6795
6796/**
6797 @brief Writes a LOGREC_REPAIR_TABLE record and updates create_rename_lsn
6798 if needed (so that maria_read_log does not redo the repair).
6799
6800 @param param description of the REPAIR operation
6801 @param info table
6802
6803 @return Operation status
6804 @retval 0 ok
6805 @retval 1 error (disk problem)
6806*/
6807
6808my_bool write_log_record_for_repair(const HA_CHECK *param, MARIA_HA *info)
6809{
6810 MARIA_SHARE *share= info->s;
6811 /* in case this is maria_chk or recovery... */
6812 if (translog_status == TRANSLOG_OK && !maria_in_recovery &&
6813 share->base.born_transactional)
6814 {
6815 my_bool save_now_transactional= share->now_transactional;
6816
6817 /*
6818 For now this record is only informative. It could serve when applying
6819 logs to a backup, but that needs more thought. Assume table became
6820 corrupted. It is repaired, then some writes happen to it.
6821 Later we restore an old backup, and want to apply this REDO_REPAIR_TABLE
6822 record. For it to give the same result as originally, the table should
6823 be corrupted the same way, so applying previous REDOs should produce the
6824 same corruption; that's really not guaranteed (different execution paths
6825 in execution of REDOs vs runtime code so not same bugs hit, temporary
6826 hardware issues not repeatable etc). Corruption may not be repeatable.
6827 A reasonable solution is to execute the REDO_REPAIR_TABLE record and
6828 check if the checksum of the resulting table matches what it was at the
6829 end of the original repair (should be stored in log record); or execute
6830 the REDO_REPAIR_TABLE if the checksum of the table-before-repair matches
6831 was it was at the start of the original repair (should be stored in log
6832 record).
6833 */
6834 LEX_CUSTRING log_array[TRANSLOG_INTERNAL_PARTS + 1];
6835 uchar log_data[FILEID_STORE_SIZE + 8 + 8];
6836 LSN lsn;
6837
6838 /*
6839 testflag gives an idea of what REPAIR did (in particular T_QUICK
6840 or not: did it touch the data file or not?).
6841 */
6842 int8store(log_data + FILEID_STORE_SIZE, param->testflag);
6843 /* org_key_map is used when recreating index after a load data infile */
6844 int8store(log_data + FILEID_STORE_SIZE + 8, param->org_key_map);
6845
6846 log_array[TRANSLOG_INTERNAL_PARTS + 0].str= log_data;
6847 log_array[TRANSLOG_INTERNAL_PARTS + 0].length= sizeof(log_data);
6848
6849 share->now_transactional= 1;
6850 if (unlikely(translog_write_record(&lsn, LOGREC_REDO_REPAIR_TABLE,
6851 &dummy_transaction_object, info,
6852 (translog_size_t) sizeof(log_data),
6853 sizeof(log_array)/sizeof(log_array[0]),
6854 log_array, log_data, NULL) ||
6855 translog_flush(lsn)))
6856 return TRUE;
6857 /*
6858 The table's existence was made durable earlier (MY_SYNC_DIR passed to
6859 maria_change_to_newfile()). All pages have been flushed, state too, we
6860 need to force it to disk. Old REDOs should not be applied to the table,
6861 which is already enforced as skip_redos_lsn was increased in
6862 protect_against_repair_crash(). But if this is an explicit repair,
6863 even UNDO phase should ignore this table: create_rename_lsn should be
6864 increased, and this also serves for the REDO_REPAIR to be ignored by
6865 maria_read_log.
6866 The fully correct order would be: sync data and index file, remove crash
6867 mark and update LSNs then write state and sync index file. But at this
6868 point state (without crash mark) is already written.
6869 */
6870 if ((!(param->testflag & T_NO_CREATE_RENAME_LSN) &&
6871 _ma_update_state_lsns(share, lsn, share->state.create_trid, FALSE,
6872 FALSE)) ||
6873 _ma_sync_table_files(info))
6874 return TRUE;
6875 share->now_transactional= save_now_transactional;
6876 }
6877 return FALSE;
6878}
6879
6880
6881/**
6882 Writes an UNDO record which if executed in UNDO phase, will empty the
6883 table. Such record is thus logged only in certain cases of bulk insert
6884 (table needs to be empty etc).
6885*/
6886my_bool write_log_record_for_bulk_insert(MARIA_HA *info)
6887{
6888 LEX_CUSTRING log_array[TRANSLOG_INTERNAL_PARTS + 1];
6889 uchar log_data[LSN_STORE_SIZE + FILEID_STORE_SIZE];
6890 LSN lsn;
6891 lsn_store(log_data, info->trn->undo_lsn);
6892 log_array[TRANSLOG_INTERNAL_PARTS + 0].str= log_data;
6893 log_array[TRANSLOG_INTERNAL_PARTS + 0].length= sizeof(log_data);
6894 return translog_write_record(&lsn, LOGREC_UNDO_BULK_INSERT,
6895 info->trn, info,
6896 (translog_size_t)
6897 log_array[TRANSLOG_INTERNAL_PARTS +
6898 0].length,
6899 TRANSLOG_INTERNAL_PARTS + 1, log_array,
6900 log_data + LSN_STORE_SIZE, NULL) ||
6901 translog_flush(lsn); /* WAL */
6902}
6903
6904
6905/* Give error message why reading of key page failed */
6906
6907static void report_keypage_fault(HA_CHECK *param, MARIA_HA *info,
6908 my_off_t position)
6909{
6910 char buff[11];
6911 uint32 block_size= info->s->block_size;
6912
6913 if (my_errno == HA_ERR_CRASHED)
6914 _ma_check_print_error(param,
6915 "Wrong base information on indexpage at page: %s",
6916 llstr(position / block_size, buff));
6917 else
6918 _ma_check_print_error(param,
6919 "Can't read indexpage from page: %s, "
6920 "error: %d",
6921 llstr(position / block_size, buff), my_errno);
6922}
6923
6924
6925/**
6926 When we want to check a table, we verify that the transaction ids of rows
6927 and keys are not bigger than the biggest id generated by Maria so far, which
6928 is returned by the function below.
6929
6930 @note If control file is not open, 0 may be returned; to not confuse
6931 this with a valid max trid of 0, the caller should notice that it failed to
6932 open the control file (ma_control_file_inited() can serve for that).
6933*/
6934
6935static TrID max_trid_in_system(void)
6936{
6937 TrID id= trnman_get_max_trid(); /* 0 if transac manager not initialized */
6938 /* 'id' may be far bigger, if last shutdown is old */
6939 return MY_MAX(id, max_trid_in_control_file);
6940}
6941
6942
6943static void _ma_check_print_not_visible_error(HA_CHECK *param, TrID used_trid)
6944{
6945 char buff[22], buff2[22];
6946 if (!param->not_visible_rows_found++)
6947 {
6948 if (!ma_control_file_inited())
6949 {
6950 _ma_check_print_warning(param,
6951 "Found row with transaction id %s but no "
6952 "aria_control_file was used or specified. "
6953 "The table may be corrupted",
6954 llstr(used_trid, buff));
6955 }
6956 else
6957 {
6958 _ma_check_print_error(param,
6959 "Found row with transaction id %s when max "
6960 "transaction id according to aria_control_file "
6961 "is %s",
6962 llstr(used_trid, buff),
6963 llstr(param->max_trid, buff2));
6964 }
6965 }
6966}
6967
6968
6969/**
6970 Mark that we can retry normal repair if we used quick repair
6971
6972 We shouldn't do this in case of disk error as in this case we are likely
6973 to loose much more than expected.
6974*/
6975
6976void retry_if_quick(MARIA_SORT_PARAM *sort_param, int error)
6977{
6978 HA_CHECK *param=sort_param->sort_info->param;
6979
6980 if (!sort_param->fix_datafile && error >= HA_ERR_FIRST)
6981 {
6982 param->retry_repair=1;
6983 param->testflag|=T_RETRY_WITHOUT_QUICK;
6984 }
6985}
6986
6987/* Print information about bitmap page */
6988
6989static void print_bitmap_description(MARIA_SHARE *share,
6990 pgcache_page_no_t page,
6991 uchar *bitmap_data)
6992{
6993 char *tmp= my_malloc(MAX_BITMAP_INFO_LENGTH, MYF(MY_WME));
6994 if (!tmp)
6995 return;
6996 _ma_get_bitmap_description(&share->bitmap, bitmap_data, page, tmp);
6997 printf("Bitmap page %lu\n%s", (ulong) page, tmp);
6998 my_free(tmp);
6999}
7000